Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions askbot/conf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def init():
import askbot.conf.access_control
import askbot.conf.site_modes
import askbot.conf.words
import askbot.conf.spam_defense

#import main settings object
from askbot.conf.settings_wrapper import settings
Expand Down
66 changes: 66 additions & 0 deletions askbot/conf/spam_defense.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
"""Livesettings for the spam defense system (Bayesian filter + first-post confirmation)."""
from django.utils.translation import gettext_lazy as _
from livesettings import values as livesettings
from askbot.conf.settings_wrapper import settings
from askbot.conf.super_groups import EXTERNAL_SERVICES

# Livesettings group holding every spam-defense switch registered in this
# module; it is attached to the EXTERNAL_SERVICES super group.
SPAM_DEFENSE = livesettings.ConfigurationGroup(
    'SPAM_DEFENSE', _('Spam defense settings'), super_group=EXTERNAL_SERVICES
)

# Boolean switch, off by default: first posts of watched users must be
# confirmed through an emailed link before they go live.
settings.register(livesettings.BooleanValue(
    SPAM_DEFENSE,
    'FIRST_POST_EMAIL_CONFIRMATION',
    default=False,
    description=_('Require email confirmation for first post'),
    help_text=_(
        'When enabled, watched users must confirm their first post '
        'via an email link before it goes live.'
    )
))

# Boolean switch, on by default: a first post that passed email confirmation
# is queued for moderator approval instead of going live immediately.
#
# NOTE(review): open question raised during review of this setting. Posts of
# "watched" users are already pre-moderated when the moderation mode is
# "premoderation"; it is not clear how that is compatible with this switch.
# What should happen when the moderation mode is "premoderation" and this
# setting is False? Resolve and document the interaction before release.
settings.register(
    livesettings.BooleanValue(
        SPAM_DEFENSE,
        'FIRST_POST_MODERATE_AFTER_CONFIRMATION',
        description=_('Require moderator approval after email confirmation'),
        help_text=_(
            'When enabled, first posts that pass email confirmation are '
            'placed in the moderator queue for approval. When disabled, '
            'confirmed posts go live immediately.'
        ),
        default=True
    )
)

# Deleting an account together with all of its content cannot be undone, so
# this switch is opt-in: with the default (False) blocking a user keeps the
# account with blocked status, which the help text describes as the original
# behavior. Site admins enable deletion explicitly.
settings.register(
    livesettings.BooleanValue(
        SPAM_DEFENSE,
        'DELETE_BLOCKED_USERS',
        description=_('Delete blocked spammer accounts entirely'),
        help_text=_(
            'When enabled, blocking a spammer deletes the user account '
            'along with all their content, preventing accumulation of '
            'dead accounts. When disabled, the account is kept with '
            'blocked status (original behavior).'
        ),
        default=False
    )
)

# Boolean switch, off by default: silently remove both the user and the post
# when a first post is flagged as spam (and not as ham).
settings.register(
    livesettings.BooleanValue(
        SPAM_DEFENSE,
        'BAYESIAN_SPAM_SILENT_DELETE',
        description=_('Silently delete obvious spam from new users'),
        help_text=_(
            'When enabled, first posts that are flagged as spam (but not ham) '
            'result in silent deletion of the user and post.'
        ),
        default=False
    )
)
17 changes: 17 additions & 0 deletions askbot/jinja2/email/post_confirmation/body.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
{% extends "email/base_mail.html"%}
{# Body of the first-post confirmation email. Fills the title, headline, content and footer blocks of the base mail template and renders the `site_name` and `confirmation_link` context values. #}
{% block title %}{% trans %}Please confirm your post{% endtrans %}{% endblock %}
{% block headline %}{% trans %}Please confirm your post{% endtrans %}{% endblock %}

{% block content %}
<p>{% trans %}Thank you for posting on {{ site_name }}. To prevent spam, we ask new users to confirm their first post.{% endtrans %}</p>

<p>{% trans %}Please follow the link below to confirm and publish your post:{% endtrans %}</p>

<p><a href="{{ confirmation_link }}">{{ confirmation_link }}</a></p>

{# NOTE(review): "3 days" is hard-coded text; confirm it matches the expiry actually applied to the confirmation record. #}
<p>{% trans %}This link will expire in 3 days. Posts that are not confirmed will be removed.{% endtrans %}</p>
{% endblock %}

{% block footer %}
{% include "email/footer.html" %}
{% endblock %}
1 change: 1 addition & 0 deletions askbot/jinja2/email/post_confirmation/subject.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{# Subject of the first-post confirmation email; renders `site_name`. Keep this comment on the same line so no newline is added to the subject. #}{% trans %}Please confirm your post on {{ site_name }}{% endtrans %}
53 changes: 53 additions & 0 deletions askbot/jinja2/post_confirmation.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
{% extends "base.html" %}
{#
  Landing page for the first-post confirmation link.
  Renders one of four states, chosen from the template context:
    - `error` is set: show the error message;
    - `confirmed` and `pending_moderation`: post confirmed, awaiting a moderator;
    - `confirmed` only: post published, with a link to `post_url`;
    - otherwise: preview of `post_html` plus the confirmation form.
#}
{% block title %}{% trans %}Confirm your post{% endtrans %}{% endblock %}

{% block body %}
<div class="content-wrapper" style="max-width: 700px; margin: 2em auto; padding: 1em;">

    {% if error %}
        <h1>{% trans %}Post confirmation{% endtrans %}</h1>
        <p class="message">{{ error }}</p>
    {% elif confirmed and pending_moderation %}
        <h1>{% trans %}Post confirmed{% endtrans %}</h1>
        <p>{% trans %}Thank you for confirming your post. It has been submitted for moderator review and will appear on the site once approved.{% endtrans %}</p>
    {% elif confirmed %}
        <h1>{% trans %}Post published!{% endtrans %}</h1>
        <p>{% trans %}Your post has been published. Thank you!{% endtrans %}</p>
        <p><a href="{{ post_url }}">{% trans %}View your post{% endtrans %}</a></p>
    {% else %}
        <h1>{% trans %}Confirm your post{% endtrans %}</h1>

        <div style="background: #f5f5f5; border: 1px solid #ddd; padding: 1em; margin: 1em 0; border-radius: 4px;">
            <h3>{% trans %}Your post preview:{% endtrans %}</h3>
            {# NOTE(review): rendered without escaping; assumes the view passes already-sanitized HTML in `post_html` - confirm. #}
            {{ post_html|safe }}
        </div>

        <form method="post" id="js-confirm-post-form">
            {{ csrf_input }}
            <p>
                <label>
                    <input type="checkbox" id="js-confirm-checkbox" name="confirm_checkbox" value="1" />
                    {# Site-neutral wording: this template is shared by every installation, so it must not name a particular product or topic. #}
                    {% trans %}I confirm that I wrote this post and that it is relevant to this site{% endtrans %}
                </label>
            </p>
            <p>
                {# The button starts disabled and is only enabled by the script below, so submitting requires JavaScript. #}
                <button type="submit" id="js-confirm-button" disabled="disabled"
                        style="padding: 8px 24px; font-size: 14px; cursor: pointer;">
                    {% trans %}Publish My Post{% endtrans %}
                </button>
            </p>
        </form>

        <script>
        (function() {
            var checkbox = document.getElementById('js-confirm-checkbox');
            var button = document.getElementById('js-confirm-button');

            // Keep the submit button enabled exactly while the box is ticked.
            function syncButton() {
                button.disabled = !checkbox.checked;
            }

            checkbox.addEventListener('change', syncButton);
            // Browsers may restore a ticked checkbox on reload or back
            // navigation without firing "change"; sync once on load so the
            // button does not stay disabled in that case.
            syncButton();
        })();
        </script>
    {% endif %}

</div>
{% endblock %}
16 changes: 16 additions & 0 deletions askbot/mail/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,22 @@ def process_context(self, context):
# 'recipient_user': get_user()
# }

class PostConfirmationEmail(BaseEmail):
    """Email that asks a new user to confirm their first post.

    The context handed to ``process_context`` must contain ``key``, which
    is used to build the confirmation link.
    """
    template_path = 'email/post_confirmation'
    title = _('Post confirmation')
    description = _('Sent to new users to confirm their first post via email')
    mock_contexts = ({'key': 'abc123def456'},)

    def process_context(self, context):
        """Add site name, recipient and confirmation link to ``context``.

        Returns the same ``context`` object after updating it in place.
        """
        # Compute every value first so the context is updated in one step.
        site_name = askbot_settings.APP_SHORT_NAME
        confirm_path = reverse('confirm_post', kwargs={'key': context['key']})
        extra = {
            'site_name': site_name,
            'recipient_user': None,
            'confirmation_link': site_url(confirm_path),
        }
        context.update(extra)
        return context


class FeedbackEmail(BaseEmail):
template_path = 'email/feedback'
title = _('Feedback email')
Expand Down
18 changes: 18 additions & 0 deletions askbot/management/commands/askbot_delete_expired_confirmations.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Delete expired unconfirmed first-post confirmations.

Removes the post and (if the user has no other posts) the user.
Intended to be run periodically via cron, e.g. daily.

Usage:
python manage.py askbot_delete_expired_confirmations
"""
from django.core.management.base import BaseCommand


class Command(BaseCommand):
    """Management command that purges expired, unconfirmed confirmations."""
    help = 'Delete expired unconfirmed first-post confirmations and their users'

    def handle(self, *args, **kwargs):
        """Run the cleanup and report how many confirmations were deleted."""
        # Imported inside the method, as in the original, rather than at
        # module import time.
        from askbot.models.post_confirmation import PostConfirmation

        deleted = PostConfirmation.delete_expired_unconfirmed()
        message = f'Deleted {deleted} expired unconfirmed confirmation(s).'
        self.stdout.write(message)
137 changes: 137 additions & 0 deletions askbot/management/commands/askbot_train_spam_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
"""Train the dual Bayesian spam filter models.

Usage:
python manage.py askbot_train_spam_filter --from-db
python manage.py askbot_train_spam_filter --input spam-ham.json
python manage.py askbot_train_spam_filter --input spam-ham.json --model spam
"""
import json
import os

from django.conf import settings as django_settings
from django.core.management.base import BaseCommand, CommandError


class Command(BaseCommand):
    """Management command that trains the spam and/or ham classifiers.

    Training texts come either from the live database (``--from-db``) or
    from a JSON file (``--input``) with ``spam`` and ``ham`` lists. Each
    model is a TF-IDF vectorizer plus a multinomial naive Bayes classifier,
    written with joblib to ``MEDIA_ROOT/spam_filter``.
    """
    help = 'Train the Bayesian spam filter (spam and/or ham models)'

    def add_arguments(self, parser):
        """Register the ``--from-db``, ``--input`` and ``--model`` options."""
        parser.add_argument(
            '--from-db', action='store_true', default=False,
            help='Train from live database (blocked users = spam, approved users = ham)'
        )
        parser.add_argument(
            '--input', dest='input_file', default=None,
            help='Path to JSON file from askbot_get_spam_training_set'
        )
        parser.add_argument(
            '--model', choices=['spam', 'ham', 'both'], default='both',
            help='Which model(s) to train (default: both)'
        )

    def handle(self, *args, **kwargs):
        """Load the training texts and train the requested model(s).

        Raises:
            CommandError: if neither or both of ``--from-db`` and ``--input``
                are given, or if loading/training fails.
        """
        from_db = kwargs['from_db']
        input_file = kwargs['input_file']
        model_choice = kwargs['model']

        if not from_db and not input_file:
            raise CommandError('Specify --from-db or --input FILE')

        if from_db and input_file:
            raise CommandError('Specify only one of --from-db or --input')

        if input_file:
            spam_texts, ham_texts = self._load_from_file(input_file)
        else:
            spam_texts, ham_texts = self._load_from_db()

        self.stdout.write(f'Spam examples: {len(spam_texts)}')
        self.stdout.write(f'Ham examples: {len(ham_texts)}')

        # Each model treats its own class as positive and the other class as
        # negative, so the two text lists swap roles between the two calls.
        if model_choice in ('spam', 'both'):
            if len(spam_texts) < 10:
                self.stderr.write('WARNING: Very few spam examples, model may be unreliable')
            self._train_model('spam', spam_texts, ham_texts)

        if model_choice in ('ham', 'both'):
            if len(ham_texts) < 10:
                self.stderr.write('WARNING: Very few ham examples, model may be unreliable')
            self._train_model('ham', ham_texts, spam_texts)

        self.stdout.write(self.style.SUCCESS('Training complete.'))

    def _load_from_file(self, path):
        """Return ``(spam_texts, ham_texts)`` read from the JSON file at ``path``.

        The file must hold a JSON object; its ``spam`` and ``ham`` entries
        (each optional, defaulting to an empty list) must be lists.

        Raises:
            CommandError: if the file is missing or unreadable, is not valid
                UTF-8 JSON, or does not have the expected structure.
        """
        # Open directly instead of checking existence first: one code path
        # covers missing files, directories and permission problems.
        try:
            with open(path, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except FileNotFoundError as err:
            raise CommandError(f'File not found: {path}') from err
        except OSError as err:
            raise CommandError(f'Cannot read {path}: {err}') from err
        except ValueError as err:
            # Covers json.JSONDecodeError and UnicodeDecodeError.
            raise CommandError(f'Invalid JSON in {path}: {err}') from err

        if not isinstance(data, dict):
            raise CommandError(
                f'Expected a JSON object with "spam" and "ham" lists in {path}'
            )

        spam_texts = data.get('spam', [])
        ham_texts = data.get('ham', [])
        for label, texts in (('spam', spam_texts), ('ham', ham_texts)):
            # Training concatenates the two lists, so both must be lists.
            if not isinstance(texts, list):
                raise CommandError(f'"{label}" in {path} must be a list of texts')
        return spam_texts, ham_texts

    def _load_from_db(self):
        """Return ``(spam_texts, ham_texts)`` collected from the database.

        Spam: posts of users whose profile has status ``'b'`` and reputation
        equal to ``const.MIN_REPUTATION`` and who have exactly one post.
        Ham: up to 3000 posts of users with reputation of at least 10,
        taken from the highest-reputation authors first.
        """
        from askbot.models import Post, User
        from askbot import const

        self.stdout.write('Loading training data from database...')

        from django.db.models import Count
        post_types = ('question', 'answer', 'comment')

        spam_users = User.objects.filter(
            askbot_profile__reputation=const.MIN_REPUTATION,
            askbot_profile__status='b'
        ).annotate(post_count=Count('posts')).filter(post_count=1)
        spam_rows = Post.objects.filter(
            author__in=spam_users,
            post_type__in=post_types
        ).values_list('text', flat=True)
        # Drop empty/NULL texts: they carry no features, and a None document
        # cannot be vectorized.
        spam_texts = [text for text in spam_rows if text]

        ham_users = User.objects.filter(askbot_profile__reputation__gte=10)
        # Order the posts themselves before slicing. Ordering only the user
        # subquery does not decide which 3000 posts are returned; the "pk"
        # tie-breaker makes the selection repeatable between runs.
        ham_rows = Post.objects.filter(
            author__in=ham_users,
            post_type__in=post_types
        ).order_by(
            '-author__askbot_profile__reputation', 'pk'
        ).values_list('text', flat=True)[:3000]
        ham_texts = [text for text in ham_rows if text]

        return spam_texts, ham_texts

    def _train_model(self, model_type, positive_texts, negative_texts):
        """Train, evaluate and save one classifier.

        Args:
            model_type: ``'spam'`` or ``'ham'``; used in messages and as the
                prefix of the saved file names.
            positive_texts: texts labelled 1 for this model.
            negative_texts: texts labelled 0 for this model.

        Raises:
            CommandError: if either class has fewer than two examples or the
                data cannot be split into train and test sets.
        """
        import numpy as np
        from sklearn.feature_extraction.text import TfidfVectorizer
        from sklearn.model_selection import train_test_split
        from sklearn.naive_bayes import MultinomialNB
        from sklearn.metrics import classification_report

        self.stdout.write(f'\nTraining {model_type} model...')

        # A stratified split needs at least two examples of every class;
        # fail with a readable message instead of a scikit-learn traceback.
        if len(positive_texts) < 2 or len(negative_texts) < 2:
            raise CommandError(
                f'Cannot train {model_type} model: need at least 2 positive '
                f'and 2 negative examples (got {len(positive_texts)} and '
                f'{len(negative_texts)})'
            )

        all_texts = positive_texts + negative_texts
        labels = np.array([1] * len(positive_texts) + [0] * len(negative_texts))

        try:
            X_train_text, X_test_text, y_train, y_test = train_test_split(
                all_texts, labels, test_size=0.2, random_state=42, stratify=labels
            )
        except ValueError as err:
            raise CommandError(
                f'Cannot split training data for {model_type} model: {err}'
            ) from err

        vectorizer = TfidfVectorizer(
            max_features=50000,
            ngram_range=(1, 2),
            min_df=2
        )
        # Fit the vocabulary on the training part only; the held-out part is
        # just transformed, so the evaluation does not see test vocabulary.
        X_train = vectorizer.fit_transform(X_train_text)
        X_test = vectorizer.transform(X_test_text)

        classifier = MultinomialNB(alpha=0.1)
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)
        report = classification_report(y_test, y_pred, target_names=['negative', 'positive'])
        self.stdout.write(f'\n{model_type.upper()} model evaluation:\n{report}')

        import joblib
        # NOTE(review): joblib files are pickles, and loading a pickle runs
        # arbitrary code. If MEDIA_ROOT is writable through user uploads or
        # served publicly, these models should live elsewhere - confirm the
        # deployment before shipping. Path kept as is here, since whatever
        # loads the models must read from the same location.
        model_dir = os.path.join(django_settings.MEDIA_ROOT, 'spam_filter')
        os.makedirs(model_dir, exist_ok=True)

        vec_path = os.path.join(model_dir, f'{model_type}_vectorizer.joblib')
        clf_path = os.path.join(model_dir, f'{model_type}_classifier.joblib')
        joblib.dump(vectorizer, vec_path)
        joblib.dump(classifier, clf_path)
        self.stdout.write(f'Saved {model_type} model to {model_dir}')
43 changes: 43 additions & 0 deletions askbot/migrations/0036_postconfirmation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
import django.utils.timezone
import askbot.models.post_confirmation


# Schema migration that creates the PostConfirmation model, which links a
# post and a user to a confirmation key.
class Migration(migrations.Migration):

    # Needs the (swappable) user model and the previous askbot migration.
    dependencies = [
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
        ('askbot', '0035_set_global_group_used_for_analytics'),
    ]

    operations = [
        migrations.CreateModel(
            name='PostConfirmation',
            fields=[
                # String primary key (max 64 chars); the default value comes
                # from the model module's _make_key callable.
                ('key', models.CharField(
                    default=askbot.models.post_confirmation._make_key,
                    max_length=64,
                    primary_key=True,
                    serialize=False,
                )),
                ('created_at', models.DateTimeField(default=django.utils.timezone.now)),
                # NULL until set; nullable and optional in forms.
                ('confirmed_at', models.DateTimeField(blank=True, null=True)),
                # blank=True but NOT NULL and without a default here.
                # NOTE(review): presumably filled in by the model before
                # saving - confirm against the PostConfirmation model.
                ('expires_on', models.DateTimeField(blank=True)),
                # Deleting the post or the user cascades to the confirmation.
                ('post', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    related_name='confirmations',
                    to='askbot.post',
                )),
                ('user', models.ForeignKey(
                    on_delete=django.db.models.deletion.CASCADE,
                    related_name='post_confirmations',
                    to=settings.AUTH_USER_MODEL,
                )),
            ],
            options={
                'app_label': 'askbot',
            },
        ),
    ]
Loading