package EnsEMBL::Web::Filter::Spam;

use strict;
use warnings;
use Class::Std;

use base qw(EnsEMBL::Web::Filter);

### Checks if a form's fields are spam-free. Use 'catch' to check an entire form,
### or 'check' to check an individual field. 
### 'Honeypots' are form fields intended to trap spambots: they are hidden from the
### legitimate user, so any content submitted in one indicates an automated submission.

{

## Class::Std inside-out attribute storage. The :set<...>/:get<...> markers ask
## Class::Std to generate set_threshold/get_threshold and
## set_honeypots/get_honeypots accessors.
## Threshold is initialised in BUILD (default 60) and read by check().
my %Threshold :ATTR(:set<threshold> :get<threshold>);
## NOTE(review): nothing visible in this file populates or reads Honeypots -
## catch() takes its honeypot list from $self->object->interface->honeypots
## instead. Confirm whether this attribute is still used elsewhere.
my %Honeypots :ATTR(:set<honeypots> :get<honeypots>);

sub BUILD {
  ### Class::Std constructor hook: stores the spam threshold for this object
  ### and registers the error messages keyed by the codes that catch/check set.
  ### Arguments: $self, $ident (Class::Std object identifier), $args (hashref of
  ### constructor arguments; 'threshold' is the only key read here)
  my ($self, $ident, $args) = @_;

  ## A missing (or false) threshold falls back to the default of 60
  my $threshold = $args->{threshold};
  $threshold = 60 unless $threshold;
  $Threshold{$ident} = $threshold;

  ## Set the messages hash here
  my %messages = (
    'spam'  => 'Sorry, one of your form entries was identified as spam. Please remove excess URLs and try again.',
    'empty' => 'Sorry, one of the required fields was empty. Please try again.',
  );
  $self->set_messages(\%messages);
}

sub catch {
  ### Checks an entire submitted form for spam.
  ### First flags the submission as 'spam' if any honeypot field has content,
  ### then runs check() with a threshold of 1 on every value of every
  ### submitted parameter. Failures are reported through set_error_code
  ### (directly for honeypots, via check() for ordinary fields).
  ### Returns nothing.
  my $self   = shift;
  my $object = $self->object;

  ## Check honeypot fields for content - they should be empty!
  ## (tolerate an interface that has no honeypot list at all)
  foreach my $field (@{ $object->interface->honeypots || [] }) {
    if ($object->param($field)) {
      $self->set_error_code('spam');
      warn "@@@ FILTERED DUE TO CONTENT IN HONEYPOT $field.....";
    }
  }

  ## Check legitimate fields for bogus content
  foreach my $field ($object->param) {
    ## param($field) returns a list for multi-valued fields. Each value is
    ## passed to check() on its own: interpolating the list straight into the
    ## argument list would let a second value masquerade as the threshold.
    foreach my $value ($object->param($field)) {
      $self->check($value, 1);
    }
  }

  return;
}

sub check {
  ### Checks a single piece of form content for spam.
  ### Arguments: $content   - the submitted text (may be undefined)
  ###            $threshold - optional; content is rejected as spam when the
  ###                         text left after removing links is shorter than
  ###                         1/$threshold of the original. Defaults to the
  ###                         object's threshold; 1 means "no links at all".
  ### Returns 1 and sets the 'spam' or 'empty' error code if the content is
  ### rejected, otherwise 0.
  my ($self, $content, $threshold) = @_;

  $threshold = $self->get_threshold unless $threshold;
  ## Guard the division below: anything that is not a positive number falls
  ## back to 60, the same default that BUILD applies.
  require Scalar::Util;
  if (!Scalar::Util::looks_like_number($threshold) || $threshold <= 0) {
    $threshold = 60;
  }

  return 0 if !$content && $threshold == 1; ## Only way to OK optional fields as spam-free!

  ## Treat a missing value as empty text so it is reported as 'empty' without
  ## triggering "uninitialized value" warnings
  $content = '' unless defined $content;

  ## Strip out links. Matching is case-insensitive because HTML tag names,
  ## BBCode tags and URL schemes are all valid in upper case too.
  (my $check = $content) =~ s/<a\s+href=.*?>.*?<\/a>//smgi;
  $check =~ s/\[url=.*?\].*?\[\/url\]//smgi;
  $check =~ s/\[link=.*?\].*?\[\/link\]//smgi;
  $check =~ s/https?:\/\/\S+//smgi;

  ## If insufficient legit content left after link removal, it's probably spam!
  if (length($check) < length($content) / $threshold) {
    $self->set_error_code('spam');
    warn "@@@ FILTERED DUE TO BLOG SPAM.....";
    return 1;
  }

  ## Nothing but whitespace counts as an empty field
  $check =~ s/\s+//gsm;
  if ($check eq '') {
    $self->set_error_code('empty');
    warn "@@@ FILTERED DUE TO ZERO CONTENT!";
    return 1;
  }

  return 0;
}
}

1;