2000-09-02 15:28:14 +00:00
|
|
|
#
|
|
|
|
# Search for bad words in strings
|
|
|
|
#
|
|
|
|
# Copyright (c) 2000 Dirk Koopman
|
|
|
|
#
|
|
|
|
# $Id$
|
|
|
|
#
|
|
|
|
|
|
|
|
package BadWords;
|
|
|
|
|
|
|
|
use strict;
|
|
|
|
|
|
|
|
use DXUtil;
|
|
|
|
use DXVars;
|
2001-09-13 19:58:05 +00:00
|
|
|
use DXHash;
|
2001-10-04 13:53:47 +00:00
|
|
|
use DXDebug;
|
|
|
|
|
2000-09-02 15:28:14 +00:00
|
|
|
use IO::File;
|
|
|
|
|
2001-10-04 13:53:47 +00:00
|
|
|
use vars qw($badword @regex);
|
2000-09-02 15:28:14 +00:00
|
|
|
|
2001-09-13 19:58:05 +00:00
|
|
|
my $oldfn = "$main::data/badwords";
|
2001-10-04 13:53:47 +00:00
|
|
|
my $regex = "$main::data/badw_regex";
|
|
|
|
my $bwfn = "$main::data/badword";
|
|
|
|
|
|
|
|
# copy issue ones across
|
|
|
|
filecopy("$regex.issue", $regex) unless -e $regex;
|
|
|
|
filecopy("$bwfn.issue", $bwfn) unless -e $bwfn;
|
|
|
|
|
2001-09-13 19:58:05 +00:00
|
|
|
$badword = new DXHash "badword";
|
2000-09-02 15:28:14 +00:00
|
|
|
|
2001-09-01 12:15:09 +00:00
|
|
|
use vars qw($VERSION $BRANCH);
|
|
|
|
$VERSION = sprintf( "%d.%03d", q$Revision$ =~ /(\d+)\.(\d+)/ );
|
|
|
|
$BRANCH = sprintf( "%d.%03d", q$Revision$ =~ /\d+\.\d+\.(\d+)\.(\d+)/ ) || 0;
|
|
|
|
$main::build += $VERSION;
|
|
|
|
$main::branch += $BRANCH;
|
|
|
|
|
2000-09-02 15:28:14 +00:00
|
|
|
# load the badwords file
|
|
|
|
sub load
|
|
|
|
{
|
|
|
|
my @out;
|
2001-09-13 19:58:05 +00:00
|
|
|
my $fh = new IO::File $oldfn;
|
2000-09-02 15:28:14 +00:00
|
|
|
|
|
|
|
if ($fh) {
|
|
|
|
while (<$fh>) {
|
|
|
|
chomp;
|
|
|
|
next if /^\s*\#/;
|
|
|
|
my @list = split " ";
|
|
|
|
for (@list) {
|
2001-09-13 19:58:05 +00:00
|
|
|
$badword->add($_);
|
2000-09-02 15:28:14 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
$fh->close;
|
2001-09-13 19:58:05 +00:00
|
|
|
$badword->put;
|
|
|
|
unlink $oldfn;
|
2001-10-04 13:53:47 +00:00
|
|
|
}
|
|
|
|
push @out, create_regex();
|
|
|
|
return @out;
|
|
|
|
}
|
|
|
|
|
|
|
|
sub create_regex
|
|
|
|
{
|
|
|
|
my @out;
|
|
|
|
@regex = ();
|
|
|
|
|
|
|
|
my $fh = new IO::File $regex;
|
|
|
|
|
|
|
|
if ($fh) {
|
|
|
|
while (<$fh>) {
|
|
|
|
chomp;
|
|
|
|
next if /^\s*\#/;
|
|
|
|
my @list = split " ";
|
|
|
|
for (@list) {
|
|
|
|
# create a closure for each word so that it matches stuff with spaces/punctuation
|
|
|
|
# and repeated characters in it
|
|
|
|
my $w = uc $_;
|
2001-10-04 14:46:41 +00:00
|
|
|
my @l = split //, $w;
|
2001-10-04 13:53:47 +00:00
|
|
|
my $e = join '+[\s\W]+', @l;
|
|
|
|
my $s = eval qq{sub { return \$_[0] =~ /$e+/ ? '$w' : () } };
|
|
|
|
push @regex, $s unless $@;
|
|
|
|
dbg("create_regex: $@") if $@;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
$fh->close;
|
2000-09-02 15:28:14 +00:00
|
|
|
} else {
|
2001-10-04 13:53:47 +00:00
|
|
|
my $l = "can't open $regex $!";
|
2001-09-13 19:58:05 +00:00
|
|
|
dbg($l);
|
2000-09-02 15:28:14 +00:00
|
|
|
push @out, $l;
|
|
|
|
}
|
2001-10-04 13:53:47 +00:00
|
|
|
|
2000-09-02 15:28:14 +00:00
|
|
|
return @out;
|
|
|
|
}
|
|
|
|
|
|
|
|
# check the text against the badwords list
|
|
|
|
sub check
|
|
|
|
{
|
2001-10-01 14:30:18 +00:00
|
|
|
my $s = uc shift;
|
2001-10-04 13:53:47 +00:00
|
|
|
my @out;
|
|
|
|
|
|
|
|
for (@regex) {
|
|
|
|
push @out, &$_($s);
|
|
|
|
}
|
|
|
|
|
|
|
|
return @out if @out;
|
2001-10-01 14:30:18 +00:00
|
|
|
|
|
|
|
for (split(/\s+/, $s)) {
|
|
|
|
s/\'?S$//;
|
2001-10-04 13:53:47 +00:00
|
|
|
push @out, $_ if $badword->in($_);
|
2001-10-01 14:30:18 +00:00
|
|
|
}
|
2001-10-04 13:53:47 +00:00
|
|
|
|
|
|
|
return @out;
|
2000-09-02 15:28:14 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
1;
|