LotGDocker/lotgd-web/lotgd/lib/censor.php
2020-08-17 19:16:42 -04:00

139 lines
4.8 KiB
PHP
Executable File
Raw Permalink Blame History

<?php
// translator ready
// addnews ready
// mail ready
/**
 * Censors ("soaps") a piece of user-supplied text.
 *
 * The text is searched with every pattern from nasty_word_list(). A match is
 * left alone when the whole matched word appears in good_word_list();
 * otherwise the offending part is replaced, character by character, with
 * symbols taken from '#@%$!'. Backtick colour codes in $input are kept: the
 * search runs on a sanitized copy and the result is mapped back onto the
 * original string.
 *
 * Nothing is filtered when the "soap" setting is off. Users holding the
 * SU_EDIT_COMMENTS flag get their text back unmodified, together with a
 * notice showing what the filter would have produced.
 *
 * @param string $input    Text to filter.
 * @param bool   $debug    When true, output() an explanation for each match.
 * @param bool   $skiphook When true, the "censor" module hook is not fired.
 * @return string The filtered text (or $input itself, see above).
 */
function soap($input,$debug=false,$skiphook=false){
	global $session;
	require_once("lib/sanitize.php");
	$final_output = $input;
	// $output is the color code-less (fully sanitized) input against which
	// we search.
	$output = full_sanitize($input);
	// The mask of displayable chars that should be masked out;
	// X displays, _ masks. It always has the same length as $output.
	$mix_mask = str_pad("",strlen($output),"X");
	if (!getsetting("soap",1)){
		return $final_output;
	}

	$search = nasty_word_list();
	$exceptions = array_flip(good_word_list());
	$changed_content = false;
	// foreach instead of each(): each() no longer exists in PHP 8.
	foreach ($search as $word){
		if ((string)$word === ""){
			continue;
		}
		do {
			// Number of spans newly masked during this pass. The pass is
			// repeated until a pass masks nothing more.
			$masked = 0;
			// Offsets are captured so that the span that actually matched
			// is masked, not merely the first identical substring.
			$times = (int)preg_match_all($word,$output,$matches,
				PREG_PATTERN_ORDER | PREG_OFFSET_CAPTURE);
			for ($x=0; $x<$times; $x++){
				if (!isset($matches[1][$x])){
					continue;
				}
				$whole = $matches[0][$x];
				$group = $matches[1][$x];
				if (strlen($whole[0]) < strlen($group[0])){
					$short = $whole;
					$long = $group;
				}else{
					$short = $group;
					$long = $whole;
				}
				$shortword = $short[0];
				$longword = $long[0];
				$p = $short[1];
				if (isset($exceptions[strtolower($longword)])){
					if ($debug)
						output("This word is ok because it was caught by an exception: `b`^%s`7`b`n",$longword);
					// Whitelisted: go on to the next match.
					continue;
				}
				$len = strlen($shortword);
				$pad = str_pad("",$len,"_");
				// Skip empty captures and spans that are already masked;
				// counting them again could keep the outer loop spinning.
				if ($len == 0 || $p < 0 || substr($mix_mask,$p,$len) === $pad){
					continue;
				}
				if ($debug)
					output("`7This word is not ok: \"`%%%s`7\"; it blocks on the pattern `i%s`i at \"`\$%s`7\".`n",$longword,$word,$shortword);
				// If the word should be filtered, drop it from the search
				// terms ($output), and mask its bytes out of the output
				// mask. The replacement has the same length, so the other
				// offsets captured in this pass stay valid.
				$output = substr_replace($output,$pad,$p,$len);
				$mix_mask = substr_replace($mix_mask,$pad,$p,$len);
				$changed_content = true;
				$masked++;
			}
		} while ($masked > 0);
	}

	// Transfer the mask onto the original (colour coded) text.
	$y = 0; //position within final output
	$pad = '#@%$!';
	$padlen = strlen($pad);
	$masklen = strlen($mix_mask);
	for ($x=0; $x<$masklen; $x++){
		while (substr($final_output,$y,1)=="`"){
			$y+=2; //when encountering appo encoding, skip over it.
		}
		//this character should be masked out.
		if (substr($mix_mask,$x,1)=="_"){
			$final_output = substr($final_output,0,$y) .
				substr($pad,$x % $padlen,1) .
				substr($final_output,$y+1);
		}
		$y++;
	}

	$is_moderator = isset($session['user']['superuser']) &&
		($session['user']['superuser'] & SU_EDIT_COMMENTS);
	if ($is_moderator && $changed_content){
		output("`0The filter would have tripped on \"`#%s`0\" but since you're a moderator, I'm going to be lenient on you. The text would have read, \"`#%s`0\"`n`n",$input,$final_output);
		return $input;
	}
	if ($changed_content && !$skiphook)
		modulehook("censor", array("input"=>$input));
	return $final_output;
}
/**
 * Returns the words that are exempt from the filter.
 *
 * Reads the 'words' column of the first row of the "nastywords" table whose
 * type is 'good' (through db_query_cached() under the name "goodwordlist")
 * and splits it on single spaces.
 *
 * @return array List of words; array("") when no list is stored.
 */
function good_word_list(){
	$sql = "SELECT * FROM " . db_prefix("nastywords") . " WHERE type='good'";
	$result = db_query_cached($sql,"goodwordlist");
	$row = db_fetch_assoc($result);
	// A missing row or NULL column is treated as an empty list, so that no
	// offset is read from a non-array and explode() never receives null.
	$words = (is_array($row) && isset($row['words'])) ? (string)$row['words'] : "";
	return explode(" ",$words);
}
/**
 * Builds the list of regular expressions used to detect filtered words.
 *
 * The space separated 'words' column of the "nastywords" row with
 * type='nasty' is turned into one pattern per word. Letters are widened to
 * character classes of look-alike characters, and a leading or trailing "*"
 * on a word lets the pattern extend over adjoining non-whitespace
 * characters. The result is stored in the data cache under "nastywordlist"
 * and returned from there on later calls.
 *
 * @return array Pattern strings; entries may be empty strings, which the
 *               caller has to skip.
 */
function nasty_word_list(){
// Use the cached list when one is available (a cache miss yields false).
// NOTE(review): 600 is presumably the cache lifetime in seconds - confirm
// against datacache().
$search = datacache("nastywordlist",600);
if ($search!==false && is_array($search)) return $search;
$sql = "SELECT * FROM " . db_prefix("nastywords") . " WHERE type='nasty'";
$result = db_query($sql);
$row = db_fetch_assoc($result);
// Surround the list with spaces so that the first and last word are handled
// by the same space based replacements as all the others.
$search = " ".$row['words']." ";
// The patterns use ' as their delimiter, so escape every single quote that
// is not already preceded by a backslash and has a character on both sides.
$search = preg_replace('/(?<=.)(?<!\\\\)\'(?=.)/', '\\\'', $search);
// Replace letters by character classes holding the letter and characters
// that resemble it.
// NOTE(review): the non-ASCII members of these classes look mis-encoded in
// this copy of the file - verify the file encoding before editing them.
$search = str_replace("a",'[a4@<40><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]',$search);
$search = str_replace("b",'[b<>]',$search);
$search = str_replace("d",'[d<><64><EFBFBD>]',$search);
$search = str_replace("e",'[e3<65><33><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]',$search);
$search = str_replace("n",'[n<><6E>]',$search);
$search = str_replace("o",'[o<><6F>0<EFBFBD><30><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]',$search);
$search = str_replace("p",'[p<><70><EFBFBD>]',$search);
$search = str_replace("r",'[r<>]',$search);
// $search = str_replace("s",'[sz$<24>]',$search);
// An "s" directly preceded by a backslash is left untouched.
$search = preg_replace('/(?<!\\\\)s/','[sz$<24>]',$search);
$search = str_replace("t",'[t7+]',$search);
$search = str_replace("u",'[u<><75><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]',$search);
$search = str_replace("x",'[xפ]',$search);
$search = str_replace("y",'[yݥ<79><DDA5>]',$search);
//these must happen in exactly this order:
// "l" is expanded before "i" because the class inserted for "i" contains an
// "l" that must not be expanded again; "k" is folded into "c" before "c" is
// expanded into a class that contains both letters.
$search = str_replace("l",'[l1!<21>]',$search);
$search = str_replace("i",'[li1!<21><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>]',$search);
$search = str_replace("k",'c',$search);
$search = str_replace("c",'[c\\(k穢]',$search);
// Every pattern is delimited by ', anchored at word boundaries and carries
// the modifiers i (case-insensitive) and U (ungreedy).
$start = "'\\b";
$end = "\\b'iU";
$ws = "[^[:space:]\\t]*"; // zero or more non-whitespace characters (\w is not hungry enough)
// Each separating space closes the pattern of the word before it and opens
// the pattern of the word after it. The space itself is kept so that the
// string can be split into individual patterns at the end.
//space not preceded by a star
$search = preg_replace("'(?<!\\*) '",")+$end ",$search);
//space not followed by a star
$search = preg_replace("' (?!\\*)'"," $start(",$search);
//space preceded by a star: the star turns into $ws at the end of the word
$search = str_replace("* ",")+$ws$end ",$search);
//space followed by a star: the star turns into $ws at the start of the word
$search = str_replace(" *"," $start$ws(",$search);
// Open the first pattern and close the last one.
$search = "$start(".trim($search).")+$end";
// Drop patterns with an empty group (as produced for the padding spaces);
// they remain as empty strings in the exploded list.
$search = str_replace("$start()+$end","",$search);
$search = explode(" ",$search);
updatedatacache("nastywordlist",$search);
return $search;
}
?>