# http://www.berklix.com/~jhs/dots/.procmailrc_fonts
# ~jhs/.procmailrc_fonts	included by ~jhs/.procmailrc
# This file deals with font spam & other generic spam, other files included
# from ~jhs/.procmailrc deal with specific spam phrases & domains.
#
# USA 1st Can Spam case in court Jan 2006,
# law applies as of beginning of 2004:
#   Controlling the Assault of Non-Solicited Pornography and Marketing Act
#   http://www.spiegel.de/netzwelt/politik/0,1518,395648,00.html
#
# Layout: one recipe per paragraph; ":0 flags", then "*" conditions (all must
# match), then exactly one action line.  Flags: H = search header,
# B = search body, HB = both.  Nested { } blocks are indented.
#
# Regex reminder: procmail has NO \s or \t escapes - a backslash merely
# quotes the next character, so "\s*" means "zero or more letters s".
# Optional blanks after a header name are therefore written " *" below.

SPAM_NULL_FONT=$SPAM_NULL_NO_RCVSTORE
# SPAM_NULL_FONT=spam/font/.
SPAM_NULL_NUMERIC_IP=$SPAM_NULL_NO_RCVSTORE
# SPAM_NULL_NUMERIC_IP=spam/numeric_ip/.
SPAM_NULL_FORMAT=$SPAM_NULL_NO_RCVSTORE
# SPAM_NULL_FORMAT=spam/audio/.

# ---------------------------------------------------------------------------
# Windows code pages
# ---------------------------------------------------------------------------

:0 B
* charset="windows\-1250"
* ^Content\-Type: text/plain
# | $RCVSTORE +$SPAM_NULL_FONT
# Note $RCVSTORE must have a directory,
# not a file such as $SPAM_NULL_NO_RCVSTORE
# so as I often define $SPAM_NULL_FONT to
# $SPAM_NULL_NO_RCVSTORE, avoid rcvstore.
$SPAM_NULL_FONT

:0 H
* ^Subject: (|\[SPAM\] )=\?windows\-1251\?B\?
$SPAM_NULL_FONT

:0 HB
* charset=Windows\-1251
$SPAM_NULL_FONT

:0 HB
* charset=3DWindows\-1251
$SPAM_NULL_FONT

# Thai
# Was "Content\-type:\stext/html", which can only match a literal "s".
:0 B
* ^Content\-type: *text/html; charset=windows\-874
# Message-Id:
# | $RCVSTORE +$SPAM_NULL_FONT
# Note $RCVSTORE must have a directory,
# not a file such as $SPAM_NULL_NO_RCVSTORE
# so as I often define $SPAM_NULL_FONT to
# $SPAM_NULL_NO_RCVSTORE, avoid rcvstore.
$SPAM_NULL_FONT

# Thai @hotmail.co.th
#	Subject: =?windows-874?B?4LvUtMPR
# Flag was B only; Subject: lives in the header, so search H as well
# (B kept so a Subject: line quoted in a body still matches as before).
:0 HB
* ^Subject: =\?windows-874\?B
$SPAM_NULL_FONT

:0 H
* ^Subject: =\?x\-mac\-thai\?B
$SPAM_NULL_FONT

:0 H
* ^Content\-type: text/plain; charset="x\-mac\-thai"
$SPAM_NULL_FONT

#	Subject: =?Windows-1251?B?zc7CztHSyCDUxcTF0MDL3M3Ow84gx8DKzs3OxMDSxcvc0dLCwCDOIMPO0cfAys (fwd)
:0 H
* ^Subject: =\?Windows-1251\?B\?
$SPAM_NULL_FONT

# Do not block Windows-1252 as Ive seen non spam:
#	Subject: (GEA) =?Windows-1252?Q?W=FCrmtal_stammtisch_this_Friday?= (fwd)

# ---------------------------------------------------------------------------
# Israel .il  (Hebrew code page windows-1255)
# ---------------------------------------------------------------------------
:0 H
* ^MIME\-Version: 1.0
* ^Content-Type: text/plain;
* charset="windows\-1255"
* ^Content\-Transfer\-Encoding: 8bit
* ^X\-MIME\-Autoconverted: from quoted\-printable to 8bit by (webmail|land|slim).berklix.org
    {
    :0 H
    * ^From: "=\?windows\-1255\?Q\?=*".*\<.*@[a-z0-9\-]+\.il\>
    # From: "=?windows-1255?Q?=F4=E9=F7=F1?="
    # $SPAM_NULL_FONT
    spam/font/.

    :0 H
    * ^Subject: =\?windows\-1255\?Q
    * ^Subject: .+\?=
    # $SPAM_NULL_FONT
    spam/font/.
    }

# JJLATER New rule 2007.11.23 , I want to keep an eye on it.
# ie Might it also catch genuine replies from abuse@ postmasters,
# who I mailed as abuse@

# ---------------------------------------------------------------------------
# Polish .pl
# ---------------------------------------------------------------------------
#	Subject: =?iso-8859-2?Q?Szybki_przyrost_masy_mi=EA=B6niowej.?=
#	From: "=?iso-8859-2?Q?Rafa=B3_Bierzan?="
:0 H
* ^Subject: =\?iso\-8859\-2\?Q\?
    {
    :0 B
    * ^Content-Type: text/html
    * charset=\"iso\-8859\-2\"
    # NOTE(review): three further lines stood here ("* \", "# * \", "* \")
    # whose patterns after the backslash did not survive in this copy -
    # presumably HTML tag matches; confirm against the master copy and
    # restore them.  A bare trailing backslash is a line continuation and
    # would swallow the action line, so they are not reproduced.
    spam/font/.
    }
# The closing brace above was missing in this copy; without it every recipe
# below would have been nested inside the Polish Subject: test.

# ---------------------------------------------------------------------------
# Korean spam body
# ---------------------------------------------------------------------------
:0 HB
* charset= "ks_c_5601\-1987"
$SPAM_NULL_FONT

:0 HB
* content="text/html; charset=euc\-kr"
$SPAM_NULL_FONT

:0 HB
* charset="euc\-kr"
$SPAM_NULL_FONT

:0 HB
* charset=euc\-kr
$SPAM_NULL_FONT

:0 HB
* charset=3Deuc\-kr
$SPAM_NULL_FONT

:0 HB
* charset="ks_c_5601\-1987"
$SPAM_NULL_FONT

:0 HB
* charset=ks_c_5601\-1987
$SPAM_NULL_FONT

:0 HB
* charset="ISO\-2022\-KR"
$SPAM_NULL_FONT

# ---------------------------------------------------------------------------
# Chinese spam header
# ---------------------------------------------------------------------------
# Pound Symbol: £
#	Subject:\s*±q¥¼¨£¹L³o»ò Q ªº²£«~!! ¥©³s´¼ Ä_Ä_ª© DVD®M¸Ë !
:0 H
* ^Subject: *±
$SPAM_NULL_FONT

#	Subject:\s*²³¹¿µÄ¨èÜ·
:0 H
* ^Subject: *²
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?Big5
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?Big5
$SPAM_NULL_FONT

:0 H
* ^Subject:=\?big5\?
$SPAM_NULL_FONT

:0 H
* ^From: *=\?Big5\?
$SPAM_NULL_FONT

:0 H
* =\?big5\?
$SPAM_NULL_FONT

# No idea what language this 1252 spam is.  Maybe Korean ?
# (Only B-encoded 1252 is caught; Q-encoded 1252 is legitimate, see above.)
:0 HB
* ^Subject: *=\?Windows\-1252\?B\?
$SPAM_NULL_FONT

:0 HB
* ^From: *=\?Windows\-1252\?B\?
$SPAM_NULL_FONT

# ---------------------------------------------------------------------------
# Chinese spam body
# ---------------------------------------------------------------------------
:0 HB
* charset=big5
$SPAM_NULL_FONT

:0 HB
* charset="BIG\-5"
$SPAM_NULL_FONT

:0 HB
* charset="big5"
$SPAM_NULL_FONT

:0 HB
* charset= "big5"
$SPAM_NULL_FONT

:0 HB
* charset=3Dbig5
$SPAM_NULL_FONT

:0 HB
* charset=gb2312
$SPAM_NULL_FONT

:0 HB
* charset="GB2312"
$SPAM_NULL_FONT

:0 B
* ^Content-Type: text/(plain|html); charset=GBK
* ^Content-Transfer-Encoding: base64
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?GB2312\?B\?
$SPAM_NULL_FONT

#
# This rule:
#	* ^Subject:\s*=\?GB2312\?B\?
# failed to catch this:
#	Subject: =?GB2312?B?PGpocnVkPrxzp2kxLiCsUK78unGwZ7r0IDoyLjIwMDimV7PmuvQ=?=
#	:3. =?GB2312?B?pVukSrxzp2k8amhydWQ+?=
#	MIME-Version: 1.0
#	Content-Transfer-Encoding: base64
# (it failed because "\s" is not whitespace in procmail; fixed above).
# So 25.04.2012 add this new rule:
# The leading "=" is now optional, (|=), so the sample above really matches;
# the previous pattern required "Subject: ?GB2312" with no "=".
:0 H
* ^Subject: (|=)\?GB2312\?B\?
* ^Subject: .+\?=
$SPAM_NULL_FONT

:0 B
* charset="CHINESEBIG5"
$SPAM_NULL_FONT

# ---------------------------------------------------------------------------
# UTF-8
# ---------------------------------------------------------------------------
# https://en.wikipedia.org/wiki/Utf-8
#	A multibyte character encoding for Unicode.  Like UTF-16
#	and UTF-32, UTF-8 can represent every character in the
#	Unicode character set, but unlike them possesses the
#	advantages of being backward-compatible with ASCII and of
#	avoiding the complications of endianness and the resulting
#	need to use byte order marks.
# https://en.wikipedia.org/wiki/Unicode_and_email
# https://en.wikipedia.org/wiki/International_email#UTF-8_headers
# UTF Could be anything, I've seen Russian, Turkish, Chinese, German, Asian, Japanese
#
#	Subject: ?UTF-8?B?0JDQktCi0J7QotCV0KXQptCV0J3QotCgICDQsiDRhtC10L3RgtGA0LUg0JzQvg==?=
#		=?UTF-8?B?0YHQutCy0Ys=?=
:0 H
* ^Subject: (|=)\?utf-8\?.\?
* ^Subject: .+\?=$
# There is often no charset=UTF-8 in header, it often waits till body,
# in which case rule above will not catch it.
    {
    # German
    # This sample was NOT spam, but valid mail, All from Header:
    #	Subject: =?utf-8?Q?Re:_WWW_=E2=80=BA_Contact_(Germany/Service)?=
    #	Content-Type: text/plain; charset="utf-8"
    #	Subject: =?utf-8?Q?_____________________?=
    :0 H
    * ^Subject: (|=)\?utf-8\?Q\?
    * ^Subject: .+\?=
    * charset=\"utf-8\"
    | $NOMIME_FORCE | $RCVSTORE +spam/font/utf-8/q=english-german-polish-spanish-turkish
    # spam/font/utf-8/Q=german-italian-turkish/.

    # Chinese Or Russian
    #	Subject: =?utf-8?B?________==?=
    # Note in original header there was a \n\t here.
    #		=?utf-8?B?ZHdzeA==?=
    #	Subject: =?utf-8?B?______________?=
    #		=?utf-8?B?LC3lkI7kv4pfd2p6emg=?=
    #	Subject: =?utf-8?B?___________?=
    #		=?utf-8?B?ZemrmOe6p+eglOS/ruePremCgOivt+WHvQ==?=
    # Sometimes the subject extends on to a second line, eg here:
    #	"^Subject: =?utf-8?B?5omT6YCg5rC45LiN5pa36Zu755qE5Y2w6YiU5qmfISEuLi4uLi4uLi5obXJu?="
    #	"^ =?utf-8?B?aWNvdGFmdXBjaG1uaWRvdGZidXBraG1ybmlkeHR5ZQ==?="
    :0 H
    * ^Subject: (|=)\?utf-8\?B\?
    * ^Subject: .+\?=$
        {
        # Chinese
        #	Subject: =?UTF-8?B?Ny8xMOKXpDk6MDDimIblpKfmipjmiaPil6I5LjVST1cg576O5ZyL6I+v55ub6aCT5qu75qGDIDJrZ+KGmDk4MOKYheWkj+aXpeWwj+eUnOW/g+a4oeWBh+a0i+ijneKGmDEyMA==?=
        # * ^From: =\?utf-8\?B\?.+\?= \<.+@.+\.cn\>
        # The rule above is too restrictive it will not catch addresses
        # without human names, eg From qingzaocavh@yahoo.cn
        # * ^From: .+@.+\.(cn|tw|hk)(\>|$)
        # Also allow:
        #	Message-Id: <201305101858.r4AIwBTB030967@linux2.teclink.com.hk>
        :0 H
        * ^(From|Message\-Id): .*\.(cn|tw|hk)>$
        # cn = china, tw=taiwan, hk = hong kong
        # The rule above failed to catch
        #	From: =?utf-8?B?_______=?=
        #	Riga, LV-1048, Latvija
        #	From: =?utf-8?B?5pmC6ZaT6Ieq55Sx?=
        #	From: =?utf-8?B?57ay6Lev6KGM6Yq3?=
            {
            :0 B
            * ^Content\-Transfer\-Encoding: base64
            # I have also seen: quoted-printable
            # Was "^\tcharset": "\t" is a literal "t" in procmail, so the
            # test never matched.  The bracket below holds a space and a
            # literal TAB to match the indented continuation line.
            * ^[ 	]+charset=\"utf-8\"$
                {
                #	From: "=?UTF-8?B?UENob21l5ZWG5bqX6KGX?="
                #	To: "=?UTF-8?B?UENob21l5ZWG5bqX6KGX?="
                :0 H
                * ^From: \"=\?UTF\-8\?B\?.+\?=\"
                * ^(To|Cc): \"=\?UTF\-8\?B\?.+\?=\"
                    {
                    :0 B
                    * ^Content-Type: text/html
                    spam/font/utf-8/B=chinese/t1/.
                    # $SPAM_NULL_FONT
                    }
                }

            # Anything from .cn/.tw/.hk not filed above.
            :0
            $SPAM_NULL_FONT
            }

        # Russian .ru
        #	From: |=?utf-8?B?0JrQvtC90LTRgNCw0YI=?=
        :0 H
        * charset=(|\")utf-8
            {
            :0 H
            * ^From: .+\.ru(|>)$
            # I have also seen spam in Russian
            # from Brazil .br & Uruguay .uy & @yahoo.com)
            # | $NOMIME_FORCE | $RCVSTORE +spam/font/utf-8/B=russian
            # spam/font/utf-8/B=russian/.
            $SPAM_NULL_FONT

            :0
            # | $NOMIME_FORCE | $RCVSTORE +spam/font/utf-8/B=russian_probably
            # spam/font/utf-8/B=russian_probably/.
            $SPAM_NULL_FONT
            }

        # Japanese Sample 2022-05:
        #	From: =?utf-8?B?5LiJ5LqV5L2P5Y+L6YqA6KGM?=
        #	Subject: =?utf-8?B?44CQ5LiJ5LqV5L2P5Y+LU01CQ+ODgOOCpOODrOOCr+ODiOOAkeacrOS6uueiuuiqjeOBrg==?=
        #		=?utf-8?B?44GK55+l44KJ44Gb?=
        :0 H
        * ^From: .+\.jp(|>)$
        # Dont know if line below is correct, added 2022-05
        * utf-8\?B\?44
        | $RCVSTORE +spam/font/utf-8/japanese
        }

    # 2013-05-10 I saw Russian spam from Brazil with trailing ascii after
    # the end of the utf-8 delimiter, eg:
    #	Subject: =?utf-8?B?0L/RgNC10LTQu9Cw0LPQsNGO0YnQuNGFINC60YDQtdC00LjRgtGLINC90LAg0LvRjtCx0YvQtSDRhtC10LvQuCDQstC40LQg0L3QsCDQvdC40LfQutC+0Lwg0YPRgNC+0LLQvdC1INC40L3RgtC10YDQtdGB0L7QstCw0YLRjNGB0Y8=?= ($ 10,000.00 - $ 20,000,000.00).
    #
    # so I had to discard assumption that lines ended with a ?=
    # detected with a match rule with a trailing
    #	\?=$
    # (& instead first thought to insert to match rule to allow more text with:
    #	\?=.*$
    # ) but above can be simplified to
    #	\?=
    # Perhaps JJLATER I should also allow plain ascii in
    #	* ^Subject: =\?utf-8\?.\?
    # between Subject: and =\?utf-8
    #
    # Note also Match patterns that look both for opening & closing utf delimiters
    # in one line, are too simple as Subject can trail across extension lines.
    # SO we need 2 match patterns, one for each, eg
    #	* ^Subject: .+\?=
    #
    #	Subject: =?UTF-8?B?0_________=?=
    #	Subject: =?utf-8?B?0_________=?=
    :0 H
    * ^Subject: =\?utf-8\?B\?0
    * ^Subject: .+\?=
        {
        :0 B
        * charset=(|\")utf-8
            {
            # If the domain is russian select it,
            # else leave to default.
            #	From: =?utf-8?B?0JDQu9C10LrRgdC10Lk=?= (address elided)
            #
            # * ^From: =\?utf-8\?B\?0
            # Might not be a human name, dont require it.
            :0 H
            * ^From: .+\.ru(|>)$
            # | $RCVSTORE +spam/font/utf-8/B=russian
            # spam/font/utf-8/B=russian/.
            $SPAM_NULL_FONT

            :0
            # | $RCVSTORE +spam/font/utf-8/B=chinese-russian
            # spam/font/utf-8/B=chinese-russian/.
            $SPAM_NULL_FONT
            }
        }

    :0 H
    * ^Content\-Type: text/plain; charset="utf-8"
    | $RCVSTORE +spam/font/utf-8/unknown
    # spam/font/utf-8/unknown.
    # $SPAM_NULL_FONT

    :0 B
    * ^Content\-Type: text/plain; charset=(|\")utf-8(|\")
    * ^Content-Transfer-Encoding: quoted-printable
    # I have seen chinese sent without base64.
    | $NOMIME_FORCE | $RCVSTORE +spam/font/utf-8/unknown
    # spam/font/utf-8/unknown.
    # $SPAM_NULL_FONT
    }

# ---------------------------------------------------------------------------
# Japanese spam
# ---------------------------------------------------------------------------
:0 HB
* charset="Shift_JIS"
$SPAM_NULL_FONT

:0 HB
* charset=ISO\-2022\-JP
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?ISO\-2022\-JP\?
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?ISO\-2022\-JP\?
$SPAM_NULL_FONT

:0 H
* ^Subject: *=\?shift\-jis\?
$SPAM_NULL_FONT

:0 HB
* charset="iso\-2022\-jp"
$SPAM_NULL_FONT

#	Content-Type: text/plain
:0 B
* charset="shift\-jis"
$SPAM_NULL_FONT

# ---------------------------------------------------------------------------
# Russian spam Cyrillic
# ---------------------------------------------------------------------------
:0 HB
* charset=(|3d)koi8\-r
$SPAM_NULL_FONT

#	Subject: =?koi8-r?B?8sHT09nMy8kg0M8g8s/T08nJ?=
#	Subject: =?koi8-r?B?7MXHxc7EydLP18HOycUsINL
#	Subject: =?koi8-r?B?78bJ0yDXIMHSxc7E1SDTIM
#	Subject: =?koi8-r?B?8sHT0NLPxMHWwSDaxc3FzN
#	Subject: =?koi8-r?Q?=F3=C2=C1=D7=C9=D4=D8_=D7=C5
:0 H
* ^Subject: =\?koi8\-r\?(B|Q)\?
$SPAM_NULL_FONT

#	From: =?koi8-r?B?88/axMHOycUg0NLFwMTJw8nJ?=
#	From: =?koi8-r?B?IvPPwtPU18XOzsnLIg==?=
:0 H
* ^From: =\?koi8\-r\?B\?
$SPAM_NULL_FONT

:0 B
* charset="iso\-2838\-4"
| $RCVSTORE +spam/charset

# Anyone quoting a numeric is suspicious, maybe a spammer,
# or someone on dynamic DNS who doesnt want to be traced back.
# ---------------------------------------------------------------------------
# Numeric / obfuscated URLs in the body
# ---------------------------------------------------------------------------
# JJLATER might FAIL: * http://[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+\.[[:digit:]]+
# (kept only as a note; the plain [0-9] form below expresses the same test)
:0 B
* http://[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+
$SPAM_NULL_NUMERIC_IP

:0 B
* http://%
$SPAM_NULL_NUMERIC_IP

:0 B
* http://www\.%
$SPAM_NULL_NUMERIC_IP
# EG: http://%11%11.%11%11%11.%11%11%11%1e%11%11/%11%11%1a%1f%11%11%11%11%11
# All 1-9 converted to 1 so the spammers dont benefit
# http://o%11%1Eo%11o%11s @oow %1Coosao%11ed bo%1A/ooo %11 /?fo %11oo

:0 B
* http://\&#
# The # in the line above does not need to be delimited.
$SPAM_NULL_NUMERIC_IP

# Problem yet to solve
#	http://acl ro @wwo.oogoooono io/ooo /?o oteo
#	http://joocaooa1 @oow.hugoooooo boo/unoobocoibo.ooo?oiveoy

# Intercept: http://11.111.111.11/ads/precision/debtspecialist
# But not intercept EG 01051.com which is a non spamming
# (as far as I know) cheap phone caller.
# I have tested next line, it works.

# ---------------------------------------------------------------------------
# Bounces of spam forged in my name
# ---------------------------------------------------------------------------
# As spammers send spam masquerading as me, lots of sites reject back to me
# spam that I never sent.
# I used to have these reject messages in my spam phrases list,
# but to allow for times (such as during a reconfig) when I suspect I really
# may have had a genuine bounce, it is better to separate them here.
#	\<\<\< 550 Email rejected by sandiego.com spam blocker
:0 B
* ^banned filename in an email to you from:
| $RCVSTORE +spam/filename

:0 B
* ^\<\<\< 550 Email rejected by
* spam blocker
| $RCVSTORE +spam/blocker

:0 B
* Action: failed
* Relaying denied\. Proper authentication required\.
| $RCVSTORE +error/auth-sasl

# ---------------------------------------------------------------------------
# Mail already tagged by a remote amavisd
# ---------------------------------------------------------------------------
:0 H
* ^Received: by mail\.brierdr\.com
# brierdr runs amavisd detector, forwards to me
#	Subject:\s*\*\*\* JUNK MAIL \*\*\*Original_spam_subject
#	Mime-Version: 1.0
#	X-Spam-Status: Yes, hits=3.187 tagged_above=-999 required=1
#		tests=BAYES_00, HELO_DYNAMIC_DHCP, HTML_10_20,
#		HTML_IMAGE_ONLY_24, HTML_MESSAGE, MSGID_FROM_MTA_ID
#	X-Spam-Level: \*\*\*
#	X-Spam-Flag: YES
# Was "^Subject:\s*..."; procmail has no \s escape (it means a literal "s"),
# so optional blanks are written " *".
* ^Subject: *\*\*\* JUNK MAIL \*\*\*
* ^X\-Spam\-Flag: YES
| $RCVSTORE +spam/amavisd

# Hashed out, as it caught mail from mjm_ERASE@codito.de & one other person.
# :0
# * ^Received: from unknown
# | $NOMIME_FORCE | $RCVSTORE +spam/unknown

# JJLATER Block commented out till I add something, eg a "to:" clause
# someone who genuinely mailed me as they were webmaster@www.somewhere
# get caught by this
# :0 B
# * .[a-z][a-z][a-z]@www
# JJLATER
# | $RCVSTORE +spam/redirect
# 2 letter country codes EG uk fm tv us it de
# :0 B
# * .[a-z][a-z]@www
# | $RCVSTORE +spam/redirect
# Other odd top level domain names:
# :0 B
# * .family@www
# | $RCVSTORE +spam/redirect
# :0 B
# * .info@www
# | $RCVSTORE +spam/redirect
# :0 B
# * .name@www
# | $RCVSTORE +spam/redirect

# ---------------------------------------------------------------------------
# MIME Enclosures: Much is just HTML spam, but not all.
# ---------------------------------------------------------------------------
:0 B
* ^Content\-type: audio
$SPAM_NULL_FORMAT

:0 B
* ^Content\-Type: application/x\-shockwave\-flash
$SPAM_NULL_FORMAT

# The two Office recipes below used to read
#	^Content\-Type: Content-Type: application/...
# The doubled header name meant they could never match a real MIME part.
# NOTE(review): now that they are live, the second one catches every
# "application/vnd.openxmlformats..." type (docx/pptx as well as xlsx) -
# confirm that is wanted before pointing $SPAM_NULL_FORMAT at a null sink.
:0 B
* ^Content\-Type: application/vnd\.ms-excel
$SPAM_NULL_FORMAT

#	Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;
:0 B
* ^Content\-Type: application/vnd\.openxmlformats
$SPAM_NULL_FORMAT

:0 B
* ^Content\-Type: application/x\-msdownload
$SPAM_NULL_FORMAT

:0 B
* ^Content\-Type: audio/x\-midi
$SPAM_NULL_FORMAT

# Cant use
#	* ^MIME-Version:
# as EG Gary & Ernst send:
#	Mime-version: 1.0
#	Content-type: text/plain; charset=us-ascii

# ---------------------------------------------------------------------------
# Unfilled spam templates
# ---------------------------------------------------------------------------
# Incompetent spammers run spam software unloaded
# with addresses & subject, sending generic macro spam.
:0 H
* !^Subject:
    {
    :0 B
    * ^Content\-Type: text/html
    * ^Date: \%CURRENT_DATE_TIME
    * ^\%MESSAGE_BODY
    $SPAM_NULL_FORMAT
    }

# ---------------------------------------------------------------------------
# Raw 8-bit Subject lines
# ---------------------------------------------------------------------------
# High bit strings - Maybe 16 bit Chinese ?
# Brackets compensator: [
# Example: Subject: ¦]À³¹L¦~¨ì¦³40»õ»È¦æ¥Á¶¡©ñ´Ú
# Brackets compensator: [
# XD:.....: Subject: ?]???L?~????40???????????????
# XD:.....: 576666732A5CBB4A7AEAB33BFBCAEACBAAFBD0
# XD:.....: 352A534A06D039C6E8C6340B5B8665161914AA
# To generate nasty high bit bytes in next line I used:
# cd ~/src/bsd/jhs/bin/local/inob ; inob 0x80 > 80 ; inob 0xff > ff
# NOTE(review): per the inob command above, each bracket is meant to hold the
# single raw bytes 0x80 and 0xff.  If this file has been re-encoded to UTF-8
# the range no longer means that - verify the bytes in the installed copy.
:0 H
* ^Subject: .+[€-ÿ][€-ÿ][€-ÿ][€-ÿ][€-ÿ][€-ÿ]
| $RCVSTORE +spam/subject_8bit

# ---------------------------------------------------------------------------
# UTF-8 base64 Subject plus base64 body (samples decode to Arabic, Chinese)
# ---------------------------------------------------------------------------
#	Subject: =?UTF-8?B?2YrYs9ix2YbYpyDZhdi02KfYsdmD2KrZg9mFIA==?=
#	Subject: =?UTF-8?B?5Lq655Sf4piF5Y+Y5bm754Shy4rnqq4=?=
:0 H
* ^Subject: =\?UTF-8\?B\?
* ^MIME-Version: 1.0
    {
    :0 B
    * ^Content-Transfer-Encoding: base64
        {
        :0 B
        * ^Content-Type: text/(plain|html); charset=UTF-8
        | $RCVSTORE +spam/font/utf-8/suspect

        :0 B
        * ^Content-Disposition: attachment; filename="=\?UTF-8\?b\?
        # brackets.c "
        | $RCVSTORE +spam/font/utf-8/suspect

        #	To: =?UTF-8?B?2YjYrdiv2Kkg2KfZhNio2LHYp9mF2Kwg2KfZhNiq2K/YsdmK2KjZitmH?=
        :0 H
        * ^To: =\?UTF-8\?B\?
        | $RCVSTORE +spam/font/utf-8/to-also-utf
        }
    }

:0 H
* ^Content-Type: multipart/mixed;
    {
    :0 B
    * ^This is a multi-part message in MIME format.
    * ^Content-Disposition: attachment;
    * ^Content-Transfer-Encoding: base64
    * ^Content-Type: application/octet-stream;
    * ^Content-Type: multipart/alternative;
    * ^Content-Type: text/plain;
    * charset="utf-8"
    | $RCVSTORE +spam/font/utf-8/suspect
    }

# :0 H
# (
# * ^DEBUGtestingxxx:file=procmailrc_font:debug=last
# | $RCVSTORE +test
# # )