| 1 | ### -*-m4-*- |
| 2 | ### |
| 3 | ### Spam filtering for distorted.org.uk Exim configuration |
| 4 | ### |
| 5 | ### (c) 2012 Mark Wooding |
| 6 | ### |
| 7 | |
| 8 | ###----- Licensing notice --------------------------------------------------- |
| 9 | ### |
| 10 | ### This program is free software; you can redistribute it and/or modify |
| 11 | ### it under the terms of the GNU General Public License as published by |
| 12 | ### the Free Software Foundation; either version 2 of the License, or |
| 13 | ### (at your option) any later version. |
| 14 | ### |
| 15 | ### This program is distributed in the hope that it will be useful, |
| 16 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ### GNU General Public License for more details. |
| 19 | ### |
| 20 | ### You should have received a copy of the GNU General Public License |
| 21 | ### along with this program; if not, write to the Free Software Foundation, |
| 22 | ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 23 | |
| 24 | DIVERT(null) |
| 25 | ###-------------------------------------------------------------------------- |
| 26 | ### Spam filtering. |
| 27 | |
| 28 | ## The Exim documentation tells lies. |
| 29 | ## |
| 30 | ## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}* |
| 31 | ## : The command and its arguments are first expanded separately, [...] |
| 32 | ## |
| 33 | ## They aren't. The whole command-and-args string is expanded in one go, and |
| 34 | ## only then split at unquoted spaces. This unpleasant hack sorts out the |
| | ## mess: SHQUOTE(ARG) applies `rxquote' to ARG and wraps the result in double |
| | ## quotes, with the intention that the expanded value reaches the command as |
| | ## a single word. It should be applied exactly once to each argument. |
| 35 | m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>) |
| 36 | |
| 37 | ## Utilities for collecting spam limits. |
| | ## |
| | ## SPAMLIMIT_CHECK(VALUE): when Exim expands the result, it yields |
| | ## `spam_limit=VALUE' if VALUE consists of decimal digits with an optional |
| | ## leading minus sign, and the empty string otherwise. |
| 38 | m4_define(<:SPAMLIMIT_CHECK:>, |
| 39 | <:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>) |
| 40 | |
| | ## SPAMLIMIT_ROUTER(NAME): begin a `redirect' router called NAME. It has |
| | ## `verify_only' set, and it is skipped when `$acl_c_mode' is `submission' or |
| | ## when `address_data' already holds a `spam_limit' entry. The caller writes |
| | ## any further router options on the lines following the macro call. |
| 41 | m4_define(<:SPAMLIMIT_ROUTER:>, |
| 42 | <:$1: |
| 43 | driver = redirect |
| 44 | data = :unknown: |
| 45 | verify_only = true |
| 46 | condition = ${if !eq{$acl_c_mode}{submission}} |
| 47 | condition = ${extract{spam_limit}{$address_data}{false}{true}}:>) |
| 48 | |
| | ## SPAMLIMIT_SET(TEXT): emit an `address_data' option whose value is the |
| | ## existing `$address_data' (if it is defined) followed by TEXT. |
| 49 | m4_define(<:SPAMLIMIT_SET:>, |
| 50 | <:address_data = \ |
| 51 | ${if def:address_data {$address_data}{}} \ |
| 52 | $1:>) |
| 53 | |
| | ## SPAMLIMIT_LOOKUP(FILE, PREFIX, LOCAL, SUFFIX, DOMAIN, SENDER): emit a |
| | ## condition requiring FILE to exist, then an `address_data' setting which |
| | ## looks up the key PREFIX, LOCAL, SUFFIX, `@', DOMAIN, `/', SENDER (joined |
| | ## without spaces) in FILE using `nwildlsearch'. The value found is passed |
| | ## through SPAMLIMIT_CHECK before it is added by SPAMLIMIT_SET. |
| 54 | m4_define(<:SPAMLIMIT_LOOKUP:>, |
| 55 | <:condition = ${if exists{$1}} |
| 56 | SPAMLIMIT_SET(<:${lookup {$2$3$4@$5/$6} nwildlsearch {$1} \ |
| 57 | {SPAMLIMIT_CHECK($value)}}:>):>) |
| 58 | |
| | ## SPAMLIMIT_USERV(USER, PREFIX, LOCAL, SUFFIX, DOMAIN, SENDER): emit an |
| | ## `address_data' setting which runs `userv' under `timeout 5s', naming USER |
| | ## and the `exim-spam-limit' service, and passing SENDER, PREFIX, LOCAL, |
| | ## SUFFIX and `@DOMAIN' as arguments, in that order. Every argument, USER |
| | ## included, is wrapped in SHQUOTE here. The command's `$value' is passed |
| | ## through SPAMLIMIT_CHECK before it is added by SPAMLIMIT_SET. |
| 59 | m4_define(<:SPAMLIMIT_USERV:>, |
| 60 | <:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \ |
| 61 | userv CONF_userv_opts \ |
| 62 | SHQUOTE($1) exim-spam-limit \ |
| 63 | SHQUOTE($6) SHQUOTE($2) SHQUOTE($3) \ |
| 64 | SHQUOTE($4) SHQUOTE(@$5)} \ |
| 65 | {SPAMLIMIT_CHECK($value)}}:>):>) |
| 66 | |
| 67 | SECTION(global, policy)m4_dnl |
| | ## Address and port of the spam-scanning daemon, from the site settings. |
| 68 | spamd_address = CONF_spamd_address CONF_spamd_port |
| 69 | |
| 70 | SECTION(routers, allspam)m4_dnl |
| 71 | ## If we're verifying an address and the recipient has a `~/.mail/spam-limit' |
| 72 | ## file, then look up the recipient and sender addresses to find a plausible |
| 73 | ## limit and insert it into the `address_data' where the RCPT ACL can find |
| 74 | ## it. This router always declines, so it doesn't affect the overall outcome |
| 75 | ## of the verification. The lookup key has the form |
| | ## `<prefix><local-part><suffix>@<domain>/<sender>'. |
| 76 | SPAMLIMIT_ROUTER(fetch_spam_limit_lookup) |
| 77 | check_local_user |
| 78 | local_part_suffix = CONF_user_suffix_list |
| 79 | local_part_suffix_optional = true |
| 80 | SPAMLIMIT_LOOKUP(CONF_userconf_dir/spam-limit, |
| 81 | $local_part_prefix, $local_part, $local_part_suffix, $domain, |
| 82 | $sender_address) |
| 83 | |
| | ## The same again, but for recipients who have a `spam-limit.userv' file: |
| | ## ask the recipient's `exim-spam-limit' userv service for the limit rather |
| | ## than reading a table. The user name is handed over raw, like the other |
| | ## arguments: the macro quotes every argument itself, and quoting the name |
| | ## here as well would give userv a name with literal double-quote |
| | ## characters around it. |
| 84 | SPAMLIMIT_ROUTER(fetch_spam_limit_userv) |
| 85 | check_local_user |
| 86 | local_part_suffix = CONF_user_suffix_list |
| 87 | local_part_suffix_optional = true |
| 88 | condition = ${if exists{CONF_userconf_dir/spam-limit.userv}} |
| 89 | SPAMLIMIT_USERV($local_part, |
| 90 | $local_part_prefix, $local_part, $local_part_suffix, $domain, |
| 91 | $sender_address) |
| 92 | |
| 93 | SECTION(acl, rcpt-hooks)m4_dnl |
| 94 | ## Do per-recipient spam-filter processing: the `rcpt_spam' ACL must accept |
| | ## the recipient. |
| 95 | require acl = rcpt_spam |
| 96 | |
| 97 | SECTION(acl, misc)m4_dnl |
| | ## Decide whether spam checking can be skipped. Accepting here means |
| | ## `skip'; falling through to the final `deny' means `go ahead and check'. |
| | ## Both `rcpt_spam' and `data_spam' call this. |
| | ## |
| | ## NOTE(review): Exim documents `domains' as a RCPT-time condition, but this |
| | ## ACL is also reached from `data_spam'. Confirm that the virtual-domain |
| | ## test below behaves as intended when called at DATA time. |
| 98 | skip_spam_check: |
| 99 | |
| 100 | ## If the client is trusted, or this is a new submission, don't |
| 101 | ## bother with any of this. We will have verified the sender |
| 102 | ## fairly aggressively before granting this level of trust. |
| 103 | accept hosts = +trusted |
| 104 | accept condition = ${if eq{$acl_c_mode}{submission}} |
| 105 | |
| 106 | ## If this is a virtual domain, and it says `spam-check=no', then |
| 107 | ## skip. The domain list is empty if `domains.conf' doesn't exist. |
| 108 | accept domains = ${if exists{CONF_sysconf_dir/domains.conf} \ |
| 109 | {partial0-lsearch; CONF_sysconf_dir/domains.conf} \ |
| 110 | {}} |
| 111 | !condition = DOMKV(spam-check, {${expand:$value}}{true}) |
| 112 | |
| 113 | ## Otherwise we should check. |
| 114 | deny |
| 115 | |
| 116 | rcpt_spam: |
| 117 | |
| 118 | ## See if we should do this check. |
| 119 | accept acl = skip_spam_check |
| 120 | |
| 121 | ## Always accept mail to `postmaster'. Currently this is not |
| 122 | ## negotiable; maybe a tweak can be added to `domains.conf' if |
| 123 | ## necessary. |
| 124 | accept local_parts = postmaster |
| 125 | |
| 126 | ## Collect the user's spam threshold from the `address_data' |
| 127 | ## variable, where it was left by the `fetch_spam_limit' router |
| 128 | ## during recipient verification. (This just saves duplicating this |
| 129 | ## enormous expression.) A missing entry, or one which isn't an |
| | ## optionally negative integer, is replaced by the default limit. |
| | ## Negative limits have to survive here, because the validity check |
| | ## applied by the routers lets them through. |
| 130 | warn set acl_m_this_spam_limit = \ |
| 131 | ${sg {${extract {spam_limit} \ |
| 132 | {${if def:address_data \ |
| 133 | {$address_data}{}}} \ |
| 134 | {$value}{nil}}} \ |
| 135 | {^(?!-?[0-9]+\$).*\$}{CONF_spam_max}} |
| 136 | |
| 137 | ## If there's a spam limit already established, and it's different |
| 138 | ## from this user's limit, then the sender will have to try this user |
| 139 | ## again later. |
| 140 | defer !hosts = +trusted |
| 141 | message = "You'd better try this one later" |
| 142 | condition = ${if def:acl_m_spam_limit {true}{false}} |
| 143 | condition = ${if ={$acl_m_spam_limit} \ |
| 144 | {$acl_m_this_spam_limit} \ |
| 145 | {false}{true}} |
| 146 | |
| 147 | ## There's no limit set yet, or the user's limit is the same as the |
| 148 | ## existing one, or the client's local and we're not checking for |
| 149 | ## spam anyway. Whichever way, it's safe to set it now. |
| 150 | warn set acl_m_spam_limit = $acl_m_this_spam_limit |
| 151 | |
| 152 | ## All done. |
| 153 | accept |
| 154 | |
| 155 | SECTION(acl, data-spam)m4_dnl |
| 156 | ## Do spam checking: the `data_spam' ACL must accept the message. |
| 157 | require acl = data_spam |
| 158 | |
| 159 | SECTION(acl, misc)m4_dnl |
| 160 | data_spam: |
| 161 | |
| 162 | ## See if we should do this check. |
| 163 | accept acl = skip_spam_check |
| 164 | |
| 165 | ## Check header validity. |
| 166 | require verify = header_syntax |
| 167 | |
| | ## It's possible that no recipient established a limit: mail |
| | ## addressed only to `postmaster' never sets one. An empty string |
| | ## compares as zero, which would reject anything with a positive |
| | ## score, so fall back to the default limit instead. |
| | warn !condition = ${if def:acl_m_spam_limit {true}{false}} |
| | set acl_m_spam_limit = CONF_spam_max |
| | |
| 168 | ## Check the message for spam, comparing to the configured limit. |
| | ## The `:true' suffix makes the scan itself always succeed, so the |
| | ## decision rests entirely on the score comparison below. |
| 169 | deny spam = exim:true |
| 170 | message = Tinned meat product detected ($spam_score) |
| 171 | condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \ |
| 172 | {true}{false}} |
| 173 | |
| 174 | ## Insert headers from the spam check now that we've decided to |
| 175 | ## accept the message. |
| 176 | warn |
| 177 | |
| 178 | ## Convert the limit (currently 10x fixed point) into a |
| 179 | ## decimal for presentation, by putting a point before the |
| | ## final digit. |
| 180 | set acl_m_spam_limit_presentation = \ |
| 181 | ${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}} |
| 182 | |
| 183 | ## Convert the report into something less obnoxious. Plain |
| 184 | ## old SpamAssassin has an `X-Spam-Status' header which |
| 185 | ## lists the matched rules and provides some other basic |
| 186 | ## information. Try to extract something similar from the |
| 187 | ## report. |
| 188 | ## |
| 189 | ## This is rather fiddly. |
| 190 | |
| 191 | ## Firstly, escape angle brackets, because we'll be using |
| 192 | ## them for our own purposes. The escape character is `!', |
| | ## so existing exclamation marks are escaped too. |
| 193 | set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}} |
| 194 | |
| 195 | ## Trim off the blurb paragraph and the preview. The rest |
| 196 | ## should be fairly well behaved. Wrap double angle- |
| 197 | ## brackets around the remainder; these can't appear in the |
| 198 | ## body because we escaped them all earlier. |
| 199 | set acl_m_spam_tests = \ |
| 200 | ${sg{$acl_m_spam_tests} \ |
| 201 | {\N^(?s).*\n Content analysis details:(.*)$\N} \ |
| 202 | {<<\$1>>}} |
| 203 | |
| 204 | ## Extract the information about the matching rules and |
| 205 | ## their scores. Leave `<<...>>' around everything else. |
| | ## Each rule line becomes `NAME:SCORE,' outside the brackets. |
| 206 | set acl_m_spam_tests = \ |
| 207 | ${sg{$acl_m_spam_tests} \ |
| 208 | {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \ |
| 209 | {>>\$2:\$1,<<}} |
| 210 | |
| 211 | ## Strip everything still in `<<...>>' pairs, including any |
| 212 | ## escaped characters inside. |
| 213 | set acl_m_spam_tests = \ |
| 214 | ${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}} |
| 215 | |
| 216 | ## Trim off a trailing comma. |
| 217 | set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{,\s*\$}{}} |
| 218 | |
| 219 | ## Undo the escaping. |
| 220 | set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}} |
| 221 | |
| 222 | ## Insert the headers. |
| 223 | add_header = X-SpamAssassin-Score: \ |
| 224 | $spam_score/$acl_m_spam_limit_presentation \ |
| 225 | ($spam_bar) |
| 226 | add_header = X-SpamAssassin-Status: \ |
| 227 | score=$spam_score, \ |
| 228 | limit=$acl_m_spam_limit_presentation, \n\t\ |
| 229 | tests=$acl_m_spam_tests |
| 230 | |
| 231 | ## We're good. |
| 232 | accept |
| 233 | |
| 234 | DIVERT(null) |
| 235 | ###----- That's all, folks -------------------------------------------------- |