| 1 | ### -*-m4-*- |
| 2 | ### |
| 3 | ### Spam filtering for distorted.org.uk Exim configuration |
| 4 | ### |
| 5 | ### (c) 2012 Mark Wooding |
| 6 | ### |
| 7 | |
| 8 | ###----- Licensing notice --------------------------------------------------- |
| 9 | ### |
| 10 | ### This program is free software; you can redistribute it and/or modify |
| 11 | ### it under the terms of the GNU General Public License as published by |
| 12 | ### the Free Software Foundation; either version 2 of the License, or |
| 13 | ### (at your option) any later version. |
| 14 | ### |
| 15 | ### This program is distributed in the hope that it will be useful, |
| 16 | ### but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 17 | ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 18 | ### GNU General Public License for more details. |
| 19 | ### |
| 20 | ### You should have received a copy of the GNU General Public License |
| 21 | ### along with this program; if not, write to the Free Software Foundation, |
| 22 | ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
| 23 | |
| 24 | DIVERT(null) |
| 25 | ###-------------------------------------------------------------------------- |
| 26 | ### Spam filtering. |
| 27 | |
| 28 | ## The Exim documentation tells lies. |
| 29 | ## |
| 30 | ## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}* |
| 31 | ## : The command and its arguments are first expanded separately, [...] |
| 32 | ## |
| 33 | ## They aren't: the whole command line is expanded in one go, and only then |
| 34 | ## split at unquoted spaces. So double-quote each argument and `rxquote' it. |
| 35 | m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>) |
| 36 | |
| 37 | ## Spam-limit utilities. This one yields `spam_limit=N' iff N is an integer. |
| 38 | m4_define(<:SPAMLIMIT_CHECK:>, |
| 39 | <:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>) |
| 40 | ## Router preamble: verify-only, not for submission, no limit found yet. |
| 41 | m4_define(<:SPAMLIMIT_ROUTER:>, |
| 42 | <:$1: |
| 43 | driver = redirect |
| 44 | data = :unknown: |
| 45 | verify_only = true |
| 46 | condition = ${if !eq{$acl_c_mode}{submission}} |
| 47 | condition = ${extract{spam_limit}{$address_data}{false}{true}}:>) |
| 48 | ## Append $2 (if nonempty) and then $1 to any existing `address_data'. |
| 49 | m4_define(<:SPAMLIMIT_SET:>, |
| 50 | <:address_data = \ |
| 51 | ${if def:address_data {$address_data}{}} \ |
| 52 | m4_ifelse(<:$2:>, <::>, <::>, <:$2 \ |
| 53 | :>)$1:>) |
| 54 | ## If file $1 exists, look up `$2@$3/$4' there for a limit; $5 is extra. |
| 55 | m4_define(<:SPAMLIMIT_LOOKUP:>, |
| 56 | <:condition = ${if exists{$1}} |
| 57 | SPAMLIMIT_SET(<:${lookup {$2@$3/$4} nwildlsearch {$1} \ |
| 58 | {SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>) |
| 59 | ## Get a limit from the `exim-spam-limit' userv service (5s timeout). |
| 60 | m4_define(<:SPAMLIMIT_USERV:>, |
| 61 | <:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \ |
| 62 | /usr/bin/userv CONF_userv_opts \ |
| 63 | SHQUOTE($1) exim-spam-limit \ |
| 64 | SHQUOTE($4) \ |
| 65 | SHQUOTE($2) SHQUOTE(@$3)} \ |
| 66 | {SPAMLIMIT_CHECK(<:$value:>)}}:>, <:$5:>):>) |
| 67 | ## The start of an `extract' of key $1 from `address_data', if defined. |
| 68 | m4_define(<:GET_ADDRDATA:>, |
| 69 | <:extract{<:$1:>}{${if def:address_data{$address_data}{}}}:>) |
| 70 | ## Tell Exim how to contact the spam-filter daemon. |
| 71 | SECTION(global, policy)m4_dnl |
| 72 | spamd_address = CONF_spamd_address CONF_spamd_port |
| 73 | |
| 74 | SECTION(acl, rcpt-hooks)m4_dnl |
| 75 | ## Do per-recipient spam-filter processing; see the `rcpt_spam' ACL. |
| 76 | require acl = rcpt_spam |
| 77 | |
| 78 | SECTION(acl, misc)m4_dnl |
| 79 | skip_spam_check: |
| 80 | |
| 81 | ## If the client is trusted, or this is a new submission, don't |
| 82 | ## bother with any of this. We will have verified the sender |
| 83 | ## fairly aggressively before granting this level of trust. |
| 84 | accept hosts = +trusted |
| 85 | accept condition = ${if eq{$acl_c_mode}{submission}} |
| 86 | |
| 87 | ## If all domains have disabled spam checking then don't check. |
| 88 | accept !condition = $acl_c_spam_check_domain |
| 89 | |
| 90 | ## Otherwise we should check: denying makes callers carry on checking. |
| 91 | deny |
| 92 | |
| 93 | rcpt_spam: |
| 94 | |
| 95 | ## If this is a virtual domain, and it says `spam-check=no', then we |
| 96 | ## shouldn't check spam. But we can't check domains at DATA time, so |
| 97 | ## instead we must track whether all recipients have disabled |
| 98 | ## checking. |
| 99 | warn !domains = ${if exists{CONF_sysconf_dir/domains.conf} \ |
| 100 | {partial0-lsearch; CONF_sysconf_dir/domains.conf} \ |
| 101 | {}} |
| 102 | set acl_c_spam_check_domain = true |
| 103 | warn !condition = $acl_c_spam_check_domain |
| 104 | condition = DOMKV(spam-check, {${expand:$value}}{true}) |
| 105 | set acl_c_spam_check_domain = true |
| 106 | |
| 107 | ## See if we should do this check. |
| 108 | accept acl = skip_spam_check |
| 109 | |
| 110 | ## Always accept mail to `postmaster'. Currently this is not |
| 111 | ## negotiable; maybe a tweak can be added to `domains.conf' if |
| 112 | ## necessary. |
| 113 | accept local_parts = postmaster |
| 114 | |
| 115 | ## Collect the user's spam threshold from `address_data', where the |
| 116 | ## `fetch_spam_limit' router left it during recipient verification, to |
| 117 | ## save repeating this enormous expression. A missing limit, or one |
| 118 | ## containing a non-digit (NOTE(review): even `-'?), becomes the maximum. |
| 119 | warn set acl_m_this_spam_limit = \ |
| 120 | ${sg {${GET_ADDRDATA(spam_limit){$value}{nil}}} \ |
| 121 | {^(|.*\\D.*)\$}{CONF_spam_max}} |
| 122 | ## Remember `USER=ADDRESS' for this recipient, escaping `!' and `:'. |
| 123 | warn condition = ${GET_ADDRDATA(user){true}{false}} |
| 124 | set acl_m_spam_users = \ |
| 125 | ${if def:acl_m_spam_users {$acl_m_spam_users::}{}}\ |
| 126 | ${GET_ADDRDATA(user) \ |
| 127 | {$value=${sg{$local_part@$domain}\ |
| 128 | {([!:])}{!\$1}}} \ |
| 129 | fail} |
| 130 | |
| 131 | ## If there's a spam limit already established, and it's different |
| 132 | ## from this user's limit, then the sender will have to try this user |
| 133 | ## again later. |
| 134 | defer !hosts = +trusted |
| 135 | message = "You'd better try this one later" |
| 136 | condition = ${if def:acl_m_spam_limit {true}{false}} |
| 137 | condition = ${if ={$acl_m_spam_limit} \ |
| 138 | {$acl_m_this_spam_limit} \ |
| 139 | {false}{true}} |
| 140 | |
| 141 | ## There's no limit set yet, or the user's limit is the same as the |
| 142 | ## existing one, or the client's local and we're not checking for |
| 143 | ## spam anyway. Whichever way, it's safe to set it now. |
| 144 | warn set acl_m_spam_limit = $acl_m_this_spam_limit |
| 145 | |
| 146 | ## All done. |
| 147 | accept |
| 148 | |
| 149 | SECTION(acl, data-hooks)m4_dnl |
| 150 | ## Do spam checking on the complete message; see the `data_spam' ACL. |
| 151 | require acl = data_spam |
| 152 | |
| 153 | SECTION(acl, misc)m4_dnl |
| 154 | data_spam: |
| 155 | |
| 156 | ## See if we should do this check. |
| 157 | accept acl = skip_spam_check |
| 158 | |
| 159 | ## Check header validity. |
| 160 | require verify = header_syntax |
| 161 | ## Scan the message for spam. The `:true' means this always succeeds, |
| 162 | ## leaving the score in variables; we compare with the limit later. |
| 163 | warn spam = exim:true |
| 164 | |
| 165 | ## Format some reporting stuff for the log and the added headers. |
| 166 | warn |
| 167 | |
| 168 | ## Convert the limit (currently 10x fixed point) into a |
| 169 | ## decimal for presentation. |
| 170 | set acl_m_spam_limit_presentation = \ |
| 171 | ${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}} |
| 172 | |
| 173 | ## Convert the report into something less obnoxious. Plain |
| 174 | ## old SpamAssassin has an `X-Spam-Status' header which |
| 175 | ## lists the matched rules and provides some other basic |
| 176 | ## information. Try to extract something similar from the |
| 177 | ## report. |
| 178 | ## |
| 179 | ## This is rather fiddly. |
| 180 | |
| 181 | ## Firstly, escape angle brackets (and `!', the escape character), |
| 182 | ## because we'll be using them for our own purposes. |
| 183 | set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}} |
| 184 | |
| 185 | ## Trim off the blurb paragraph and the preview. The rest |
| 186 | ## should be fairly well behaved. Wrap double angle- |
| 187 | ## brackets around the remainder; these can't appear in the |
| 188 | ## body because we escaped them all earlier. |
| 189 | set acl_m_spam_tests = \ |
| 190 | ${sg{$acl_m_spam_tests} \ |
| 191 | {\N^(?s).*\n Content analysis details:(.*)$\N} \ |
| 192 | {<<\$1>>}} |
| 193 | |
| 194 | ## Extract the information about the matching rules and |
| 195 | ## their scores. Leave `<<...>>' around everything else. |
| 196 | set acl_m_spam_tests = \ |
| 197 | ${sg{$acl_m_spam_tests} \ |
| 198 | {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \ |
| 199 | {>>\$2:\$1,<<}} |
| 200 | |
| 201 | ## Strip everything still in `<<...>>' pairs, including any |
| 202 | ## escaped characters inside. |
| 203 | set acl_m_spam_tests = \ |
| 204 | ${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}} |
| 205 | ## Trim off a trailing comma and any whitespace after it. The pattern |
| 206 | ## is protected because expansion would turn a bare `\s' into `s'. |
| 207 | set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{\N,\s*$\N}{}} |
| 208 | |
| 209 | ## Undo the escaping. |
| 210 | set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}} |
| 211 | |
| 212 | ## Reject if the score (times ten, as an integer) exceeds the limit. |
| 213 | ## Leave a dropping in the log file so that users can analyse |
| 214 | ## rejections, and tell the sender to get knotted. |
| 215 | deny message = Tinned meat product detected ($spam_score) |
| 216 | log_message = Spam rejection \ |
| 217 | score=$spam_score \ |
| 218 | limit=$acl_m_spam_limit_presentation \ |
| 219 | tests=$acl_m_spam_tests \ |
| 220 | users=$acl_m_spam_users |
| 221 | condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \ |
| 222 | {true}{false}} |
| 223 | |
| 224 | ## Insert headers from the spam check now that we've decided to |
| 225 | ## accept the message. |
| 226 | warn |
| 227 | ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Score: \ |
| 228 | $spam_score/$acl_m_spam_limit_presentation \ |
| 229 | ($spam_bar):>) |
| 230 | ADD_HEADER(<:X-CONF_header_token-SpamAssassin-Status: \ |
| 231 | score=$spam_score, \ |
| 232 | limit=$acl_m_spam_limit_presentation, \n\t\ |
| 233 | tests=$acl_m_spam_tests:>) |
| 234 | |
| 235 | ## We're good. |
| 236 | accept |
| 237 | |
| 238 | DIVERT(null) |
| 239 | ###----- That's all, folks -------------------------------------------------- |