[exim-config] / spam.m4

### -*-m4-*-
###
### Spam filtering for distorted.org.uk Exim configuration
###
### (c) 2012 Mark Wooding
###

###----- Licensing notice ---------------------------------------------------
###
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

DIVERT(null)
###--------------------------------------------------------------------------
### Spam filtering.

## The Exim documentation tells lies.
##
## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}*
## :     The command and its arguments are first expanded separately, [...]
##
## They aren't.  The whole command-and-args are expanded together, and then
## split at unquoted spaces.  This unpleasant hack sorts out the mess.
##
## The macro below takes one argument and emits it wrapped in double quotes
## with Exim's `rxquote' operator applied to it.  The intent is that the
## expanded value survives the split as a single word; this presumably relies
## on the splitter undoing the backslashes which `rxquote' inserts (TODO
## confirm against the Exim version in use).  Apply it exactly once per
## argument: a second application would escape the first layer of quotes and
## leave literal quote characters in the word.
m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)

## Utilities for collecting spam limits.
##
## Validate a candidate limit.  The single argument is an Exim expression.  If
## its value is a decimal integer, optionally with a leading minus sign, the
## generated text expands to `spam_limit=' followed by that value; otherwise
## it expands to the empty string.  The argument is substituted twice, so it
## should be something cheap to expand such as a variable reference.
m4_define(<:SPAMLIMIT_CHECK:>,
	<:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>)

## Common preamble for the limit-fetching routers.  The argument is the
## router name.  The result is a `redirect' router which only runs during
## address verification, is skipped when `$acl_c_mode' is `submission', and is
## skipped once `$address_data' already contains a `spam_limit' entry (so the
## first router to find a limit wins).  The redirection data is always
## `:unknown:', which according to the Exim specification makes a redirect
## router decline.  The caller appends further router options after the macro
## call.
m4_define(<:SPAMLIMIT_ROUTER:>,
<:$1:
	driver = redirect
	data = :unknown:
	verify_only = true
	condition = ${if !eq{$acl_c_mode}{submission}}
	condition = ${extract{spam_limit}{$address_data}{false}{true}}:>)

## Emit an `address_data' router option which keeps whatever `$address_data'
## already holds (nothing, if it is not defined) and follows it with the
## macro's single argument.
m4_define(<:SPAMLIMIT_SET:>,
	<:address_data = \
		${if def:address_data {$address_data}{}} \
		$1:>)

## Fetch a limit from a lookup file.  Arguments, in order: the file name, the
## local-part prefix, the local part, the local-part suffix, the domain, and
## the sender address.  The generated options make the router conditional on
## the file existing, and search the file (using `nwildlsearch') for the key
## formed by joining prefix, local part and suffix, then `@', the domain, `/'
## and the sender.  The value found is validated before being added to the
## address data; a missing or malformed value adds nothing.
m4_define(<:SPAMLIMIT_LOOKUP:>,
	<:condition = ${if exists{$1}}
	SPAMLIMIT_SET(<:${lookup {$2$3$4@$5/$6} nwildlsearch {$1} \
			       {SPAMLIMIT_CHECK($value)}}:>):>)

## Fetch a limit by running a command.  Arguments, in order: the userv user
## name, the local-part prefix, the local part, the local-part suffix, the
## domain, and the sender address.  The command run is `userv' (under
## `/usr/bin/timeout 5s', with the configured userv options) for the service
## `exim-spam-limit'; the service receives the sender, prefix, local part,
## suffix and `@'-prefixed domain as its arguments, in that order.  The value
## which the `run' expansion yields is validated before being added to the
## address data.
##
## Every argument, including the user name, is quoted by this macro itself,
## so callers should pass plain Exim expressions.
m4_define(<:SPAMLIMIT_USERV:>,
	<:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \
					userv CONF_userv_opts \
					SHQUOTE($1) exim-spam-limit \
					SHQUOTE($6) SHQUOTE($2) SHQUOTE($3) \
					SHQUOTE($4) SHQUOTE(@$5)} \
				{SPAMLIMIT_CHECK($value)}}:>):>)

SECTION(global, policy)m4_dnl
## Address and port of the spam-scanning daemon consulted by the `spam'
## condition in the DATA-time ACL; both come from the site configuration.
spamd_address = CONF_spamd_address CONF_spamd_port

SECTION(routers, allspam)m4_dnl
## If we're verifying an address and the recipient has a `~/.mail/spam-limit'
## file, then look up the recipient and sender addresses to find a plausible
## limit and insert it into the `address_data' where the RCPT ACL can find
## it.  This router always declines, so it doesn't affect the overall outcome
## of the verification.
##
## The search key is the recipient's local part (with any prefix and suffix),
## an `@', the domain, a `/' and the sender address, all run together.  The
## router only applies to local users, and recognizes the configured
## local-part suffixes if one is present.
SPAMLIMIT_ROUTER(fetch_spam_limit_lookup)
	check_local_user
	local_part_suffix = CONF_user_suffix_list
	local_part_suffix_optional = true
	SPAMLIMIT_LOOKUP(CONF_userconf_dir/spam-limit,
		$local_part_prefix, $local_part, $local_part_suffix, $domain,
		$sender_address)

## Alternatively, if the recipient has a `spam-limit.userv' file in their
## configuration directory, ask the `exim-spam-limit' userv service for that
## user to supply the limit.  Like the lookup router, this one only applies
## to local users during verification and never settles the routing itself.
##
## The user name is passed bare: the macro quotes each of its arguments
## itself, and quoting the name here as well would escape the first layer of
## double quotes, handing userv a user name with literal `"' characters
## around it.
SPAMLIMIT_ROUTER(fetch_spam_limit_userv)
	check_local_user
	local_part_suffix = CONF_user_suffix_list
	local_part_suffix_optional = true
	condition = ${if exists{CONF_userconf_dir/spam-limit.userv}}
	SPAMLIMIT_USERV($local_part,
		$local_part_prefix, $local_part, $local_part_suffix, $domain,
		$sender_address)

SECTION(acl, rcpt-hooks)m4_dnl
	## Do per-recipient spam-filter processing.  The recipient only
	## proceeds past this point if the `rcpt_spam' ACL accepts it.
	require	 acl = rcpt_spam

SECTION(acl, misc)m4_dnl
## Helper ACL shared by the RCPT-time and DATA-time spam ACLs.  It accepts
## when spam checking should be skipped for the current client, and denies
## when the check should go ahead; the callers use it in an `accept acl = ...'
## statement so that a denial here simply lets them carry on.
skip_spam_check:

	## If the client is trusted, or this is a new submission, don't
	## bother with any of this.  We will have verified the sender
	## fairly aggressively before granting this level of trust.
	accept	 hosts = +trusted
	accept	 condition = ${if eq{$acl_c_mode}{submission}}

	## Otherwise we should check.
	deny

## Per-recipient spam processing.  Works out the spam limit which applies to
## this recipient and records it for the DATA-time check.  Since a message is
## scanned once but may have several recipients, all recipients of a message
## must share one limit: a recipient whose limit differs from the one already
## recorded is deferred.
rcpt_spam:

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Always accept mail to `postmaster'.  Currently this is not
	## negotiable; maybe a tweak can be added to `domains.conf' if
	## necessary.
	##
	## NOTE(review): this accepts before any limit is recorded, so a
	## message addressed only to postmaster leaves `acl_m_spam_limit'
	## unset as far as this ACL is concerned.
	accept	 local_parts = postmaster

	## Collect the user's spam threshold from the `address_data'
	## variable, where it was left by one of the `fetch_spam_limit_...'
	## routers during recipient verification.  (This just saves
	## duplicating this enormous expression.)
	##
	## The `spam_limit' entry is extracted, with `nil' standing in if
	## there isn't one; then, if the result is empty or contains any
	## non-digit character (which `nil' does), the whole thing is
	## replaced by the configured maximum.
	##
	## NOTE(review): a limit with a leading `-' contains a non-digit, so
	## it too is replaced by the maximum here, although the validation
	## applied when the limit is fetched lets negative numbers through.
	## Confirm which of the two is intended.
	warn	 set acl_m_this_spam_limit = \
			${sg {${extract {spam_limit} \
					{${if def:address_data \
					      {$address_data}{}}} \
					{$value}{nil}}} \
			     {^(|.*\\D.*)\$}{CONF_spam_max}}

	## If there's a spam limit already established, and it's different
	## from this user's limit, then the sender will have to try this user
	## again later.  (The first condition guards the numeric comparison
	## in the second against an unset variable.)
	defer	!hosts = +trusted
		 message = "You'd better try this one later"
		 condition = ${if def:acl_m_spam_limit {true}{false}}
		 condition = ${if ={$acl_m_spam_limit} \
				   {$acl_m_this_spam_limit} \
				  {false}{true}}

	## There's no limit set yet, or the user's limit is the same as the
	## existing one, or the client's local and we're not checking for
	## spam anyway.  Whichever way, it's safe to set it now.
	warn	 set acl_m_spam_limit = $acl_m_this_spam_limit

	## All done.
	accept

SECTION(acl, data-spam)m4_dnl
	## Do spam checking.  The message only proceeds past this point if
	## the `data_spam' ACL accepts it.
	require	 acl = data_spam

SECTION(acl, misc)m4_dnl
## Message-level spam check.  Trusted clients and submissions bypass it.
## Otherwise the headers are syntax-checked, the message is scanned and
## rejected if its score exceeds the limit collected at RCPT time, and a
## message which passes is annotated with two summary headers.
data_spam:

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Check header validity.
	require	 verify = header_syntax

	## Make sure there is a limit to compare against.  The RCPT-time ACL
	## accepts `postmaster' before it records a limit, so a message
	## addressed only to postmaster can arrive here with none set.  The
	## numeric comparison below would then be made against an empty
	## string (which Exim reads as zero), and the headers would show an
	## empty limit.  Fall back to the configured maximum instead, as is
	## done for recipients who have not chosen a limit of their own.
	warn	 set acl_m_spam_limit = \
			${if def:acl_m_spam_limit \
			     {$acl_m_spam_limit}{CONF_spam_max}}

	## Check the message for spam, comparing to the configured limit.
	deny	 spam = exim:true
		 message = Tinned meat product detected ($spam_score)
		 condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
				  {true}{false}}

	## Insert headers from the spam check now that we've decided to
	## accept the message.
	warn

		 ## Convert the limit (currently 10x fixed point) into a
		 ## decimal for presentation.
		 set acl_m_spam_limit_presentation = \
			${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}

		 ## Convert the report into something less obnoxious.  Plain
		 ## old SpamAssassin has an `X-Spam-Status' header which
		 ## lists the matched rules and provides some other basic
		 ## information.  Try to extract something similar from the
		 ## report.
		 ##
		 ## This is rather fiddly.

		 ## Firstly, escape angle brackets, because we'll be using
		 ## them for our own purposes.
		 set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}

		 ## Trim off the blurb paragraph and the preview.  The rest
		 ## should be fairly well behaved.  Wrap double angle-
		 ## brackets around the remainder; these can't appear in the
		 ## body because we escaped them all earlier.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N^(?s).*\n Content analysis details:(.*)$\N} \
			    {<<\$1>>}}

		 ## Extract the information about the matching rules and
		 ## their scores.  Leave `<<...>>' around everything else.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
			    {>>\$2:\$1,<<}}

		 ## Strip everything still in `<<...>>' pairs, including any
		 ## escaped characters inside.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}}

		 ## Trim off a trailing comma, along with any whitespace
		 ## after it.  The pattern is protected from string expansion
		 ## because otherwise the backslash is eaten before the
		 ## regex compiler sees it, and the whitespace class turns
		 ## into a plain letter `s'.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{\N,\s*$\N}{}}

		 ## Undo the escaping.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}

		 ## Insert the headers.
		 add_header = X-SpamAssassin-Score: \
			$spam_score/$acl_m_spam_limit_presentation \
			($spam_bar)
		 add_header = X-SpamAssassin-Status: \
			score=$spam_score, \
			limit=$acl_m_spam_limit_presentation, \n\t\
			tests=$acl_m_spam_tests

	## We're good.
	accept

DIVERT(null)
###----- That's all, folks --------------------------------------------------
Commit	Line	Data
185b5456 MW	1	### --m4--
	2	###
	3	### Spam filtering for distorted.org.uk Exim configuration
	4	###
	5	### (c) 2012 Mark Wooding
	6	###
	7
	8	###----- Licensing notice ---------------------------------------------------
	9	###
	10	### This program is free software; you can redistribute it and/or modify
	11	### it under the terms of the GNU General Public License as published by
	12	### the Free Software Foundation; either version 2 of the License, or
	13	### (at your option) any later version.
	14	###
	15	### This program is distributed in the hope that it will be useful,
	16	### but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	### GNU General Public License for more details.
	19	###
	20	### You should have received a copy of the GNU General Public License
	21	### along with this program; if not, write to the Free Software Foundation,
	22	### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	23
	24	DIVERT(null)
	25	###--------------------------------------------------------------------------
	26	### Spam filtering.
	27
953ae20e MW	28	## The Exim documentation tells lies.
	29	##
	30	## : ${run{<_command_>* <_args_>}{<_string1_>}{<_string2_>}}*
	31	## : The command and its arguments are first expanded separately, [...]
	32	##
	33	## They aren't. The whole command-and-args are expanded together, and then
	34	## split at unquoted spaces. This unpleasant hack sorts out the mess.
	35	m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)
	36
02af00e7 MW	37	## Utilities for collecting spam limits.
	38	m4_define(<:SPAMLIMIT_CHECK:>,
	39	<:${if match{$1}{\N^-?[0-9]+$\N} {spam_limit=$1} {}}:>)
	40
	41	m4_define(<:SPAMLIMIT_ROUTER:>,
	42	<:$1:
	43	driver = redirect
	44	data = :unknown:
	45	verify_only = true
	46	condition = ${if !eq{$acl_c_mode}{submission}}
	47	condition = ${extract{spam_limit}{$address_data}{false}{true}}:>)
	48
	49	m4_define(<:SPAMLIMIT_SET:>,
	50	<:address_data = \
	51	${if def:address_data {$address_data}{}} \
	52	$1:>)
	53
	54	m4_define(<:SPAMLIMIT_LOOKUP:>,
	55	<:condition = ${if exists{$1}}
	56	SPAMLIMIT_SET(<:${lookup {$2$3$4@$5/$6} nwildlsearch {$1} \
	57	{SPAMLIMIT_CHECK($value)}}:>):>)
	58
	59	m4_define(<:SPAMLIMIT_USERV:>,
	60	<:SPAMLIMIT_SET(<:${run {/usr/bin/timeout 5s \
	61	userv CONF_userv_opts \
	62	SHQUOTE($1) exim-spam-limit \
	63	SHQUOTE($6) SHQUOTE($2) SHQUOTE($3) \
	64	SHQUOTE($4) SHQUOTE(@$5)} \
	65	{SPAMLIMIT_CHECK($value)}}:>):>)
	66
185b5456 MW	67	SECTION(global, policy)m4_dnl
	68	spamd_address = CONF_spamd_address CONF_spamd_port
	69
	70	SECTION(routers, allspam)m4_dnl
	71	## If we're verifying an address and the recipient has a `~/.mail/spam-limit'
	72	## file, then look up the recipient and sender addresses to find a plausible
	73	## limit and insert it into the `address_data' where the RCPT ACL can find
	74	## it. This router always declines, so it doesn't affect the overall outcome
	75	## of the verification.
02af00e7 MW	76	SPAMLIMIT_ROUTER(fetch_spam_limit_lookup)
02af00e7 MW	77	check_local_user
185b5456 MW	78	local_part_suffix = CONF_user_suffix_list
185b5456 MW	79	local_part_suffix_optional = true
02af00e7 MW	80	SPAMLIMIT_LOOKUP(CONF_userconf_dir/spam-limit,
	81	$local_part_prefix, $local_part, $local_part_suffix, $domain,
	82	$sender_address)
	83
	84	SPAMLIMIT_ROUTER(fetch_spam_limit_userv)
185b5456	85	check_local_user
02af00e7 MW	86	local_part_suffix = CONF_user_suffix_list
	87	local_part_suffix_optional = true
	88	condition = ${if exists{CONF_userconf_dir/spam-limit.userv}}
	89	SPAMLIMIT_USERV(SHQUOTE($local_part),
	90	$local_part_prefix, $local_part, $local_part_suffix, $domain,
	91	$sender_address)
185b5456 MW	92
	93	SECTION(acl, rcpt-hooks)m4_dnl
	94	## Do per-recipient spam-filter processing.
	95	require acl = rcpt_spam
	96
	97	SECTION(acl, misc)m4_dnl
b8b0f13c	98	skip_spam_check:
185b5456	99
b8b0f13c MW	100	## If the client is trusted, or this is a new submission, don't
	101	## bother with any of this. We will have verified the sender
	102	## fairly aggressively before granting this level of trust.
185b5456	103	accept hosts = +trusted
b8b0f13c MW	104	accept condition = ${if eq{$acl_c_mode}{submission}}
	105
	106	## Otherwise we should check.
	107	deny
	108
	109	rcpt_spam:
	110
	111	## See if we should do this check.
	112	accept acl = skip_spam_check
185b5456	113
aa935c91 MW	114	## Always accept mail to `postmaster'. Currently this is not
	115	## negotiable; maybe a tweak can be added to `domains.conf' if
	116	## necessary.
	117	accept local_parts = postmaster
	118
185b5456 MW	119	## Collect the user's spam threshold from the `address_data'
	120	## variable, where it was left by the `fetch_spam_limit' router
	121	## during recipient verification. (This just saves duplicating this
	122	## enormous expression.)
	123	warn set acl_m_this_spam_limit = \
	124	${sg {${extract {spam_limit} \
	125	{${if def:address_data \
	126	{$address_data}{}}} \
	127	{$value}{nil}}} \
	128	{^(\|.\\D.)\$}{CONF_spam_max}}
	129
	130	## If there's a spam limit already established, and it's different
	131	## from this user's limit, then the sender will have to try this user
	132	## again later.
	133	defer !hosts = +trusted
	134	message = "You'd better try this one later"
	135	condition = ${if def:acl_m_spam_limit {true}{false}}
	136	condition = ${if ={$acl_m_spam_limit} \
	137	{$acl_m_this_spam_limit} \
	138	{false}{true}}
	139
	140	## There's no limit set yet, or the user's limit is the same as the
	141	## existing one, or the client's local and we're not checking for
	142	## spam anyway. Whichever way, it's safe to set it now.
	143	warn set acl_m_spam_limit = $acl_m_this_spam_limit
	144
	145	## All done.
	146	accept
	147
	148	SECTION(acl, data-spam)m4_dnl
	149	## Do spam checking.
	150	require acl = data_spam
	151
	152	SECTION(acl, misc)m4_dnl
	153	data_spam:
	154
b8b0f13c MW	155	## See if we should do this check.
b8b0f13c MW	156	accept acl = skip_spam_check
185b5456	157
09ca3919 MW	158	## Check header validity.
	159	require verify = header_syntax
	160
185b5456 MW	161	## Check the message for spam, comparing to the configured limit.
	162	deny spam = exim:true
	163	message = Tinned meat product detected ($spam_score)
	164	condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
	165	{true}{false}}
	166
	167	## Insert headers from the spam check now that we've decided to
	168	## accept the message.
	169	warn
a882a548	170
185b5456 MW	171	## Convert the limit (currently 10x fixed point) into a
	172	## decimal for presentation.
	173	set acl_m_spam_limit_presentation = \
	174	${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}
	175
	176	## Convert the report into something less obnoxious. Plain
	177	## old SpamAssassin has an `X-Spam-Status' header which
	178	## lists the matched rules and provides some other basic
	179	## information. Try to extract something similar from the
	180	## report.
	181	##
	182	## This is rather fiddly.
	183
	184	## Firstly, escape angle brackets, because we'll be using
	185	## them for our own purposes.
	186	set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}
	187
	188	## Trim off the blurb paragraph and the preview. The rest
	189	## should be fairly well behaved. Wrap double angle-
	190	## brackets around the remainder; these can't appear in the
	191	## body because we escaped them all earlier.
	192	set acl_m_spam_tests = \
	193	${sg{$acl_m_spam_tests} \
	194	{\N^(?s).\n Content analysis details:(.)$\N} \
	195	{<<\$1>>}}
	196
	197	## Extract the information about the matching rules and
	198	## their scores. Leave `<<...>>' around everything else.
	199	set acl_m_spam_tests = \
	200	${sg{$acl_m_spam_tests} \
4ff4d304	201	{\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
185b5456 MW	202	{>>\$2:\$1,<<}}
	203
	204	## Strip everything still in `<<...>>' pairs, including any
	205	## escaped characters inside.
	206	set acl_m_spam_tests = \
	207	${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+\|!.)*>>\N}{}}
	208
	209	## Trim off a trailing comma.
	210	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{,\s*\$}{}}
	211
	212	## Undo the escaping.
	213	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}
	214
	215	## Insert the headers.
	216	add_header = X-SpamAssassin-Score: \
	217	$spam_score/$acl_m_spam_limit_presentation \
	218	($spam_bar)
	219	add_header = X-SpamAssassin-Status: \
	220	score=$spam_score, \
	221	limit=$acl_m_spam_limit_presentation, \n\t\
	222	tests=$acl_m_spam_tests
	223
185b5456 MW	224	## We're good.
	225	accept
	226
	227	DIVERT(null)
	228	###----- That's all, folks --------------------------------------------------