[exim-config] / spam.m4

### -*-m4-*-
###
### Spam filtering for distorted.org.uk Exim configuration
###
### (c) 2012 Mark Wooding
###

###----- Licensing notice ---------------------------------------------------
###
### This program is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
###
### This program is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
### GNU General Public License for more details.
###
### You should have received a copy of the GNU General Public License
### along with this program; if not, write to the Free Software Foundation,
### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

DIVERT(null)
###--------------------------------------------------------------------------
### Spam filtering.

## The Exim documentation tells lies.
##
## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}*
## :     The command and its arguments are first expanded separately, [...]
##
## They aren't.  The whole command-and-args are expanded together, and then
## split at unquoted spaces.  This unpleasant hack sorts out the mess.
##
## The macro defined here takes one argument: an Exim expansion which is to
## end up as a single word of a `run' command line.  It wraps the argument
## in double quotes, so that spaces in the expanded value don't split the
## word, and passes it through Exim's `rxquote' operator first.
## NOTE(review): `rxquote' is presumably there to backslash-escape any
## quotes or backslashes in the value; confirm against the Exim manual.
m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)

SECTION(global, policy)m4_dnl
## Where the spam-scanning daemon is to be found: its address followed by
## its port, both taken from the site configuration.
spamd_address = CONF_spamd_address CONF_spamd_port

SECTION(routers, allspam)m4_dnl
## If we're verifying an address and the recipient has a `~/.mail/spam-limit'
## file, then look up the recipient and sender addresses to find a plausible
## limit and insert it into the `address_data' where the RCPT ACL can find
## it.  This router always declines, so it doesn't affect the overall outcome
## of the verification.
##
## In more detail: the new `address_data' is whatever was there already,
## followed by the results of two per-user sources.  Neither source is
## consulted in submission mode, and each is consulted only if its marker
## file exists in the user's configuration directory.
##
##   * `spam-limit' is searched with `nwildlsearch'.  The key is the full
##     recipient address (prefix, local part, suffix, `@', domain), then a
##     slash, then the sender address.
##
##   * `spam-limit.userv' causes `userv' to be run for the local part's
##     `exim-spam-limit' service, under a five-second timeout.  The
##     arguments are the sender address, then the recipient's prefix, local
##     part, suffix, and `@domain'.
##
## Either result is used, as `spam_limit=N', only if it looks like an
## optionally negative decimal integer; anything else contributes nothing.
fetch_spam_limit:
	driver = redirect
	data = :unknown:
	## Only run while verifying addresses, never when actually routing.
	verify_only = true
	local_part_suffix = CONF_user_suffix_list
	local_part_suffix_optional = true
	## The per-user files only make sense for real local users.
	check_local_user
	address_data = \
		${if def:address_data {$address_data}{}} \
		${if and {{!eq{$acl_c_mode}{submission}} \
			  {exists {CONF_userconf_dir/spam-limit}}} \
		     {${lookup {$local_part_prefix\
					$local_part\
					$local_part_suffix\
					@$domain/\
					$sender_address} \
			       nwildlsearch {CONF_userconf_dir/spam-limit} \
			       {${if match{$value}{\N^-?[0-9]+$\N} \
				     {spam_limit=$value} \
				     {}}} \
			       {}}} \
		     {}} \
		${if and {{!eq{$acl_c_mode}{submission}} \
			  {exists {CONF_userconf_dir/spam-limit.userv}}} \
		     {${run {/usr/bin/timeout 5s -- \
				userv SHQUOTE($local_part) exim-spam-limit \
					SHQUOTE($sender_address) \
					SHQUOTE($local_part_prefix) \
					SHQUOTE($local_part) \
					SHQUOTE($local_part_suffix) \
					SHQUOTE(@$domain)} \
			    {${if match{$value}{\N^-?[0-9]+$\N} \
				  {spam_limit=$value} \
				  {}}} \
			    {}}} \
		     {}}

SECTION(acl, rcpt-hooks)m4_dnl
	## Do per-recipient spam-filter processing: `rcpt_spam' records the
	## recipient's spam limit for the message, and defers the recipient
	## if it clashes with a limit that an earlier recipient established.
	require	 acl = rcpt_spam

SECTION(acl, misc)m4_dnl
skip_spam_check:

	## Shared gate for the spam ACLs.  Accepting here means `skip the
	## spam processing'; denying means `carry on and do it'.

	## Trusted clients are let off, and so are new submissions.  We will
	## have verified the sender fairly aggressively before granting this
	## level of trust.
	accept	 hosts = +trusted
	accept	 condition = ${if eq{$acl_c_mode}{submission} {true}{false}}

	## Everybody else gets checked.
	deny

rcpt_spam:

	## Per-recipient spam processing, run from the RCPT ACL.  A message
	## is scanned only once, so all of its recipients must share a single
	## spam limit: the first recipient's limit is adopted, and later
	## recipients whose limit differs are deferred so that the sender
	## retries them in a separate message.

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Always accept mail to `postmaster'.  Currently this is not
	## negotiable; maybe a tweak can be added to `domains.conf' if
	## necessary.
	accept	 local_parts = postmaster

	## Collect the user's spam threshold from the `address_data'
	## variable, where it was left by the `fetch_spam_limit' router
	## during recipient verification.  (This just saves duplicating this
	## enormous expression.)
	##
	## If there is no `spam_limit' entry, or its value is not an
	## optionally negative decimal integer, substitute the site-wide
	## maximum.  The router deliberately lets negative limits through,
	## so the pattern here must not treat the minus sign as junk: it
	## matches exactly those values which are not signed integers.
	warn	 set acl_m_this_spam_limit = \
			${sg {${extract {spam_limit} \
					{${if def:address_data \
					      {$address_data}{}}} \
					{$value}{nil}}} \
			     {^(?!-?\\d+\$).*\$}{CONF_spam_max}}

	## If there's a spam limit already established, and it's different
	## from this user's limit, then the sender will have to try this user
	## again later.
	defer	!hosts = +trusted
		 message = "You'd better try this one later"
		 condition = ${if def:acl_m_spam_limit {true}{false}}
		 condition = ${if ={$acl_m_spam_limit} \
				   {$acl_m_this_spam_limit} \
				  {false}{true}}

	## There's no limit set yet, or the user's limit is the same as the
	## existing one, or the client's local and we're not checking for
	## spam anyway.  Whichever way, it's safe to set it now.
	warn	 set acl_m_spam_limit = $acl_m_this_spam_limit

	## All done.
	accept

SECTION(acl, data-spam)m4_dnl
	## Do spam checking: `data_spam' rejects the message if it scores
	## above the limit collected at RCPT time, and otherwise adds
	## headers describing the result.
	require	 acl = data_spam

SECTION(acl, misc)m4_dnl
data_spam:

	## Message-level spam processing, run from the DATA ACL.  The limit
	## being compared against is the one which `rcpt_spam' stored in
	## `acl_m_spam_limit'.
	##
	## NOTE(review): `rcpt_spam' accepts mail to `postmaster' before it
	## sets the limit, so a message addressed only to `postmaster'
	## arrives here with the limit unset.  Confirm that the comparison
	## below does something sensible in that case.

	## See if we should do this check.
	accept	 acl = skip_spam_check

	## Check header validity.
	require	 verify = header_syntax

	## Check the message for spam, comparing to the configured limit.
	## The `:true' makes the `spam' condition succeed whatever the
	## score, so the decision is left entirely to the comparison.
	deny	 spam = exim:true
		 message = Tinned meat product detected ($spam_score)
		 condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
				  {true}{false}}

	## Insert headers from the spam check now that we've decided to
	## accept the message.
	warn

		 ## Convert the limit (currently 10x fixed point) into a
		 ## decimal for presentation, by putting a decimal point in
		 ## front of the final digit.
		 set acl_m_spam_limit_presentation = \
			${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}

		 ## Convert the report into something less obnoxious.  Plain
		 ## old SpamAssassin has an `X-Spam-Status' header which
		 ## lists the matched rules and provides some other basic
		 ## information.  Try to extract something similar from the
		 ## report.
		 ##
		 ## This is rather fiddly.  Each step below rewrites
		 ## `acl_m_spam_tests' in place, so their order matters.

		 ## Firstly, escape every `!', `<' and `>' by putting a `!'
		 ## in front of it, because we'll be using angle brackets
		 ## for our own purposes.
		 set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}

		 ## Trim off the blurb paragraph and the preview: everything
		 ## up to and including the `Content analysis details:'
		 ## line.  The rest should be fairly well behaved.  Wrap
		 ## double angle-brackets around the remainder; these can't
		 ## appear in the body because we escaped them all earlier.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N^(?s).*\n Content analysis details:(.*)$\N} \
			    {<<\$1>>}}

		 ## Extract the information about the matching rules and
		 ## their scores.  Leave `<<...>>' around everything else.
		 ## Each line which starts with a score and a rule name
		 ## becomes `NAME:SCORE,' sitting outside the brackets.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests} \
			    {\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
			    {>>\$2:\$1,<<}}

		 ## Strip everything still in `<<...>>' pairs, including any
		 ## escaped characters inside.
		 set acl_m_spam_tests = \
			${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}}

		 ## Trim off a trailing comma.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{,\s*\$}{}}

		 ## Undo the escaping, by dropping the `!' in front of each
		 ## escaped character.
		 set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}

		 ## Insert the headers.
		 add_header = X-SpamAssassin-Score: \
			$spam_score/$acl_m_spam_limit_presentation \
			($spam_bar)
		 add_header = X-SpamAssassin-Status: \
			score=$spam_score, \
			limit=$acl_m_spam_limit_presentation, \n\t\
			tests=$acl_m_spam_tests

	## We're good.
	accept

DIVERT(null)
###----- That's all, folks --------------------------------------------------
Commit	Line	Data
185b5456 MW	1	### -*-m4-*-
	2	###
	3	### Spam filtering for distorted.org.uk Exim configuration
	4	###
	5	### (c) 2012 Mark Wooding
	6	###
	7
	8	###----- Licensing notice ---------------------------------------------------
	9	###
	10	### This program is free software; you can redistribute it and/or modify
	11	### it under the terms of the GNU General Public License as published by
	12	### the Free Software Foundation; either version 2 of the License, or
	13	### (at your option) any later version.
	14	###
	15	### This program is distributed in the hope that it will be useful,
	16	### but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	18	### GNU General Public License for more details.
	19	###
	20	### You should have received a copy of the GNU General Public License
	21	### along with this program; if not, write to the Free Software Foundation,
	22	### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
	23
	24	DIVERT(null)
	25	###--------------------------------------------------------------------------
	26	### Spam filtering.
	27
953ae20e MW	28	## The Exim documentation tells lies.
	29	##
	30	## : *${run{*<_command_>* *<_args_>*}{*<_string1_>*}{*<_string2_>*}}*
	31	## : The command and its arguments are first expanded separately, [...]
	32	##
	33	## They aren't. The whole command-and-args are expanded together, and then
	34	## split at unquoted spaces. This unpleasant hack sorts out the mess.
	35	m4_define(<:SHQUOTE:>, <:"${rxquote:$1}":>)
	36
185b5456 MW	37	SECTION(global, policy)m4_dnl
	38	spamd_address = CONF_spamd_address CONF_spamd_port
	39
	40	SECTION(routers, allspam)m4_dnl
	41	## If we're verifying an address and the recipient has a `~/.mail/spam-limit'
	42	## file, then look up the recipient and sender addresses to find a plausible
	43	## limit and insert it into the `address_data' where the RCPT ACL can find
	44	## it. This router always declines, so it doesn't affect the overall outcome
	45	## of the verification.
	46	fetch_spam_limit:
	47	driver = redirect
	48	data = :unknown:
	49	verify_only = true
	50	local_part_suffix = CONF_user_suffix_list
	51	local_part_suffix_optional = true
	52	check_local_user
	53	address_data = \
	54	${if def:address_data {$address_data}{}} \
2fb42245 MW	55	${if and {{!eq{$acl_c_mode}{submission}} \
2fb42245 MW	56	{exists {CONF_userconf_dir/spam-limit}}} \
185b5456 MW	57	{${lookup {$local_part_prefix\
	58	$local_part\
	59	$local_part_suffix\
	60	@$domain/\
	61	$sender_address} \
	62	nwildlsearch {CONF_userconf_dir/spam-limit} \
b84f89e0	63	{${if match{$value}{\N^-?[0-9]+$\N} \
57e46ac8 MW	64	{spam_limit=$value} \
57e46ac8 MW	65	{}}} \
185b5456 MW	66	{}}} \
185b5456 MW	67	{}} \
2fb42245 MW	68	${if and {{!eq{$acl_c_mode}{submission}} \
2fb42245 MW	69	{exists {CONF_userconf_dir/spam-limit.userv}}} \
aa8d0e75	70	{${run {/usr/bin/timeout 5s -- \
953ae20e MW	71	userv SHQUOTE($local_part) exim-spam-limit \
	72	SHQUOTE($sender_address) \
	73	SHQUOTE($local_part_prefix) \
	74	SHQUOTE($local_part) \
	75	SHQUOTE($local_part_suffix) \
	76	SHQUOTE(@$domain)} \
b84f89e0	77	{${if match{$value}{\N^-?[0-9]+$\N} \
185b5456 MW	78	{spam_limit=$value} \
	79	{}}} \
	80	{}}} \
	81	{}}
	82
	83	SECTION(acl, rcpt-hooks)m4_dnl
	84	## Do per-recipient spam-filter processing.
	85	require acl = rcpt_spam
	86
	87	SECTION(acl, misc)m4_dnl
b8b0f13c	88	skip_spam_check:
185b5456	89
b8b0f13c MW	90	## If the client is trusted, or this is a new submission, don't
	91	## bother with any of this. We will have verified the sender
	92	## fairly aggressively before granting this level of trust.
185b5456	93	accept hosts = +trusted
b8b0f13c MW	94	accept condition = ${if eq{$acl_c_mode}{submission}}
	95
	96	## Otherwise we should check.
	97	deny
	98
	99	rcpt_spam:
	100
	101	## See if we should do this check.
	102	accept acl = skip_spam_check
185b5456	103
aa935c91 MW	104	## Always accept mail to `postmaster'. Currently this is not
	105	## negotiable; maybe a tweak can be added to `domains.conf' if
	106	## necessary.
	107	accept local_parts = postmaster
	108
185b5456 MW	109	## Collect the user's spam threshold from the `address_data'
	110	## variable, where it was left by the `fetch_spam_limit' router
	111	## during recipient verification. (This just saves duplicating this
	112	## enormous expression.)
	113	warn set acl_m_this_spam_limit = \
	114	${sg {${extract {spam_limit} \
	115	{${if def:address_data \
	116	{$address_data}{}}} \
	117	{$value}{nil}}} \
	118	{^(|.*\\D.*)\$}{CONF_spam_max}}
	119
	120	## If there's a spam limit already established, and it's different
	121	## from this user's limit, then the sender will have to try this user
	122	## again later.
	123	defer !hosts = +trusted
	124	message = "You'd better try this one later"
	125	condition = ${if def:acl_m_spam_limit {true}{false}}
	126	condition = ${if ={$acl_m_spam_limit} \
	127	{$acl_m_this_spam_limit} \
	128	{false}{true}}
	129
	130	## There's no limit set yet, or the user's limit is the same as the
	131	## existing one, or the client's local and we're not checking for
	132	## spam anyway. Whichever way, it's safe to set it now.
	133	warn set acl_m_spam_limit = $acl_m_this_spam_limit
	134
	135	## All done.
	136	accept
	137
	138	SECTION(acl, data-spam)m4_dnl
	139	## Do spam checking.
	140	require acl = data_spam
	141
	142	SECTION(acl, misc)m4_dnl
	143	data_spam:
	144
b8b0f13c MW	145	## See if we should do this check.
b8b0f13c MW	146	accept acl = skip_spam_check
185b5456	147
09ca3919 MW	148	## Check header validity.
	149	require verify = header_syntax
	150
185b5456 MW	151	## Check the message for spam, comparing to the configured limit.
	152	deny spam = exim:true
	153	message = Tinned meat product detected ($spam_score)
	154	condition = ${if >{$spam_score_int}{$acl_m_spam_limit} \
	155	{true}{false}}
	156
	157	## Insert headers from the spam check now that we've decided to
	158	## accept the message.
	159	warn
a882a548	160
185b5456 MW	161	## Convert the limit (currently 10x fixed point) into a
	162	## decimal for presentation.
	163	set acl_m_spam_limit_presentation = \
	164	${sg{$acl_m_spam_limit}{\N(\d)$\N}{.\$1}}
	165
	166	## Convert the report into something less obnoxious. Plain
	167	## old SpamAssassin has an `X-Spam-Status' header which
	168	## lists the matched rules and provides some other basic
	169	## information. Try to extract something similar from the
	170	## report.
	171	##
	172	## This is rather fiddly.
	173
	174	## Firstly, escape angle brackets, because we'll be using
	175	## them for our own purposes.
	176	set acl_m_spam_tests = ${sg{$spam_report}{([!<>])}{!\$1}}
	177
	178	## Trim off the blurb paragraph and the preview. The rest
	179	## should be fairly well behaved. Wrap double angle-
	180	## brackets around the remainder; these can't appear in the
	181	## body because we escaped them all earlier.
	182	set acl_m_spam_tests = \
	183	${sg{$acl_m_spam_tests} \
	184	{\N^(?s).*\n Content analysis details:(.*)$\N} \
	185	{<<\$1>>}}
	186
	187	## Extract the information about the matching rules and
	188	## their scores. Leave `<<...>>' around everything else.
	189	set acl_m_spam_tests = \
	190	${sg{$acl_m_spam_tests} \
4ff4d304	191	{\N(?s)\n\s*(-?[\d.]+)\s+([-\w]+)\s\N} \
185b5456 MW	192	{>>\$2:\$1,<<}}
	193
	194	## Strip everything still in `<<...>>' pairs, including any
	195	## escaped characters inside.
	196	set acl_m_spam_tests = \
	197	${sg{$acl_m_spam_tests}{\N(?s)<<([^!>]+|!.)*>>\N}{}}
	198
	199	## Trim off a trailing comma.
	200	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{,\s*\$}{}}
	201
	202	## Undo the escaping.
	203	set acl_m_spam_tests = ${sg{$acl_m_spam_tests}{!(.)}{\$1}}
	204
	205	## Insert the headers.
	206	add_header = X-SpamAssassin-Score: \
	207	$spam_score/$acl_m_spam_limit_presentation \
	208	($spam_bar)
	209	add_header = X-SpamAssassin-Status: \
	210	score=$spam_score, \
	211	limit=$acl_m_spam_limit_presentation, \n\t\
	212	tests=$acl_m_spam_tests
	213
185b5456 MW	214	## We're good.
	215	accept
	216
	217	DIVERT(null)
	218	###----- That's all, folks --------------------------------------------------