
# --- SpamAssassin trainer ---------------------------------------------------
# binary used to feed messages into the bayes database
learn_bin=/usr/bin/sa-learn

# --- mail store layout -------------------------------------------------------
# root of the mail store: $maildir/<domain>/<account>/$maildir_subdir
maildir=/var/qmail/mailnames
maildir_subdir=Maildir

# --- trainer state -----------------------------------------------------------
# directory below $maildir that keeps the checkpoint and timestamp files
train_subdir=.spamtrain
train_confdir="${maildir}/${train_subdir}"

# holds the start time (epoch seconds) of the last completed training run
stamp_file="${train_confdir}/last_update"

# --- folder selection --------------------------------------------------------
# spam_dirs is filled by get_spamdirs_list; spam_subdirs lists the folder name
# patterns that are treated as spam
spam_dirs=
spam_subdirs='*.Spam*'
# per-account directory handed to sa-learn as --dbpath
spamassassin_subdir=.spamassassin

# ham_dirs is filled by get_hamdirs_list; folders matching skip_subdirs are
# neither spam nor ham
ham_dirs=
skip_subdirs=".Drafts .Sent .Trash .spamassassin @attachments courierimap* .sent-mail"

# --- limits ------------------------------------------------------------------
# do not train the spam filter on messages larger than msg_size_max (bytes)
msg_size_max=256000

# remove messages from spam folders once they are older than this many days
delete_interval=30

# --- execution context -------------------------------------------------------
# user and group that own the bayes databases; sa-learn is run as $owner
owner=popuser
group=popuser
system="$(uname -s)"

# on Linux an explicit shell is passed to su
case "$system" in
	Linux) su_opts="-s /bin/sh" ;;
esac

# --- run-time lists, filled while the script runs ----------------------------
accounts=
domains=

# current time and the expiry threshold, both in epoch seconds
cur_time="$(date +%s)"
expired_time="$(( cur_time - delete_interval * 3600 * 24 ))"

#------------------------------------------------------------------------------

# Fill the global $domains with the names of all domain directories found
# directly below $maildir, one per line.  The trainer's own state directory
# ($train_subdir) is excluded.  If $domains is already non-empty the list is
# considered cached and left untouched.
get_domains_list()
{
	[ -z "$domains" ] || return 0

	# paths are quoted so a mail root containing whitespace or glob
	# characters is passed to find as a single, literal argument
	domains="$(find "$maildir/" -maxdepth 1 -mindepth 1 -type d \
		! -name "$train_subdir" -exec basename {} \;)"
}

# Fill the global $accounts with the names of all account directories found
# directly below $maildir/<domain>, one per line.
#   $1 - domain name (directory below $maildir)
# Aborts through print_err when no domain is given.
get_accounts_list()
{
	local domain="$1"

	if [ -z "$domain" ]; then
		print_err "Domain not defined for searching mail accounts"
	fi

	# the path is quoted so it reaches find as one literal argument
	accounts="$(find "$maildir/$domain" -maxdepth 1 -mindepth 1 -type d \
		-exec basename {} \;)"
}

# Fill the global $spam_dirs with the spam folders of one mailbox.
#   $1 - domain name (directory below $maildir)
#   $2 - account name (directory below $maildir/<domain>)
# A spam folder is a direct subdirectory of the account's $maildir_subdir whose
# name matches at least one of the patterns in $spam_subdirs; directories named
# tmp, new or cur are never reported.  Names are stored one per line.
# Aborts through print_err when an argument is missing.
get_spamdirs_list()
{
	local domain="$1"
	local account="$2"
	local pattern
	local glob_was_on=''

	if [ -z "$domain" ] || [ -z "$account" ]; then
		print_err "Domain or account not defined for searching spam dirs"
	fi

	spam_dirs=''

	# The patterns are meant for find, not for the shell: pathname expansion
	# is switched off while $spam_subdirs is split, so a file in the current
	# directory can never replace a pattern.
	case "$-" in
		*f*) ;;
		*) glob_was_on=1; set -f ;;
	esac

	# Build "-name p1 -o -name p2 ..." in the positional parameters.  Every
	# pattern stays a separate word, which makes eval unnecessary.
	set --
	for pattern in $spam_subdirs; do
		if [ "$#" -eq 0 ]; then
			set -- -name "$pattern"
		else
			set -- "$@" -o -name "$pattern"
		fi
	done

	if [ -n "$glob_was_on" ]; then
		set +f
	fi

	# no patterns configured: nothing can be a spam folder
	if [ "$#" -eq 0 ]; then
		return 0
	fi

	# The alternatives are parenthesised: -o binds weaker than the implicit
	# -a, so without grouping only the last pattern would reach -exec.
	spam_dirs="$(find "$maildir/$domain/$account/$maildir_subdir" \
		-maxdepth 1 -mindepth 1 -type d \( "$@" \) \
		! -name tmp ! -name new ! -name cur -exec basename {} \;)"
}

# Fill the global $ham_dirs with the ham folders of one mailbox.
#   $1 - domain name (directory below $maildir)
#   $2 - account name (directory below $maildir/<domain>)
# The list always starts with "." (the account's $maildir_subdir itself),
# followed by every direct subdirectory whose name matches none of the
# patterns in $spam_subdirs and $skip_subdirs and is not tmp, new or cur.
# Aborts through print_err when an argument is missing.
get_hamdirs_list()
{
	# The arguments are used, not whatever $domain/$account the caller
	# happens to have set globally.
	local domain="$1"
	local account="$2"
	local pattern
	local glob_was_on=''

	if [ -z "$domain" ] || [ -z "$account" ]; then
		print_err "Domain or account not defined for searching ham dirs"
	fi

	# The patterns are meant for find, not for the shell: pathname expansion
	# is switched off while the pattern lists are split.
	case "$-" in
		*f*) ;;
		*) glob_was_on=1; set -f ;;
	esac

	# Build "! -name p1 ! -name p2 ..." in the positional parameters; every
	# pattern stays a separate word, which makes eval unnecessary.
	set --
	for pattern in $spam_subdirs $skip_subdirs; do
		set -- "$@" ! -name "$pattern"
	done

	if [ -n "$glob_was_on" ]; then
		set +f
	fi

	ham_dirs=". $(find "$maildir/$domain/$account/$maildir_subdir" \
		-maxdepth 1 -mindepth 1 -type d "$@" \
		! -name tmp ! -name new ! -name cur -exec basename {} \;)"
}

# Wrap a string in single quotes so it can be embedded into a shell command
# line as exactly one word; embedded single quotes are escaped.
#   $1 - string to quote
_spam_shell_quote()
{
	printf "'%s'" "$(printf '%s' "$1" | sed "s/'/'\\\\''/g")"
}

# Feed the messages of the given folders to sa-learn and expire old spam.
#   $1 - "spam" or "ham"; anything else aborts through print_err
#   $2 - account directory; the bayes database is $2/$spamassassin_subdir
#   $3 - whitespace separated folder names relative to $2/$maildir_subdir
# For ham only the "cur" subdirectory of each folder is scanned, for spam
# "cur", "new" and "tmp".  Per message file:
#   * files larger than $msg_size_max bytes are skipped entirely, on every
#     run (neither trained nor expired);
#   * the file is trained when there is no usable timestamp in $stamp_file
#     (first run) or when the numeric prefix of the file name (text before
#     the first dot) is greater than that timestamp;
#   * spam whose numeric prefix is below $expired_time is deleted.
# Files without a numeric prefix are trained on a first run only and are never
# deleted.  Finally the bayes journal is synced.  sa-learn is run as $owner.
# NOTE(review): the decision is based on the name prefix, so an old message
# that is moved into a folder later keeps its old prefix - confirm this is
# the intended behaviour.
spam_learn()
{
	local type="$1"
	local path="$2"
	local folders="$3"
	local subdirs="cur"
	local stamp=''
	local folder dir msg msg_file msg_date msg_size
	local dbpath_q

	[ "$type" = "ham" ] || [ "$type" = "spam" ] ||
		print_err "Wrong type of content spam/ham: $type"

	if [ "$type" = "spam" ]; then
		subdirs="$subdirs new tmp"
	fi

	if [ -f "$stamp_file" ]; then
		stamp="$(cat "$stamp_file")"
	fi
	# an empty or damaged stamp is handled like a missing one
	case "$stamp" in
		'' | *[!0-9]*) stamp='' ;;
	esac

	# Paths are quoted before they become part of the command line handed
	# to su -c: folder names are chosen by mail users and must not be
	# interpreted by the shell.
	dbpath_q="$(_spam_shell_quote "$path/$spamassassin_subdir")"

	for folder in $folders; do
		for dir in $subdirs; do
			# a folder does not need to have every subdirectory
			[ -d "$path/$maildir_subdir/$folder/$dir" ] || continue

			find "$path/$maildir_subdir/$folder/$dir" -type f |
			while IFS= read -r msg; do
				# the message may have been moved or removed meanwhile
				[ -f "$msg" ] || continue

				msg_file="${msg##*/}"
				msg_date="${msg_file%%.*}"
				case "$msg_date" in
					'' | *[!0-9]*) msg_date='' ;;
				esac

				# skip messages greater than the size limit; wc is
				# used because "stat -c" is not available everywhere
				msg_size="$(wc -c < "$msg" | tr -d '[:space:]')"
				if [ "${msg_size:-0}" -gt "$msg_size_max" ]; then
					continue
				fi

				# stdin is detached so su cannot consume the file list
				if [ -z "$stamp" ] ||
					{ [ -n "$msg_date" ] && [ "$msg_date" -gt "$stamp" ]; }; then
					su - $owner $su_opts -c "$learn_bin --$type --no-sync -L \
						--dbpath $dbpath_q $(_spam_shell_quote "$msg")" < /dev/null
				fi

				# remove expired spam messages from the mailbox
				if [ "$type" = "spam" ] && [ -n "$msg_date" ] &&
					[ "$msg_date" -lt "$expired_time" ]; then
					rm -f -- "$msg"
				fi
			done
		done
	done

	su - $owner $su_opts -c "$learn_bin --sync --dbpath $dbpath_q"
}

# Run one training pass over every account of every domain.
#
# Progress is checkpointed in $train_confdir: "domain" holds the domain being
# processed, "account" the last account of that domain that was completed.
# If both files are left over from a run that was stopped or killed, the pass
# continues after that point:
#   * domains listed before the checkpointed domain are skipped;
#   * inside the checkpointed domain, accounts up to and including the
#     checkpointed account are skipped;
#   * every later account and domain is processed normally.
# A checkpoint whose domain no longer exists is ignored (everything is
# processed); one whose account no longer exists restarts its domain.
# After a complete pass the checkpoint is removed and $cur_time is written to
# $stamp_file.
# NOTE(review): resuming assumes get_domains_list/get_accounts_list return
# their entries in the same order on every run - confirm for the file system
# in use.
spam_train()
{
	local domain
	local account
	local cur_domain=''
	local cur_account=''
	local resume_domain=''
	local resume_account=''
	local known
	local box
	local domain_file="$train_confdir/domain"
	local account_file="$train_confdir/account"

	if [ -f "$domain_file" ]; then
		cur_domain="$(cat "$domain_file")"
		if [ -f "$account_file" ]; then
			cur_account="$(cat "$account_file")"
		fi
	fi

	get_domains_list

	# honour the checkpoint only while its domain still exists
	if [ -n "$cur_domain" ]; then
		for domain in $domains; do
			if [ "$domain" = "$cur_domain" ]; then
				resume_domain="$cur_domain"
				break
			fi
		done
	fi

	for domain in $domains; do
		resume_account=''

		# begin from the previous point if the training was stopped or
		# killed before; once that point is reached nothing is skipped
		if [ -n "$resume_domain" ]; then
			[ "$domain" = "$resume_domain" ] || continue
			resume_domain=''
			resume_account="$cur_account"
		fi

		get_accounts_list "$domain"

		# skip accounts only if the checkpointed one is still present
		if [ -n "$resume_account" ]; then
			known=''
			for account in $accounts; do
				if [ "$account" = "$resume_account" ]; then
					known=1
					break
				fi
			done
			[ -n "$known" ] || resume_account=''
		fi

		# The account checkpoint always belongs to the domain in
		# $domain_file, so it is cleared before a domain is started anew.
		if [ -z "$resume_account" ]; then
			: > "$account_file"
		fi
		echo "$domain" > "$domain_file"

		for account in $accounts; do
			if [ -n "$resume_account" ]; then
				# already completed by the interrupted run
				if [ "$account" = "$resume_account" ]; then
					resume_account=''
				fi
				continue
			fi

			box="$maildir/$domain/$account"

			get_spamdirs_list "$domain" "$account"
			get_hamdirs_list "$domain" "$account"

			if [ ! -d "$box/$spamassassin_subdir" ]; then
				mkdir "$box/$spamassassin_subdir" || \
					print_err "Unable to create spamassassin subdir for ${account}@${domain}"

				chmod 700 "$box/$spamassassin_subdir"
				chown "$owner:$group" "$box/$spamassassin_subdir"
			fi

			# accounts without a spam folder are not trained at all
			if [ -n "$spam_dirs" ]; then
				spam_learn "spam" "$box" "$spam_dirs"
				spam_learn "ham" "$box" "$ham_dirs"
			fi

			echo "$account" > "$account_file"
		done
	done

	# the pass was not killed/stopped: drop the checkpoint ...
	rm -f "$domain_file" "$account_file"

	# ... and remember when this pass was started
	echo "$cur_time" > "$stamp_file"
}

# Report a fatal error and terminate the script.
#   $* - message words; printed as one line prefixed with "ERROR: "
# The message goes to stderr so it never mixes with regular output.
# Exits the current shell with status 1 and therefore does not return.
print_err()
{
	printf 'ERROR: %s\n' "$*" >&2
	exit 1
}

#------------------------------------------------------------------------------

# Sanity checks: refuse to run when the mail root or the trainer binary path
# is empty
if [ -z "$maildir" ] || [ -z "$learn_bin" ]; then
	print_err "There are some arguments is not defined."
fi

# Make sure the directory that keeps the training state exists
[ -d "$train_confdir" ] || mkdir $train_confdir ||
	print_err "Unable to create config dir: $train_confdir"

# Run the training pass
spam_train


