#!/usr/local/bin/perl -w
use strict;

# $Id: process-spam.pl,v 1.4 2004/10/07 03:49:44 perlstalker Exp $

use Pod::Usage;
use File::Find;
use DBI;

###############################################################################
# Configuration
###############################################################################
# Database driver.
# DBI driver name; db_connect() uses it to build the "dbi:<driver>:..." DSN.
my $db_driver = 'mysql';

# Database name
my $db_dbname = 'Accounts';

# Database host name
my $db_host = 'localhost';

# Database username and password
# NOTE(review): the password is stored in plain text in this script, so the
# script itself should presumably not be world-readable -- confirm.
my $db_user = 'courier';
my $db_pass = 'pass';

# Number of times to retry DB connections.
my $db_retries = 3;

# SpamAssassin user preferences table.
# prep_sth() reads the 'x-spam-days' preference for a username from this table.
my $userprefs_table = 'SA_userprefs';

# Log spam statistics
# One line of statistics is appended to this file per (non dry-run) run.
# Set to an empty string to disable logging.
my $spam_log = '/usr/local/www/cgi-data/spam.log';

# Make script quieter.
# When true, the summary of totals at the end of the run is not printed.
my $quiet = 0;

# The default top level to the maildirs.
# Used when no maildir is given on the command line.
my $default_maildir = '/var/mail/virtual';

# Spam folder name
# Matched as the Maildir directory ".<name>" (its new/ and cur/ subdirectories).
my $spam_folder = 'Spam';

# LEARNING OPTIONS
# Path to sa-learn
my $sa_learn = '/usr/local/bin/sa-learn';

# Path to dspam
my $dspam = '/usr/local/dspam/bin/dspam';

# These folder names should match $learn_spam_folder and $learn_fp_folder
# in sasql_conf.php
# Learn spam folder
# Messages found in this folder are passed to @learn_spam_cmds and then deleted.
my $learn_spam = 'Learn Spam';

# Learn false positive
# Messages found in this folder are passed to @learn_fp_cmds and then deleted.
my $learn_fp = 'Learn FP';

# Hmm. It seems that sa-learn doesn't support SQL user prefs or
# virtual users yet. :-( I've learned that SA is putting bayes stuff
# in SQL in HEAD so perhaps soon.
#
# Each entry below is a shell command line. Before it is run, run_learners()
# replaces the placeholders {user} (the account as user@domain), {file} (the
# message file) and {dir} (the directory containing it).
#
# Commands to run when learning spam
my @learn_spam_cmds = (
		       "$sa_learn --spam --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
		       "$dspam --user {user} --addspam < '{file}'"
		       );

# Commands to run when learning false positives
my @learn_fp_cmds = (
		     "$sa_learn --ham --no-rebuild --configpath $default_maildir/{user}/.spamassassin < '{file}'",
		     "$dspam --user {user} --falsepositive < '{file}'"
		     );

###############################################################################
# Code "Abandon hope all ye enter here."
###############################################################################

# Debug switch. When it returns true the subs below print what they are
# doing, and run_learners() only prints its commands instead of running them.
sub DEBUG { 0 };

# Database handle and prepared statement shared with rm_old_spam(), which
# replaces both if the server connection is lost.
my $dbh = db_connect();
my $sth = prep_sth($dbh);

# Unbuffered STDOUT so progress output appears immediately.
$| = 1;

# Command line: [-n] [-d days] [maildir]
#
# Options and positional arguments are separated while scanning, so that an
# option (or the value given to -d) is never mistaken for the maildir when
# no maildir is supplied.
my @args = @ARGV;
my $dry_run = 0;
my $default_stale_days = 14;
my @positional;
for (my $i = 0; $i < @args; $i++)
{
    if    ($args[$i] eq '-n') { $dry_run = 1; }
    elsif ($args[$i] eq '-d') { $default_stale_days = $args[++$i]; }  # consume the value too
    else                      { push @positional, $args[$i]; }
}
# A missing, zero or non-numeric -d value falls back to the built-in default.
$default_stale_days = 14 if not $default_stale_days or $default_stale_days =~ /\D/;

# As before, the last non-option argument wins; otherwise use the default.
my $maildir = pop(@positional) || $default_maildir;

print "Dry run: Not doing anything\n" if $dry_run;

# Running totals, updated by rm_old_spam().
my $spam_killed = 0;
my $spam_killed_size = 0;
my $spam_total = 0;
my $spam_total_size = 0;

process_spam();

if (not $quiet) {
    print "Total spam: $spam_total\n";
    print "Total size: $spam_total_size bytes (", bytes_to_human($spam_total_size, 'm'), " M)\n";
    print "Deleted: $spam_killed\n";
    print "Deleted size: $spam_killed_size bytes (", bytes_to_human($spam_killed_size, 'm'), " M)\n";
}

# Append "time|total|total_size|killed|killed_size" to the statistics log.
# Nothing is logged for a dry run.
if ($spam_log and not $dry_run)
{
    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle is scoped to this block.
    open(my $log_fh, '>>', $spam_log) or die "Can't append to $spam_log: $!\n";
    print {$log_fh} time(), "|";
    print {$log_fh} "$spam_total|$spam_total_size|$spam_killed|$spam_killed_size\n";
    # Buffered write errors only surface at close time.
    close($log_fh) or warn "Can't close $spam_log: $!\n";
}

$dbh->disconnect;

# Walk the directory tree rooted at $maildir and hand every entry found
# to check_spam().
sub process_spam
{
    my $start_dir = $maildir;
    return find({ wanted => \&check_spam }, $start_dir);
}

# File::Find callback. Looks at the directory currently being visited and
# passes the current entry to the handler for that kind of folder:
#
#   .<spam folder>/new|cur    -> rm_old_spam()
#   .<learn spam>/new|cur     -> learn_spam()
#   .<learn fp>/new|cur       -> learn_fp()
#
# Each handler receives the directory, the entry's full name and the account
# name. The account ("user@domain") is taken from a path of the form
# .../virtual/<domain>/<xx>/<user>/Maildir/... and is the empty string when
# the directory does not have that form.
sub check_spam
{
    my $here  = $File::Find::dir;
    my $entry = $File::Find::name;

    # In list context the match returns its two captures (or nothing).
    my ($domain, $mailbox) = $here =~ m!/\Qvirtual\E/	# dir
	          (.+)		# domain
	          /../		# 1st 2 chars of user
                  (.+)		# username
                  /Maildir/	# The Maildir
	!x;
    my $username = defined $mailbox ? "$mailbox\@$domain" : '';

    if ($here =~ m!/\.\Q$spam_folder\E/(?:new|cur)$!o) {
        return rm_old_spam($here, $entry, $username);
    }
    if ($here =~ m!\.\Q$learn_spam\E/(?:new|cur)$!o) {
        return learn_spam($here, $entry, $username);
    }
    if ($here =~ m!\.\Q$learn_fp\E/(?:new|cur)$!o) {
        return learn_fp($here, $entry, $username);
    }
    return;
}

# Count one message in a spam folder and delete it if it is old enough.
#
# Called from the File::Find callback, so $_ is the entry's base name and the
# current directory is the folder being visited.
#
# Arguments:
#   $dir  - directory being visited (used for debug output only)
#   $name - full name of the entry (used in messages)
#   $user - account name as "user@domain", or '' if it could not be derived
#           from the path (check_spam() derives it from the directory)
#
# The age limit is $default_stale_days unless the account has a usable
# 'x-spam-days' preference in the database. Updates the $spam_total* and
# $spam_killed* counters. In a dry run the deletion is only printed (and
# still counted).
sub rm_old_spam
{
    my $dir = shift;
    my $name = shift;
    my $user = shift;

    # The message may have been moved or expunged by the mail server since the
    # directory was read; -M then returns undef and there is nothing to do.
    my $days_old = -M $_;
    return unless defined $days_old;

    my $age = int($days_old);
    my $size = (-s _) || 0;     # -s yields a false value for an empty file
    ++$spam_total;
    $spam_total_size += $size;

    print "$dir\n" if DEBUG;

    my $stale_days = $default_stale_days;

    if (defined $user and length $user)
    {
        print "$user\n" if DEBUG;

        # Look up the per-user limit. Only a lost server connection is worth
        # retrying, and only $db_retries times in total, so a persistent
        # failure can never turn into an endless loop.
        my $attempt = 0;
        my $done = 0;
        while ($attempt < $db_retries and not $done)
        {
            ++$attempt;
            if ($sth->execute($user))
            {
                my $res = $sth->fetchrow_hashref();
                $sth->finish;
                # Ignore a missing or non-numeric preference: comparing the
                # age against it would treat it as 0 and delete everything.
                if ($res
                    and defined $res->{value}
                    and $res->{value} =~ /\A\d+(?:\.\d+)?\z/)
                {
                    $stale_days = $res->{value};
                }
                $done = 1;
            }
            elsif (($sth->errstr || '') =~ /MySQL server has gone away/)
            {
                # Reconnect and re-prepare, then let the loop try again.
                $dbh = db_connect();
                $sth = prep_sth($dbh);
            }
            else
            {
                warn "Can't read settings for $user: "
                    .($sth->errstr || 'unknown error')."\n";
                last;   # not a connection problem; fall back to the default
            }
        }
    }

    print "User's stale_days = $stale_days\n" if DEBUG;

    if ($age >= $stale_days)
    {
        if ($dry_run)
        {
            print("unlink $name\t",
                  "($age >= ", "$stale_days",
                  ")\n");
        }
        else
        {
            print("unlink $name age = $age\n") if DEBUG;
            # find() has changed into the folder, so remove the entry by its
            # base name; the full name only works for an absolute start path.
            unlink $_
                or warn "Can't unlink $name: $!\n";
        }
        ++$spam_killed;
        $spam_killed_size += $size;
    }
}

# Handle a message from the "learn spam" folder: forwards its arguments
# (directory, file, user) to run_learners() with the spam flag set.
sub learn_spam
{
    my @message_info = @_;
    return run_learners(@message_info, 1);
}

# Handle a message from the "learn false positive" folder: forwards its
# arguments (directory, file, user) to run_learners() with the spam flag clear.
sub learn_fp
{
    my @message_info = @_;
    return run_learners(@message_info, 0);
}

# Run the configured learning commands on one message, then delete it.
#
# Arguments:
#   $dir  - directory containing the message
#   $file - full name of the message file
#   $user - account name as "user@domain" (may be empty)
#   $spam - true: use @learn_spam_cmds; false: use @learn_fp_cmds
#
# In each command template the placeholders {user}, {file} and {dir} are
# replaced and the result is run through the shell. When DEBUG is on, or in
# a dry run, the commands are only printed and the message is left in place.
sub run_learners
{
    my $dir = shift;
    my $file = shift;
    my $user = shift;
    my $spam = shift;

    $user = '' unless defined $user;

    # Work on a copy so the configured templates are never modified.
    my @cmds = $spam ? @learn_spam_cmds : @learn_fp_cmds;

    # SECURITY: the values below come from directory and file names (folder
    # names can be chosen by mail users) and are pasted into a shell command
    # line. The shipped templates put {file} inside single quotes and use
    # {user} unquoted, so refuse anything that could break out of that
    # quoting rather than run a mangled or injected command.
    if ($file =~ /'/ or $dir =~ /'/ or $user =~ m![^A-Za-z0-9_.\@+,:=%/-]!) {
        warn "Not learning from $file: unsafe characters in path or user name\n";
        return;
    }

    my %value_for = (user => $user, file => $file, dir => $dir);
    my $preview_only = (DEBUG() or $dry_run);

    foreach my $cmd (@cmds) {
        # Single pass, so substituted text is never scanned for placeholders.
        $cmd =~ s/\{(user|file|dir)\}/$value_for{$1}/g;
        if ($preview_only) {
            print "$cmd\n";
            next;
        }
        # Report failures, but keep going: not every site has every learner
        # installed, so one failing command must not block the others.
        system($cmd) == 0
            or warn "Learn command failed (status $?): $cmd\n";
    }

    return if $preview_only;
    unlink $file
        or warn "Can't unlink $file: $!\n";
}

# Show the usage message generated from this script's POD by calling
# Pod::Usage's pod2usage() with its default settings.
sub help()
{
    return pod2usage();
}

# Convert a byte count into another unit.
#
# Arguments:
#   $bytes  - the size in bytes
#   $format - unit to return, case-insensitive: 'b' (bytes), 'k', 'm' or 'g'
#             (successive divisions by 1024); defaults to 'k'
#
# Returns the converted number without any rounding, or a false value when
# the unit is not one of the four above.
sub bytes_to_human
{
    my ($bytes, $format) = @_;
    $format ||= 'k';

    # $scaled[0] is the byte count; each further slot is the previous / 1024.
    my @scaled = ($bytes);
    push @scaled, $scaled[-1] / 1024 for 1 .. 3;

    my %slot_for = (b => 0, k => 1, m => 2, g => 3);
    my $slot = $slot_for{lc $format};

    return defined $slot ? $scaled[$slot] : !1;
}

# Open a database connection using the $db_* settings from the configuration
# section. Returns the DBI database handle; dies with the DBI error text if
# the connection cannot be made.
sub db_connect
{
    my $dsn = "dbi:$db_driver:database=$db_dbname;host=$db_host";
    my $handle = DBI->connect($dsn, $db_user, $db_pass);

    die "Can't connect to DB: ".$DBI::errstr."\n" unless $handle;

    return $handle;
}

# Prepare the statement that fetches a user's 'x-spam-days' preference from
# $userprefs_table. The user name is supplied as the single bind value at
# execute time.
#
# Takes a DBI database handle; returns the prepared statement handle, or dies
# with the handle's error text if the statement cannot be prepared.
sub prep_sth
{
    my ($handle) = @_;

    my $query = "SELECT value from $userprefs_table where username=? and preference='x-spam-days';";
    my $statement = $handle->prepare($query);
    unless ($statement) {
        die "Can't prepare SQL: ".$handle->errstr."\n";
    }

    return $statement;
}

__END__

=head1 NAME

process-spam.pl - Clean out old spam

=head1 SYNOPSIS

 process-spam.pl [-n] [-d days] [maildir]

=head1 DESCRIPTION

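Clean out old messages from users' .Spam folders. Messages found in the
"Learn Spam" and "Learn FP" folders are passed to the configured learning
commands and then removed.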

=over 4

=item -n

Dry run. Don't actually delete the files. Instead print what would have
been done.

=item -d days

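Number of days for a file to be considered old. Defaults to 14. If a user
has an C<x-spam-days> preference in the database, that value is used for
the user's folders instead.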

=item maildir

The location of the mail dirs. If omitted, the default set in the script's
configuration section (C<$default_maildir>) is used.

=back

=head1 LICENSE

 Copyright (c) 2003 Randy Smith
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 SUCH DAMAGE.

=head1 AUTHOR

Randy Smith <randys@amigo.net>

=cut

