#!/usr/bin/perl -w use strict; # $Id: mbox-purge,v 1.18 2005-03-01 16:57:15 roderick Exp $ # # Copyright (c) 1997 Roderick Schertler. All rights reserved. This # program is free software; you can redistribute it and/or modify it # under the same terms as Perl itself. use sigtrap qw(die untrapped normal-signals); use POSIX qw(:errno_h); use Proc::WaitStat qw(waitstat_die); use RS::Handy qw(:stat $Me xdie badinvo exclusive_create mbox_read_head mbox_read_body mbox_escape); use File::Spec::Functions qw[splitpath canonpath splitdir abs2rel]; # Compile code from the user. This comes before anything else so it # can't access my lexicals. sub user_eval { @_ == 1 || badinvo; local $SIG{__DIE__}; no strict 'vars'; return eval shift; } my $Usage = <(\\\$head, \\\$body, \\\$msg) returns true Any messages in the given files which match all the criteria are deleted. Use \`perldoc $Me\' to see the full documentation. EOF my $Debug = 0; my $Exit = 0; my $No = 0; my $Quiet = 0; my @Tmp = (); my $Verbose = 0; my $Archive = 0; my $Version = q$Revision: 1.18 $ =~ /(\d\S+)/ ? $1 : '?'; # globals with info about current message my $File_name = undef; my $Archive_name = undef; my $Msg_num = undef; my %Message_info = (); my $archivepath = "z_Archive"; # first found is used, sub can return either undef or -1 on failure my @Parse_date = qw(Date::Parse::str2time Date::GetDate::getdate); sub xwarn { RS::Handy::xwarn @_; $Exit ||= 1; } sub usage { xwarn @_ if @_; die $Usage; } sub info { print "$Me: ", @_, "\n" unless $Quiet; } sub verbose { print "$Me: ", @_, "\n" if $Verbose; } sub debug { print "debug: ", @_, "\n" if $Debug; } sub create_tmp { my ($orig) = @_; my ($base, $ext, $new, $fh); $base = "$orig.tmp.$$"; $ext = 1; $new = $base; until ($fh = exclusive_create $new) { $! == EEXIST or xdie "can't create $new:"; xdie "can't create a file named like $base.* in $ext tries\n" if $ext == 100; $ext++; $new = "$base.$ext"; } return $new, $fh; } { my $sub; sub parse_date { my ($in) = @_; if (!$sub) { for my $full (@Parse_date) { (my $mod = $full) =~ s/::[^:]+$// or die; next unless eval "require $mod"; if (!defined &$full) { xwarn "$mod doesn't define $full\n"; next; } debug "parse_date using $full"; $sub = do { no strict 'refs'; \&$full }; last; } $sub or xdie "no date parsing function available, I tried to load:", " @Parse_date\n"; } my $t = $sub->($in); if (defined $t && $t == -1) { $t = undef; } if ($Debug) { my $out = defined $t ? localtime $t : undef; debug sprintf "%s -> %s (%s)", map { defined $_ ? $_ : 'undef' } $in, $t, $out; } return $t; } } sub parse_from_line { @_ == 0 || badinvo; return if exists $Message_info{from_line}; if (${ $Message_info{rhead} } !~ /^(From\s+.*)/) { info "no From_ line in message $Msg_num of $File_name"; $Message_info{from_line} = undef; return; } $Message_info{from_line} = $1; if ($Message_info{from_line} !~ /^From \s+ (.*?) \s+ (\w\w\w \s \w\w\w \s+ \d+ \s+ \d+:\d+.*)/x) { info "can't parse From_ line in message $Msg_num of $File_name"; return; } $Message_info{from_sender} = $1; $Message_info{from_date} = $2; $Message_info{delivery_time} = parse_date $Message_info{from_date} or info "invalid delivery date ($Message_info{from_date})", " in message $Msg_num of $File_name"; } sub delivery_time { @_ == 0 || badinvo; parse_from_line unless exists $Message_info{from_line}; return $Message_info{delivery_time}; } sub envelope_sender { @_ == 0 || badinvo; parse_from_line unless exists $Message_info{from_line}; return $Message_info{from_sender}; } sub header_all { @_ == 1 || badinvo; my ($pat) = @_; return ${ $Message_info{rhead} } =~ /^$pat\s*:\s*(.*)/gim; } sub header_first { return (header_all @_)[0]; } sub header_last { return (header_all @_)[-1]; } sub main { my (@rule, $any_date); @ARGV || usage; while (@ARGV && $ARGV[0] =~ /^-/) { $_ = shift @ARGV; if ($_ eq '--') { last; } elsif (/^--?debug\z/) { $Debug = 1; } elsif ($_ eq '--help') { usage; } elsif ($_ eq '--no') { $No = 1; } elsif (/^--?quiet\z/) { $Quiet = 1; } elsif (/^--?verbose\z/) { $Verbose = 1; } elsif (/^--?archive\z/) { $Archive = 1; } elsif (/^--?noarchive\z/) { $Archive = 0; } elsif ($_ eq '--version') { print "$Me version $Version\n"; exit 0; } elsif (/^--?(before|after)\z/) { my $rule = $1; @ARGV or xdie "no arg for $rule\n"; my $spec = shift @ARGV; my $time = parse_date $spec; defined $time && $time > 0 or xdie "invalid time `$spec'\n"; # getdate() has ambituities (eg, 040101 = 2004-01-01), so # help by choking on dates in the future. if ($time > time) { xdie "$rule value is in the future (", scalar localtime $time, ")\n"; } push @rule, [$rule, $time]; $any_date = 1; } elsif (/^--?((head-|body-)?pattern)\z/) { my $rule = $1; @ARGV or xdie "no arg for $rule\n"; my $pat = shift @ARGV; my $sub = eval 'sub { ${ $_[0] } =~ /$pat/om }'; # Validate and compile the pattern by calling the closure # for the first time. eval { $sub->(\ "") }; # space after \ helps emacs if ($@) { $@ =~ s/ at .eval \d+. line \d+.\n//; xdie "invalid pattern `$pat': $@\n"; } push @rule, [$rule, $sub]; } elsif (/^--?(eval)\z/) { my $rule = $1; @ARGV or xdie "no arg for $rule\n"; my $code = shift @ARGV; my $sub = user_eval "sub { $code }"; if ($@) { $@ =~ s/ at .eval \d+. line \d+.\n//; xdie "invalid -eval code `$code': $@\n"; } push @rule, [$rule, $sub]; } else { usage "invalid switch $_\n"; } } @rule or usage "no rules specified\n"; @ARGV or usage "no files specified\n"; for my $file_name (@ARGV) { my ($lock_file, @stat, $new_file, $archive_file, $new_fh, $archive_fh, $n_kept, $a_kept, @stat2); $File_name = $file_name; verbose "processing $File_name"; $lock_file = "$File_name.lock"; system qw(lockfile -1 -r 10), $lock_file; waitstat_die $?, "lockfile for $lock_file"; push @Tmp, $lock_file; open FILE, $File_name or xdie "can't read $File_name:"; if ($Archive && !$No) { # split a path into logical pieces my ($volume, $dir_path, $file_fixed) = splitpath( $File_name ); $dir_path = canonpath $dir_path; unless ($dir_path) { $dir_path = "./"; } $archivepath = $dir_path."/".$archivepath; mkdir $archivepath; $Archive_name = $archivepath."/".$file_fixed."-Archive"; open($archive_fh,">>",$Archive_name) or xdie "can't write $Archive_name:"; } @stat = stat FILE or xdie "error statting open $File_name:"; ($new_file, $new_fh) = create_tmp $File_name; push @Tmp, $new_file; # XXX These are a security hole when this is run as root on user's # files. I need fchmod() and fchown(). chmod $stat[ST_MODE], $new_file or xdie "can't chmod $new_file:"; chown @stat[ST_UID, ST_GID], $new_file or xdie "can't chmod $new_file:"; $Msg_num = $n_kept = $a_kept = 0; while (my ($orig_head, $clen) = mbox_read_head *FILE) { my ($head, $body, $msg, $keep, $delivery_time); %Message_info = (); my $read_body = sub { $body = mbox_read_body *FILE, 0, $clen; $msg = "$orig_head\n$body"; }; ($head = $orig_head) =~ s/\n[ \t]+/ /g; $Msg_num++; $Message_info{rhead} = \$head; # I'm not using Mail::Header because it doesn't handle # From_.*\n>From headers. if ($any_date) { parse_from_line; $delivery_time = $Message_info{delivery_time}; } $keep = 0; for my $rrule (@rule) { my ($rule, @arg) = @$rrule; # This is done a little backwards. The default is to # purge messages. If a rule matches (meaning to purge # this message) a simple next is done. Any rule which # doesn't match (meaning to keep this message) falls to # the bottom from whence the loop is exited (since I # only purge if all rules match). if ($rule eq 'before') { next if defined $delivery_time && $delivery_time < $arg[0]; } elsif ($rule eq 'after') { next if defined $delivery_time && $delivery_time > $arg[0]; } elsif ($rule eq 'pattern') { $read_body->() if !defined $body; next if $arg[0]->(\$msg); } elsif ($rule eq 'head-pattern') { next if $arg[0]->(\$head); } elsif ($rule eq 'body-pattern') { $read_body->() if !defined $body; next if $arg[0]->(\$body); } elsif ($rule eq 'eval') { $read_body->() if !defined $body; next if $arg[0]->(\$head, \$body, \$msg); } else { xdie "bug: bad rule `$rule'\n"; } # This rule didn't match, therefore keep this message. $keep = 1; last; } # xxxxxxxxx if (!$keep && $Archive && !$No) { $read_body->() if !defined $body; $a_kept++; print $archive_fh mbox_escape $msg or xdie "error writing to $archive_file:"; } if (!$keep && $Archive && $No) { $a_kept++; } if (!$keep) { mbox_read_body *FILE, 1, $clen if !defined $body; next; } $read_body->() if !defined $body; $n_kept++; print $new_fh mbox_escape $msg or xdie "error writing to $new_file:"; } my $n_dropped = $Msg_num - $n_kept - $a_kept; close $new_fh or xdie "error closing $new_file:"; @stat2 = stat FILE or xdie "error doing stat 2 on open $File_name:"; $stat[ST_MTIME] == $stat2[ST_MTIME] or xdie "$File_name was modified while I had it locked\n"; close FILE or xdie "error closing $File_name:"; @Tmp = grep { $_ ne $new_file } @Tmp; if ($No || ($n_dropped == 0 && $a_kept == 0) ) { unlink $new_file or xdie "error unlinking $new_file:" } else { rename $new_file, $File_name or xdie "error renaming $new_file to $File_name:"; unlink $File_name or xdie "error unlinking $File_name:" if $n_kept == 0; } unlink $lock_file or xdie "error unlinking $lock_file:"; @Tmp = grep { $_ ne $lock_file } @Tmp; info sprintf "%5d kept %5d discarded %5d archived %s", $n_kept, $n_dropped, $a_kept, $File_name; } return 0; } END { for (@Tmp) { unless (unlink) { xwarn "error unlinking $_:"; $? = 1 unless $?; } } } $Exit = main || $Exit; $Exit = 1 if $Exit and not $Exit % 256; exit $Exit; __END__ =head1 NAME mbox-purge - perform batch deletion of mail messages from mbox files =head1 SYNOPSIS B [B<--debug>] [B<--help>] [B<--no>] [B<--quiet>] [B<--verbose>] [B<--version>] [B<--before> I] [B<--after> I] [B<--pattern> I] [B<--head-pattern> I] [B<--body-pattern> I] [B<--eval> I] I... =head1 DESCRIPTION B performs batch deletion of email messages from mbox format files based on rules you specify. It uses F-style locking (using B's B under the hood). Because of this you have to have write permission in the directory in which the I being processed is stored. The file to be processed can be in mbox, mboxrd, mboxcl, mboxcl2 or buggy Elm mboxcl2 format. The data written will always be in mboxrd format. See http://www.qmail.org/qmail-manual-html/man5/mbox.html for an explanation of these terms. =head1 OPTIONS - GENERAL =over =item B<--debug> Turn debugging on. =item B<--help> Show the help and die. =item B<--no> Don't actually modify any files, just go through the motions. =item B<--quiet> Suppress informational messages. =item B<--verbose> Output additional informational messages. =item B<--version> Show the version and exit. =back =head1 OPTIONS - MESSAGE SELECTION If multiple rules are given they all have to match for a message to be purged. =over =item B<--before> I Purge messages delivered before I. =item B<--after> I Purge messages delivered after I. =item B<--pattern> I Purge messages which match I. The pattern is run against the message after its mbox-style encoding has been unescaped. The match uses Perl's //m flag. =item B<--head-pattern> I Purge messages which match I. The pattern is run against the message after its mbox-style encoding has been unescaped. The match uses Perl's //m flag. Additionally, the headers have line continuations undone (newline followed by whitespace is replaced with a single space) before the match. =item B<--body-pattern> I Purge messages whose bodies match I. The pattern is run against the message after its mbox-style encoding has been unescaped. The match uses Perl's //m flag. =item B<--eval> I Evaluate I and purge the message if it returns true. I is compiled as the body of a subroutine. The subroutine receives references to the head, body and full text of the message as its arguments. The head argument has had continuation lines undone, and the body in both of the second arguments has had its mbox encoding unescaped. See also L. =back =head1 CONVENIENCE SUBS Here are some subs you can use from code passed in via B<-eval>: =over =item B Return the epoch time() when the message was delivered, as read from the From_ line. =item B Return the envelope sender, as read from the From_ line. =item B I Return the data part of all the header lines whose field names match I. Eg, my @recv = header_all 'Received'; =item B I =item B I These are like B, but they only return the first or last matching header. =item B I Return the epoch time() which corresponds to I, or B. =back =head1 EXAMPLES # Delete messages older than the given date from all your folders. mbox-purge --before 2000-05-01 ~/Mail/* # Delete messages from April 2000. mbox-purge --before 2000-05-01 --after 2000-03-31 file # Delete a chain letter from all user's mailboxes. mbox-purge \ --head-pattern '^Subject: (Re: )?GOOD LUCK TOTEM( \(fwd\))?$' \ /var/spool/mail/* # Delete messages larger than 1M. mbox-purge --eval 'length ${ $_[2] } > 1_000_000' file # Delete messages older than 6 months from all your folders. mbox-purge --eval 'time - delivery_time > 60*60*24 * 30 * 6' ~/Mail/* # Same, but use the Date: field's date rather than the delivery date. mbox-purge --eval 'time - parse_date(header_first "Date") > 60*60*24 * 30 * 6' ~/Mail/* =head1 BUGS You can't delete from your mail spool on a system which doesn't have a world-writable spool directory if you're a regular user, both because B doesn't know to special-case B's invocation for that and because it creates the temporary file in the same directory as the file it is processing. =head1 TODO - Add --purged-to (name?) to output purged messages somewhere? =head1 CHANGES $Log: mbox-purge,v $ Revision 1.18 2005-03-01 16:57:15 roderick Oops, set $File_name correctly. Revision 1.17 2004-09-02 10:49:57-04 roderick Important changes: Use the delivery date rather than the Date: header for --before and --after. Add and prefer --switch to -switch, but still allow the latter for old switches. Add convenience subs: delivery_time(), envelope_sender(), header_all(), header_first(), header_last(). Add --help, --no, --quiet, --verbose, --version. Less important: If no messages were purged from a file, leave it as is rather than replacing it with the new (identical) copy. Treat a parsed date of -1 as undef. Add %Message_info, $File_name, $Msg_num. Improve the usage message. For --eval, turn off strict vars, and don't let the user get at my lexicals. Don't trap signals which were ignored. =head1 AUTHOR Roderick Schertler > =cut