#!/usr/local/bin/perl
#
# MKDIGEST(1)                      1996                      MKDIGEST(1)
#
# NAME
#      mkdigest - Create an RFC-compliant digest from an mbox file
#      of messages
#
# SYNOPSIS
#      mkdigest [-r style] [-h file] [-f file] [-t] < mbox > digest
#
# DESCRIPTION
#      mkdigest accepts an mbox-style file of email messages on its
#      standard input and produces a single digest of the messages
#      on its standard output.  An mbox file is a file of email
#      messages separated by lines beginning with the 5 characters
#      "From ".
#
#      mkdigest does not produce the headers for the digest message
#      itself (except for MIME-Version: and Content-Type: if
#      MIME-style digests are being created).
#
# OPTIONS
#      -r style
#           The -r option controls the style of the digest.  Three
#           digest styles are supported: "1153", which conforms to
#           RFC 1153, LISTSERV-style digests; "934", which conforms
#           to RFC 934, the preferred non-MIME digest format; and
#           "1521", which conforms to RFC 1521, the MIME
#           multipart/digest format.  The default format is 934.
#           1153 can also be referred to as "l"; 1521 can also be
#           referred to as "m".
#
#      -h file
#           The -h option specifies a file to include in the
#           digest's prologue.  The file is inserted before the
#           table of contents, if any.
#
#      -f file
#           The -f option specifies a file to include as the
#           digest's epilogue.  The file is inserted after the last
#           message in the digest.
#
#      -t   The -t option causes mkdigest to produce a table of
#           contents at the beginning of the digest, listing the
#           subject and author of each message.
#
# AUTHOR
#      Alan Schwartz
#
# SEE ALSO
#      Internet RFC's 934, 1153, 1521
#

# Getopt::Std is the maintained replacement for the Perl 4 library
# getopts.pl, which is no longer shipped with Perl (5.16 and later).
use Getopt::Std;

# getopts() stores each switch in the package variable $opt_<letter>.
our ($opt_h, $opt_f, $opt_t, $opt_r);

# If stdin is a terminal there is no mbox to read: show usage info.
# usage() is defined later in this file and does not return.
usage() if -t STDIN;

# Handle command-line args; an unrecognised switch is a usage error.
getopts('h:f:tr:') or usage();

# Map the -r argument onto one of the three supported digest styles.
# The numeric names are compared as strings so that arbitrary text
# (which is numerically 0) can never be mistaken for a valid style.
if (!$opt_r) {
    $style = 934;                      # default: RFC 934
}
elsif ($opt_r =~ /^l/i) {
    $style = 1153;                     # "l" = LISTSERV
}
elsif ($opt_r =~ /^m/i) {
    $style = 1521;                     # "m" = MIME
}
elsif ($opt_r eq '934' || $opt_r eq '1153' || $opt_r eq '1521') {
    $style = $opt_r;
}
else {
    usage();
}

# Return the lines of an optional prologue/epilogue file.
#   $path  - file named on the command line
#   $label - "header" or "footer", used only in the warning text
# An unreadable file is reported with a warning and yields an empty
# list, so the digest is still produced without it.
sub _read_optional_file {
    my ($path, $label) = @_;
    my $fh;
    unless (open($fh, '<', $path)) {
        warn "Unable to open $label file $path, skipping.\n";
        return ();
    }
    my @lines = <$fh>;
    close($fh);
    return @lines;
}

@header = _read_optional_file($opt_h, 'header') if $opt_h;
@footer = _read_optional_file($opt_f, 'footer') if $opt_f;

$table_of_contents = $opt_t;

# We've got to read in the entire stdin in order to make
# the table of contents and to ensure that mime boundaries
# are unique.
$boundary = "a" if $style == 1521;

# The headers that are copied into the digest, keyed by their
# normalised name ("-" becomes "_", letters lowercased).  Each one is
# kept in a per-message array indexed by message number.  Header names
# come from untrusted mail, so anything not listed here is ignored
# rather than being turned into a variable name.
my %field_for = (
    date       => \@date,
    from       => \@from,
    to         => \@to,
    cc         => \@cc,
    subject    => \@subject,
    message_id => \@message_id,
    keywords   => \@keywords,
    summary    => \@summary,
);

# Array receiving the header currently being parsed; undef while the
# current header is one we do not keep, so that its continuation
# lines are dropped as well.
my $field;

while (<STDIN>) {
    # A "From " line outside a header block starts a new message.
    if (!$inheader && /^From /) {
        $inheader = 1;
        $msgno++;
        undef $field;
    }

    # See if we're in headers, and snag subject, author etc. if so.
    if ($inheader) {
        $inheader = 0 if /^$/;         # blank line ends the headers
        chomp;
        if (/^([^:]+): (.*)/) {
            my ($name, $value) = ($1, $2);
            $name =~ tr/-A-Z/_a-z/;
            $field = $field_for{$name};
            $field->[$msgno] = $value if $field;
        }
        elsif (/^\s+(.*)/) {
            # Folded continuation of the previous header line.
            $field->[$msgno] .= " $1" if $field;
        }
    }

    # The MIME delimiter written to the digest is "--" followed by
    # $boundary.  Whenever an input line starts with that delimiter,
    # extend the boundary by one character chosen to differ from the
    # character the line has at that position.  Every earlier line
    # still fails to match because the boundary only ever grows.
    # different() is defined later in this file.
    if ($style == 1521 && index($_, "--$boundary") == 0) {
        $boundary .= different(substr($_, 2 + length($boundary), 1));
    }

    push(@mbox, $_);
}
$nummsgs = $msgno || 0;

# The header of the digest. We'll make some fake headers.
# RFC1153 specifies the subject header
# RFC1521 specifies MIME headers.
# Append the header file
# For MIME digests, announce the multipart/digest type and its boundary
# before anything else; the remaining digest headers are not produced
# by this script.
if ($style == 1521) {
    print "MIME-Version: 1.0\n",
          "Content-type: multipart/digest; boundary=\"$boundary\"\n";
}
print "\n";
print @header;

# Table of contents here: one line per message, subject then author,
# each truncated to its column width.
if ($table_of_contents) {
    print "Table of Contents:\n\n";
    for my $n (1 .. $nummsgs) {
        my $subject = defined $subject[$n] ? $subject[$n] : '';
        my $author  = defined $from[$n]    ? $from[$n]    : '';
        printf " %-40s %-25s\n", substr($subject, 0, 40), substr($author, 0, 25);
    }
    print "\n";
}

# First boundary
my $opening;
($opening, $boundary, $finalboundary) = _digest_boundaries($style, $boundary);
print $opening;

# Headers copied into each digest entry, in the order required by
# RFC 1153, paired with the per-message array holding their values.
my @digest_headers = (
    [ 'Date',       \@date       ],
    [ 'From',       \@from       ],
    [ 'To',         \@to         ],
    [ 'Cc',         \@cc         ],
    [ 'Subject',    \@subject    ],
    [ 'Message-ID', \@message_id ],
    [ 'Keywords',   \@keywords   ],
    [ 'Summary',    \@summary    ],
);

# Read in the messages and produce the digest
# We expect each message to begin with an SMTP From: header,
# followed by RFC822ish headers, a blank line, and a body.
#
$msgno    = 0;
$inheader = 0;
foreach my $line (@mbox) {
    if (!$inheader && $line =~ /^From /) {
        # Start of a message: separate it from the previous one.
        $inheader = 1;
        $msgno++;
        print $boundary if $msgno > 1;
    }
    elsif ($inheader && $line =~ /^$/) {
        # End of the original headers: emit the selected ones, then
        # the blank line that separates headers from body.
        $inheader = 0;
        for my $entry (@digest_headers) {
            my ($label, $values) = @$entry;
            my $value = $values->[$msgno];
            print "$label: $value\n" if defined $value && length $value;
        }
        print "\n";
    }
    next if $inheader;                 # original header lines are dropped

    # RFC 934 character stuffing: a body line starting with "-" would
    # look like an encapsulation boundary, so prefix it with "- ".
    print "- " if $style == 934 && $line =~ /^-/;
    print $line;
}

print $finalboundary;

# Append the footer file
# RFC1153 specifies the digest epilogue.
print @footer;

# Compute the separators for a digest style.
#   $style - 1153, 934, or anything else (treated as MIME, RFC 1521)
#   $token - the MIME boundary parameter; unused by the other styles
# Returns a three-element list:
#   the text printed before the first message,
#   the text printed between messages,
#   the text printed after the last message.
sub _digest_boundaries {
    my ($style, $token) = @_;

    # Message separator shared by RFC 1153 and RFC 934: 30 hyphens.
    my $rule = "\n" . ('-' x 30) . "\n\n";

    if ($style == 1153) {
        # RFC 1153 sets the preamble off with a line of 70 hyphens.
        return ("\n" . ('-' x 70) . "\n\n", $rule, $rule);
    }
    if ($style == 934) {
        return ($rule, $rule, $rule);
    }

    # MIME: every part of a multipart/digest defaults to message/rfc822,
    # so the delimiter is followed by a blank line (an empty set of part
    # headers) and the message's own headers form the start of the part
    # body.
    $token = '' unless defined $token;
    my $delimiter = "--$token";
    return ("$delimiter\n\n", "$delimiter\n\n", "$delimiter--\n");
}

# Print a usage summary and exit with status 1.
# The text goes to STDERR because standard output is where the digest
# is written, and is normally redirected to a file.
# NOTE(review): the first line of the message follows the SYNOPSIS in
# the header comment, and the argument name shown for -r is likewise
# assumed; confirm both against the intended wording.
sub usage {
    print STDERR <<'EOP';
Usage: mkdigest [-r style] [-h file] [-f file] [-t] < mbox > digest
Options:
  -h headerfile    File to put at the top of the digest
  -f footerfile    File to put at the footer of the digest
  -t               Create table of contents from message subjects/authors
  -r style         Make digest conform to the appropriate RFC# (1153, 934, or 1521).
                   "m" (MIME) is 1521 "l" (LISTSERV) is 1153. 934 is the default
EOP
    exit 1;
}

# Given a character, return any different 7-bit character
# Simply return "A" if it's a lowercase letter and "a" if not.
# An empty or undefined argument counts as "not a lowercase letter".
sub different {
    my ($char) = @_;
    return "A" if defined $char && $char =~ /[a-z]/;
    return "a";
}