#!/usr/bin/perl -w
##
## email2sms - email to SMS formatter
##
## Copyright (c) 1999--2000 Adam Spiers <adam@spiers.net>. 
## Miniscule portions Copyright (c) 1999 Ry4an Brase <ry4an@ry4an.org>.
##
## All rights reserved. This program is free software; you can redistribute
## it and/or modify it under the same terms as Perl itself.
##
## $Id$
##


use strict;

use Lingua::EN::Squeeze;
use MIME::Entity;
use MIME::Body;
use MIME::Parser;
use Getopt::Std;


##
## Process options and config file
##

my %opts = ();
getopts('f:h', \%opts);

my $configfile = $opts{'f'} || "$ENV{HOME}/.email2smsrc";

if (@ARGV or $opts{h}) {
  die <<USAGE;
email2sms, (c) 1999 Adam Spiers <adam\@spiers.net>

Command-line usage: email2sms [ -f configfile ] < email_in > sms_out

Please see the accompanying README/INSTALL files for full instructions.
USAGE
}

# Configuration defaults
my %conf = (
            maxlen  => 160,
            logfile => '',
            section => '|',
            newline => '|',
            attrib  => '',
            quoted  => '',
            squeeze_modes => [ 'noconv' ],
            optimize => 0,
            respond => 0,
            smtphost => 'localhost',
           );

&parse_config_file($configfile);

# Open log file
if ($conf{logfile}) {
  open(LOG, ">>$conf{logfile}")
    or die "Couldn't open log file $conf{logfile} for appending.\n";
}


##
## Parse and munge e-mail
##

# FIXME: This is a security hole!
my $tmp_dir = "/tmp/email2sms.$>.$$";

&log_this("Using $tmp_dir as MIME temporary directory\n");
mkdir $tmp_dir, 0700 or die "mkdir: $!";
my $parser  = new MIME::Parser;
$parser->output_dir($tmp_dir);

my $mail_in = $parser->read(\*STDIN)
  or die "Couldn't parse STDIN as MIME stream\n";

my $body_in = join '', @{ body $mail_in };

my $why_not = &check_content_type($mail_in);
die "$why_not\n" if $why_not;

# These globals are our scratchpad, and get used by &final_out()
my ($from_in, $from_out, $subject_out, $body_out);

# Munge body first
$body_out = $body_in;
&munge_body($body_in);

# Then munge header, depending on how much we managed to squeeze the body
my $header_in = head $mail_in;
my $header_out = &munge_header($header_in);


##
## Send message
##

my $sms = substr(&final_out, 0, $conf{maxlen});

&log_this("Final message:\n$sms\n");
&log_delim();
&log_this("Final length: ", length($sms), "\n");

if ($conf{respond}) {
  my $matched = eval qq{\$from_in =~ $conf{respond}};
  if ($@) {
    &log_delim();
    &log_this("`respond' regexp $conf{respond} didn't compile:\n  $@\n");
  }
  elsif ($matched) {
    &log_delim();
    &respond($mail_in);
  }
  else {
    &log_delim();
    &log_this("didn't match respond regexp\n");
  }
}

&log_delim('-');

print $sms, "\n";

my $all_tmps = "$tmp_dir/*";
unlink glob($all_tmps) or die "unlink: $!";
rmdir $tmp_dir;

exit 0;


##############################################################################

sub parse_config_file {
  my $config_file = shift;

  open(CONFIG, $config_file)
    or die "Couldn't open config file $config_file: $!\n";
  while (<CONFIG>) {
    next if /^\s*\#/ || /^\s*$/;           # damn cperl-mode

    s/^\s*//;                   # trim leading whitespace

    # This is a butt-ugly switch
    if (/^maxlen\s+(\d+)/) {
      $conf{maxlen} = $1;
    }
    elsif (/^logfile\s+(.*?)\s*$/) {
      ($conf{logfile} = $1) =~ s/~/$ENV{HOME}/g;
      $conf{logfile} =~ s/\$(\w)/$ENV{$1}/g;
    }
    elsif (/^section\s+'(.*)'\s*$/) {
      $conf{section} = $1;
    }
    elsif (/^newline\s+'(.*)'\s*$/) {
      $conf{newline} = $1;
    }
    elsif (/^attrib\s+'(.*)'\s*$/) {
      $conf{attrib} = $1;
    }
    elsif (/^quoted\s+'(.*)'\s*$/) {
      $conf{quoted} = $1;
    }
    elsif (/^fromsub\s+(.*)$/) {
      push @{$conf{from_substs}}, $1;
    }
    elsif (/^squeeze\s+(.*)\s*$/) {
      @{$conf{squeeze_modes}} = split /,\s*/, $1;
    }
    elsif (/^optimize\s+([01])\s*$/) {
      $conf{optimize} = $1;
    }
    elsif (/^respond\s+(.*)\s*$/) {
      $conf{respond} = $1;
    }
    elsif (/^response-from\s+(.*)\s*$/) {
      $conf{response_from} = $1;
    }
    elsif (/^smtphost\s+(.*)\s*$/) {
      $conf{smtphost} = $1;
    }
  }
  close(CONFIG);
}

##

sub check_content_type {
  my ($mail_in) = @_;

  my $why_not = '';

  my $content_type = $mail_in->mime_type;
  &log_this("Content-Type is $content_type\n");
  
  if ($mail_in->is_multipart) {
    &log_this("Message is multipart; splitting ...\n");

    # Get text/plain bits only
    my @parts = $mail_in->parts;
    &log_this(@parts . " parts found\n");

    if (@parts > 0) {
      my @parts_in = ();
      foreach my $part (@parts) {
        my $mime_type = $part->mime_type;
        &log_this("part type $mime_type\n");
        if ($mime_type =~ m!^text/plain!i) {
          push @parts_in, $part->body_as_string();
        }
        else {
          &log_this("Skipping $mime_type attachment\n");
        }
      }
      if (@parts_in) {
        $body_in = join $conf{section}, @parts_in;
      }
      else {
        $why_not = "No text/plain message parts found.";
      }
    }
    else {
      $why_not = "Multipart message had no parts.";
    }
  }

  &log_delim();

  return $why_not;
}

##

sub munge_body {
  my ($body_in) = @_;

  #&log_this("*** Untouched message body:\n$body_in\n");
  #&log_delim();

  # Remove quoted material
  #$body_in =~ s/(^> *.*?$()\n)+//gm;
  $body_in =~ s/
                ( $conf{attrib} \n ) ?           # attribution line
                ( $conf{quoted} .*? $ \n )+      # quoted lines
               //gmx
    if exists $conf{attrib} and exists $conf{quoted};

  &log_this("*** Dequoted message body:\n$body_in\n");
  &log_delim();

  # Newlines collapse ...
  $body_in =~ s/^\n+\s*//;
  $body_in =~ s/\s*\n+\s*/$conf{newline}/g;

  my $mode = 0;
  my @squeeze_modes = @{$conf{squeeze_modes}};
  $Lingua::EN::Squeeze::SQZ_OPTIMIZE_LEVEL = $conf{optimize};

  # Shrink body, but not more than necessary

  do {
    my $new_mode = $squeeze_modes[$mode++];
    &log_this("Trying squeeze mode $new_mode on body ... ");
    SqueezeControl($new_mode);

    $body_out = SqueezeText $body_in;

    # SqueezeText seems to add a \n
    chomp $body_out;

    # It doesn't eliminate multiple consecutive spaces either ... weird
    $body_out =~ s/\s+/ /g;

    &log_this(length(&final_out) . " characters\n");
  } 
  until length(&final_out) <= $conf{maxlen} or $mode > $#squeeze_modes;

  &log_delim();
}

##

sub munge_header {
  my ($header_in) = @_;

  # Who's it from?
  $from_in = $header_in->get('From') || $header_in->get('From ') || '?';
  $from_in =~ s/\w{3} \w{3} \d\d \d\d:\d\d:\d\d \d{4}$//; # remove date

  $from_out = $from_in || '?';

  # Eliminate multiple consecutive spaces
  $from_out =~ s/\s+/ /g;

  if ($from_out) {
    chomp $from_out;
    &munge_from();
  }

  my $subject_in = $header_in->get('Subject') || '';
  chomp $subject_in;
  &munge_subject($subject_in) if $subject_in;
}

##

sub munge_from {
  return unless @{$conf{from_substs}};

  my $munger_code = <<'EVAL';
sub {
  my $from = shift;

EVAL

  $munger_code .= join '', 
                       map { '  $from =~ ' . $_ . ";\n" } 
                           @{$conf{from_substs}};
  $munger_code .= <<'EVAL';
  return $from;
}
EVAL

  &log_this("from munger is:\n$munger_code");
  &log_delim();
  my $munger = eval $munger_code;

  &log_this("From header before munging is $from_out\n");
  $from_out = $munger->($from_out);
  &log_this("From header after munging is $from_out\n");
  &log_delim();
}

##

sub munge_subject {
  my ($subject_in) = @_;

  # Shrink subject if we're still over the limit, but not more than necessary

  $subject_out = $subject_in;

  if (length(&final_out) > $conf{maxlen}) {
    my $mode = 0;
    my @squeeze_modes = @{$conf{squeeze_modes}};
    do {
      my $new_mode = $squeeze_modes[$mode++];
      &log_this("Trying squeeze mode $new_mode on subject ... ");
      SqueezeControl($new_mode);

      $subject_out = SqueezeText $subject_in;

      # SqueezeText seems to add a \n
      chomp $subject_out;

      # It doesn't eliminate multiple consecutive spaces either ... weird
      $subject_out =~ s/\s+/ /g;

      &log_this(length(&final_out) . " characters\n");
    }
    until length(&final_out) <= $conf{maxlen} or $mode > $#squeeze_modes;

    &log_delim();
  }
}

##

sub final_out {
  my @sections = ();

  foreach ($from_out, $subject_out, $body_out) {
    my $section = $_;           # stop aliasing effect
    if ($section) {
      # damnit, thought this would have gone by now
      $section =~ s/^\s*(.+?)\s*$/$1/;
      push @sections, $section;
    }
  }

  return join $conf{section}, @sections;
}

##

sub log_this {
  return unless $conf{logfile};

  print LOG @_;
}

##

sub log_delim {
  my $delimiter = shift;
  $delimiter ||= '. ';
  &log_this(substr($delimiter x 80, 0, 79), "\n");
}

##

sub respond {
  my $mail = shift;

  # Mail::Util looks at $MAILADDRESS when Mail::Internet is deciding
  # what the From header should be.
  #
  # N.B. This next line causes the debugger on some Perls to SEGV!
  $ENV{'MAILADDRESS'} = $conf{response_from};

  my $reply;
  {
    # Avoid stupid warnings in Mail::Internet
    local $^W = 0;
    $reply = $mail->reply();
  }

  my $to = $reply->head->get('To');
  chomp $to;
  &log_this("Responding by email to: $to\n");
  
  my $body = <<EOF;
THIS IS AN AUTOMATICALLY GENERATED MESSAGE:

Your message has been converted for display on a device capable of
receiving SMS messages.  The converted output appears below.  If you
believe that information vital to the understanding of the message
has been truncated or lost during compression please shorten your
message or split it into multiple messages and resend it.

Message sent to SMS device:

EOF

  $body .= substr(&final_out, 0, $conf{maxlen});

  # Set the body
  my $body_handle = new MIME::Body::Scalar $body;
  $reply->bodyhandle($body_handle);

  # Send the autoreply
  my @sent_to = $reply->smtpsend(Host => $conf{smtphost})
    or warn "failed to send auto-reply";
}
