#!/usr/bin/perl -w
#
# $Id: send_emails,v 1.18 2004/12/05 00:04:17 dgregor Exp $
#
# Copyright (c) 2000-2002 Daniel J. Gregor, Jr.
# All rights reserved.
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 
#    - Redistributions of source code must retain the above copyright
#      notice, this list of conditions and the following disclaimer.
#    - Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials provided
#      with the distribution.
# 
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
# BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
# ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#

use strict;

use Getopt::Std;
use IO::File;
use Text::CSV_XS 0.23;
use Net::SMTP 2.19;
use POSIX qw(strftime);
use Time::Local;

#
# These are all of the global variables that are used
#
use vars qw($max_tries $progname $usage @mailhosts $mailcur %mailobjs);
use vars qw($dryrun $verbose $debug $template_file $rate $status_csv);
use vars qw($message_template $status_file $status_csv $status_fh $input_csv);
use vars qw(@template_fields @status_fields %already_sent $last_smtp_warn);
use vars qw($address_warning $delivery_warning $time_offset $fqdn);

init_variables();
parse_opts();
read_template_file();
open_all_smtp_connections();
init_input_csv();
init_status_csv();
send_emails();
close_all_smtp_connections();

if ($address_warning) {
	exit 1;
}
if ($delivery_warning) {
	exit 2;
}

sub init_variables {
	$max_tries = 5;

	$progname = "send_emails";
        $usage = <<EOF;
usage: 
	$progname [-n] [-v] [-d] [-z] [-r <rate>] [-m <mail hosts>] \
		[-H <FQDN>] [-s <status file>] -t <template file>
	- specify a list of recipients on stdin in a CSV file
EOF

	$mailcur = 0;
	%mailobjs = ();

	%already_sent = ();

	return;
}

sub parse_opts {
	my (%opts);
	my (@names);
	my ($i);

        if (!getopts('dH:m:nr:s:t:vz', \%opts)) {
		mydie($usage);
	}

        # Put the options into names which are more useful
        $dryrun = defined($opts{'n'}) ? 1 : 0;
        $verbose = defined($opts{'v'}) ? 1 : 0;
        $debug = defined($opts{'d'}) ? 1 : 0;
        $template_file = defined($opts{'t'}) ? $opts{'t'} : undef;
        $status_file = defined($opts{'s'}) ? $opts{'s'} : undef;
        $rate = defined($opts{'r'}) ? $opts{'r'} : undef;
        $fqdn = defined($opts{'H'}) ? $opts{'H'} : undef;
        $time_offset = defined($opts{'z'}) ? "+0000" : undef;

	@mailhosts =
	    defined($opts{'m'}) ? split(/,/, $opts{'m'}) : "localhost";

	if (!defined($fqdn)) {
		my $hostname = `hostname`;
		chomp($hostname);
		$fqdn = (gethostbyname($hostname))[0];
		if (!$fqdn) {
			mydie("Couldn't determine FQDN for this host ($hostname).  Use the \"-H <fqdn>\" option to specify the valid FQDN for this host.");
		}
	}

	if (!defined($time_offset)) {
		my $offset_m  = (time() - Time::Local::timelocal(gmtime()))
				/ 60;
		$time_offset = sprintf("%+03d%02d",
				$offset_m / 60, $offset_m % 60);
	}

	return;
}

sub read_template_file {
	my ($template_fh);

	if (!defined($template_file)) {
		mydie("no template file specified\n", $usage);
	}

	$template_fh = new IO::File($template_file, "r");
	if (!defined($template_fh)) {
		mydie("could not open $template_file: $!\n");
	}
	
	$message_template = join("", $template_fh->getlines());
	
	$template_fh->close();

	if ($message_template !~ m/\n/) {
		$message_template =~ s/\r/\r\n/g;
	}

	debug("message template:\nBEGIN\n", $message_template, "\nEND\n");

	return;
}

sub init_input_csv {
	$input_csv = Text::CSV_XS->new();

	if (!$input_csv->parse(scalar(<>))) {
		mydie("CSV parsing failed reading column headings " .
		    "from input CSV file on line $.: ",
		    $input_csv->error_input(), "\n");
	}

	@template_fields = $input_csv->fields();

	debug("csv files: " . join(", ", @template_fields) . "\n");

	return;
}

sub init_status_csv {
	my (@status_addl_fields) = (
		"_submissionlocaltime",
		"_submissionhost",
		"_error",
	);
	my ($submission_time_col);

	if (!defined($status_file)) {
		return;
	}

	$status_csv = Text::CSV_XS->new();

	if (-e $status_file) {
		$status_fh = new IO::File($status_file, "r+");

		if (!defined($status_fh)) {
			mydie("Error opening existing status file for ",
				"read/write: ", $!, "\n");
		}

		if (!$status_csv->parse($status_fh->getline())) {
			mydie("CSV parsing failed reading column headings ",
			    "from status CSV file on line ", $.,
			    ": ", $status_csv->error_input(), "\n");
		}

		@status_fields = $status_csv->fields();

		debug("existing status fields: ", join(", ", @status_fields),
			"\n");

		if (join(":", @status_fields) ne join(":", @template_fields,
		    @status_addl_fields)) {
			mydie("Status CSV file fields do not match input CSV ",
				"file fields:\n",
				"Status CSV file fields: ",
				join(", ", @status_fields), "\n",
				"Expected status CSV file fields: ",
				join(", ", @template_fields,
				    @status_addl_fields), "\n");
		}

		$submission_time_col = scalar(@status_fields) -
			scalar(@status_addl_fields);

		while(defined($_ = $status_fh->getline())) {
			my (@a);

			if (!$status_csv->parse($_)) {
				mydie("CSV status parsing failed from ",
				    "existing status file on line ",
				    $status_fh->input_line_number(), ": ",
				    $status_csv->error_input(), "\n");
			}

			@a = $status_csv->fields();

			if ($a[$submission_time_col] ne "") {
				debug("already sent to: ", $a[0], "\n");
				$already_sent{$a[0]}++;
			}
		}
	} else {
		$status_fh = new IO::File($status_file, "w");

		if (!defined($status_fh)) {
			mydie("Error opening new status file for writing: ",
				$!, "\n");
		}

		if (!defined($status_csv->combine(@template_fields,
		    @status_addl_fields))) {
			mydie("Error combining output header for status CSV: ",
				$status_csv->error_input(), "\n");
		}

		$status_fh->print($status_csv->string(), "\n");
	}

	$status_fh->autoflush(1);

	return;
}

sub open_smtp {
	my ($name) = shift(@_);

        $mailobjs{$name}{'name'} = $name;
        $mailobjs{$name}{'conn'} = Net::SMTP->new($name, Hello => $fqdn);

	if (!defined($mailobjs{$name}{'conn'})) {
		mywarn("open_smtp(): Could not open a new connection to ",
		    "mail host \"$name\".\n");
	}
}

sub open_all_smtp_connections {
	my ($i);

	foreach $i (@mailhosts) {
		open_smtp($i);
	}
}

sub send_emails {
	my ($message, $sender, $recipient);
	my (@data_fields);
	my ($i);
	my ($message_count);
	my ($last_time);
	my ($last_count);

	$message_count = 0;
	$last_time = 0;
	$last_count = 0;

	while (<>) {
		if (!$input_csv->parse($_)) {
			mydie("CSV parsing failed reading from input CSV ",
			    "file on line  $.: ",
			    $input_csv->error_input(), "\n");
			next;
		}

		@data_fields = $input_csv->fields();

		if (exists($already_sent{$data_fields[0]})) {
			debug("already sent message to ", $data_fields[0],
			    "... skipping\n");
			next;
		}

		for $i (0 .. $#data_fields) {
			debug($i + 1, " => ", $data_fields[$i], "\n");
		}
		debug("\n");
	
		# Customize the mail_message for this user
		$message = $message_template;

		for $i (0 .. $#template_fields) {
			$message =~
			    s/%$template_fields[$i]%/$data_fields[$i]/egi;
		}

		$message =~ s/%_date%/get_date()/e;
		$message =~ s/%_messageid%/get_message_id()/e;

		if (!($message =~
		    m/^X-BulkMail-Envelope-From:[^<\n\r]*(<[^>\n\r]*>)/mi) &&
		    !($message =~ m/^From:[^<\n\r]*(<[^>\n\r]+>)/mi)) {
			mywarn("could not find sender address for ",
			    "mail message on line $.\n",
			    "Is the address inside of angle brackets? (<>)\n");
			$address_warning++;
			next;
		}
		$sender = $1;
		debug("sender: $sender\n");
		if ($sender eq "<>") {
			$sender = "";
		}

		if (!($message =~ m/^To:\s*[^<\n\r]*(<[^>\n\r]+>)/mi)) {
			mywarn("could not find recipient address ",
			    "for mail message on line $.\n",
			    "Is the address inside of angle brackets? (<>)\n");
			$address_warning++;
			next;
		}
		$recipient = $1;
		debug("recipient: $recipient\n");

		if (!$dryrun) {
			my ($delivered) = 0;
			my ($hash);

			for ($i = 0; !$delivered && $i < $max_tries;
			    $i++) {
				if (!defined($hash = get_smtp_conn())) {
					mysmtpwarn("could not get an SMTP ",
					    "connection from the pool\n");
					next;
				}

				debug("attempting delivery to host: ",
				    $$hash{'name'}, "\n");

				$delivered = send_message($sender, $recipient,
				    $message, $hash);
			}

			if (!$delivered) {
				mywarn("message to \"", $recipient, "\" not ",
				    "delivered after ", $max_tries,
				    " successive failed attempts: ",
				    "skipping message\n");
				$delivery_warning++;
			}

			if (defined($status_csv)) {
				if (!defined($status_csv->combine(
				    @data_fields,
				    $delivered ? scalar(localtime()) : "",
				    $$hash{'name'}, 
				    $delivered ? "" : $last_smtp_warn))) {
					mydie("Error combining output ",
					    "line for status CSV: ",
					    $status_csv->error_input(),
					    "\n");
				}

				$status_fh->print($status_csv->string(), "\n");
			}

			$message_count++;

			if (defined($rate)) {
				my ($wait_until);
				my ($wait);

				# $rate >= ($message_count - $last_count) /
				#	(time - $last_time)

				$wait_until = $last_time +
					($message_count - $last_count)/$rate;
			
				$wait = $wait_until - time;
				if (wait >= 1) {
					debug("sleeping for ", $wait,
						"seconds\n");
					sleep($wait);
				}

				if (time > $last_time) {
					$last_count = $message_count;
					$last_time = time;
				}
			}
		}
	
		verbose($message, "\n");
	}

	return;
}
    
sub close_smtp {
	my ($hash) = shift(@_);

	if (defined($$hash{'conn'})) {
		# nicely close the connection, if possible
		# ignore errors because we really don't care here
		$$hash{'conn'}->quit;
	}

	undef($$hash{'conn'});

	return;
}

sub close_all_smtp_connections {
	my ($i);

	foreach $i (keys(%mailobjs)) {
		if (defined($mailobjs{$i}) && defined($mailobjs{$i}{'conn'})) {
			$mailobjs{$i}{'conn'}->quit();
		}
		undef($mailobjs{$i}{'conn'});
	}
}

sub send_message {
	my ($sender) = shift(@_);
	my ($recipient) = shift(@_);
	my ($message) = shift(@_);
	my ($hash) = shift(@_);

	if (!$$hash{'conn'}->mail($sender)) {
		mysmtpwarn("could not issue MAIL FROM command to smtp server: ",
		    $!, "\n");
		close_smtp($hash);
		return 0;
	}
	if (!$$hash{'conn'}->to($recipient)) {
		mysmtpwarn("could not issue RCPT TO command to smtp server: ",
		    $!, "\n");
		close_smtp($hash);
		return 0;
	}
	if (!$$hash{'conn'}->data()) {
		mysmtpwarn("could not begin DATA sessions with smtp server: ",
		    $!, "\n");
		close_smtp($hash);
		return 0;
	}
	if (!$$hash{'conn'}->datasend($message)) {
		mysmtpwarn("could not send message content to smtp server: ",
		    $!, "\n");
		close_smtp($hash);
		return 0;
	}
	if (!$$hash{'conn'}->dataend()) {
		mysmtpwarn("could not end DATA session with smtp server: $!\n");
		close_smtp($hash);
		return 0;
	}
	# XXX should we reset the connection here?

	return 1;
}

sub get_smtp_conn {
	my ($host);
	my ($hash);

	$host = $mailhosts[$mailcur++ % @mailhosts];

	if (!defined($hash = $mailobjs{$host}) ||
	    !defined($$hash{'conn'})) {
		open_smtp($host);

		if (!defined($hash = $mailobjs{$host}) ||
		    !defined($$hash{'conn'})) {
			return undef;
		}
	}

	return $hash;
}

sub mydie {
	die($progname, ": ", @_);
}

sub mywarn {
	warn($progname, ": ", @_);
}

sub mysmtpwarn {
	mywarn(@_);
	$last_smtp_warn = join("", @_);
	$last_smtp_warn =~ s/[\r\n]+$//;
	$last_smtp_warn =~ s/[\r\n]+/ /g;
}

sub debug {
	if ($debug) {
		print(@_);
	}
}

sub verbose {
	if ($verbose) {
		print @_;
	}
}

sub get_date {
	my @time;

	if ($time_offset eq "+0000") {
		@time = gmtime();
	} else {
		@time = localtime();
	}
	return strftime("%a, %d %b %Y %H:%M:%S $time_offset", @time);
}

sub get_message_id {
	return "<" . strftime("%Y%m%d%H%M%S", gmtime()) .
		".$<.$$." . int(rand(1000000)) . "\@$fqdn>";
}

__END__

=head1 NAME

send_emails - send customized bulk RFC822 messages over SMTP

=head1 SYNOPSIS

B<send_emails> S<[ B<-n> ]> S<[ B<-v> ]> S<[ B<-d> ]> S<[ B<-z> ]>
            S<[ B<-r> I<rate> ]> S<[ B<-m> I<mail_hosts> ]>
            S<[ B<-H> I<fqdn> ]> S<[ B<-s> I<status_file> ]>
            S<B<-t> I<template_file>>

=head1 DESCRIPTION

B<send_emails> is used to send customized bulk I<RFC822> mail messages to
any number of users over SMTP (I<RFC821>).
The entire RFC822 message, including headers and body (which can be MIME
formatted, if desired) is provided as a message template containing
variables.
An address list file, in comma-separated values format (CSV), is read
from stdin.
The first line of the address list file is a column heading line and
specifies the variable in the template file assocated with each column.
Every subsequent line contains the values to be filled into the variables
in the template.

=head1 COMMAND-LINE OPTIONS

=over 4

=item B<-n>

Dry run mode.
Do not actually send any email messages.
Useful for testing the validity of a template/CSV file combination.

=item B<-v>

Verbose mode.
Print messages after variable replacement is done on the template.
This displays messages as users would see them.
This is useful with dry run mode to not send any messages and actually
see what they look like.

=item B<-d>

Debug mode.
Print internal debugging messages.

=item B<-z>

No time zone offset mode.
This uses GMT for the time zone offset in the date header (i.e.: no offset).
You can use this if the time zone is not correctly determined by the script
automatically.

=item B<-r> I<rate>

Set rate throttling, where I<rate> is messages per second.
The rate can be a whole positive integer to represent that multiple
messages are to be sent per second, or a decimal number between
zero and one to indicate that less than one message is to be sent
per second.

=item B<-m> I<mail_hosts>

Specify a list of one or more mail hosts, separated by commas if
there are multiple hosts.
If mail hosts are not explicitly
specified, I<localhost> will be used by default.
If multiple hosts are specified, each host is cycled through when
sending messages, and if sending a message fails to one host, the
next host will be tried.

=item B<-s> I<status_file>

Specify a status file.
This file is created if it does not already exist.
The status file is written as each message is sent, and is formatted
identically to the I<template_file>, with additional columns showing
message submission time, the host the message was submitted to, and
any errors while attempting to deliver the messages.
If a mailing is stopped before completing, or if any messages were not
delivered to the I<mail_hosts> by B<send_emails>, the status file will
allow messages to be sent to only those users for whom messages have
not yet been successfully submitted to the I<mail_hosts>.

=item B<-H> I<FQDN>

Specify the FQDN (fully-qualified domain name) of this host.
You might need to use this option if the script cannot automatically
determine the FQDN of the host or if you want it to use a different
value.
The FQDN is used in the SMTP HELO command as well as in the %_messageid%
variable.

=item B<-t> I<template_file>

Specify the template file.
This option is required.
See the B<TEMPLATE FILE> section below.

=back

=head1 TEMPLATE FILE

The template file is an entire RFC822 message, including headers and body
(which can be MIME formatted, if desired).
It includes variables which take the form of I<%variable_name%>.
These variables are replaced with data from the address list CSV file.
Each of the I<variable_name> strings must match up with a column heading
in the CSV address list file.
See the B<EXAMPLES> section for a working example.

There is a special header line that can be specified in the template
which sets the SMTP I<envelope from> address to be different from the
I<header from> address (I<From:> header).
The I<envelope from> address is supposed to be used for sending bounces
when a delivery error occurs, so this is useful to send bounce messages
to a mailbox separate from the mailbox in the visible I<From:> address.
This header line is "I<X-BulkMail-Envelope-From>".

Note that if you use I<X-Bulkmail-Envelope-From>, which is suggested,
I<Errors-To> is largely useless, unless you want bounce messages to
be sent to both addresses (and then, by only B<some> mailers).  If
you do not wish to have bounce messages delivered anywhere, set the
I<X-Bulkmail-Envelope-From> header to an empty address.  In other words,
set it to an empty set of angle brackets.  Example:

    X-Bulkmail-Envelope-From: <>

Any email addresses in headers that are used for sending the message,
in particular, I<From:>, I<To:>, and I<X-BulkMail-Envelope-From:>, B<MUST>
have the email address enclosed in angle brackets ("<>"), otherwise
the email address will not be parsed by B<send_emails>.

There are two special variable strings which can be used to add variable
headers to the message for the current date and message ID.
The variables are I<%_date%> and I<%_messageid%>, respectively.
It is suggested that you always added I<Date> and I<Message-ID> headers
using these variables, as you may receive an unfavorable score from
content-based spam scanners without these headers in the message.  Here
is how you can add them to the template:

    Date: %_date%
    Message-ID: %_messageid%

=head1 ADDRESS LIST FILE

This is a CSV (comma-separated values) file which contains a list
of values to fill into variables in the template file.
The first line is a set of column headings that match up with the
variables in the template file.  Each subsequent line
is a set of values to be filled into variables in the template
file for an individual message.

Any variable name can be used, however names beginning with an
underscore, "_" are reserved, and it is suggested that variable
names be all lowercase and not contain special characters.

The first column in the address list file has a special purpose
as a unique identifier.
It is used along with the status file on second and later runs
to determine if a message has been sent to a specific user or not.
It is suggested that the first column be the email address of the
recipients, and if that is not unique, some type of unique identifier.

The address list file can be made in pretty much any spreadsheet
program, like Microsoft Excel, and exported as a CSV (comma-separated
values) file.
It is also perfectly acceptable to make the files by hand in a
text editor or from a batch process.

=head1 EXAMPLES

=head2 Template file

    From: Newsletter Editor <newsletter@example.com>
    X-BulkMail-Envelope-From: <bounce@example.com>
    To: %name% <%to%>
    Subject: Hello! (%id%)
    Date: %_date%
    Message-ID: %_messageid%
    
    Hello %name%,
    
    Here is your newsletter for today.  If you do not wish to receive
    this newsletter in the future, you may unsubscribe at:
    
    	http://www.example.com/newletter/unsubscribe?id=%id%

=head2 Address list CSV file

    to,name,id
    president@example.com,The President,12345
    secretary@example.com,The Secretary,67890

=head2 The message to the first address would look like

    From: Newsletter Editor <newsletter@example.com>
    X-BulkMail-Envelope-From: <bounce@example.com>
    To: The President <president@example.com>
    Subject: Hello! (12345)
    Date: Sat, 04 Dec 2004 17:56:05 -0500
    Message-ID: <20041204225605.501.19835.788718@server.example.com>
    
    Hello The President,
    
    Here is your newsletter for today.  If you do not wish to receive
    this newsletter in the future, you may unsubscribe at:
    
    	http://www.example.com/newletter/unsubscribe?id=12345

=head1 RETURN VALUES

Returns 0 when no errors occured, 1 if any required addresses could not
be at least one message, 2 if any delivery errors occured, and 255 if
any major errors occured that stopped the bulkmailing process.

=head1 DEBUGGING

=over 4

=item CSV parsing failed reading column headings

The first line of the CSV or status files, which contains column headings,
could not be parsed.

=item Status CSV file fields do not match input CSV file fields

The column headings in the input CSV file do not match the column headings
in the previously-created status CSV file (excluding the status columns).

=item CSV status parsing failed from exsiting status file

A line in the status CSV file is invalid.

=item Error combining output header for status CSV 

An internal CSV module error occured when trying to format a column header
line for the status CSV file.

=item CSV parsing failed reading from input CSV file

An invalid line was found in the input CSV file.

=item Error combining output line for status CSV

An internal CSV module error occured when trying to format a row of data
for the status CSV file.

=item could not find sender address for mail message

A sender address was not found in a From or X-BulkMail-Envelope-From
header.  See the L<TEMPLATE FILE> section for details, and make sure
that the address is enclosed in angle brackets.

=item could not find recipient address for mail message

A recipient address was not found in a To header.  See the L<TEMPLATE FILE>
section for details, and make sure that the address is enclosed in
angle brackets.

=item could not get an SMTP connection from the pool

B<send_emails> did not already have an open SMTP connection to a certain
mail host, nor was it able to open a new connection to that host.  This
is normally due to the mail host refusing incoming SMTP connections.  Look
for "Could not open new connection" messages to determine what the host
is.

=item open_smtp(): Could not open a new connection to mail host "xxx"

A new SMTP connection could not be initiated to the mentioned host.

=item message to xxx not delivered after xxx successive failed attempts: skipping message

The message could not be delivered after repeated attempts and was skipped
on this bulkmail run.  If a status file was used, a future invocation of
B<send_emails> with the same status file will attempt to redeliver this
message, and all other messages that failed.

=item could not issue xxx command to smtp server

An unknown error occured while issuing the command to the SMTP server.

=item could not begin DATA sessions with smtp server

An unknown error occured while issuing the DATA command to the SMTP server.

=item could not send message content to smtp server

An unknown error occured while sending message content to the SMTP server.

=item could not end DATA session with smtp server

An unknown error occured while closing the DATA session with the SMTP
server (sending the closing ".").

=back

=head1 SEE ALSO

=over 4

=item o

L<RFC821> - http://www.cis.ohio-state.edu/cs/Services/rfc/rfc-text/rfc0821.txt

=item o

L<RFC822> - http://www.cis.ohio-state.edu/cs/Services/rfc/rfc-text/rfc0822.txt

=back

=head1 AUTHOR

DJ Gregor, <dj@gregor.com>


