#!/usr/bin/perl -w
$ID = q(cvslog,v 1.51 2005/04/16 22:39:39 eagle Exp );
#
# cvslog -- Mail CVS commit notifications.
#
# Written by Russ Allbery <rra@stanford.edu>
# Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004
#     Board of Trustees, Leland Stanford Jr. University
#
# This program is free software; you can redistribute it and/or modify it
# under the same terms as Perl itself.

##############################################################################
# Modules and declarations
##############################################################################

# The path to the repository.  If your platform or CVS implementation doesn't
# pass the full path to the cvslog script in $0 (or if your cvslog script
# isn't in the CVSROOT directory of your repository for some reason), you will
# need to explicitly set $REPOSITORY to the root directory of your repository
# (the same thing that you would set CVSROOT to).
($REPOSITORY) = ($0 =~ m%^(.*)/CVSROOT/cvslog$%);
# If $0 did not match the pattern above, fall back to an empty but defined
# string.  The main routine tests $REPOSITORY for truth and dies with a
# diagnostic when it is empty.
$REPOSITORY ||= '';

require 5.004;

use Getopt::Long qw(GetOptions);
use IPC::Open2 qw(open2);
use POSIX qw(SEEK_SET strftime);

use strict;
use vars qw($DEBUG $ID $REPOSITORY);

# Clean up $0 for errors.
$0 =~ s%^.*/%%;

##############################################################################
# Utility functions
##############################################################################

# Strip a common leading path from a list of names.  Takes the prefix and a
# reference to an array of strings and returns a new list in which each string
# has had the prefix (plus any slashes following it) removed from its start,
# followed by removal of a leading ./ if one remains.  The prefix is matched
# literally, and the caller's array is left untouched.
sub simplify {
    my ($prefix, $list) = @_;
    return map {
        my $name = $_;
        $name =~ s%^\Q$prefix\E/*%%;
        $name =~ s%^\./+%%;
        $name;
    } @$list;
}

# Given a dotted CVS revision number, return the revision that follows it on
# the same branch, which is the same string with its final component
# incremented by one.
sub next_version {
    my ($version) = @_;
    my @components = split (/\./, $version);
    ++$components[$#components];
    return join ('.', @components);
}

# Given a directory name, find the corresponding CVS module.  We do this by
# looking in the modules file, finding the last "word" on each line, making
# sure it contains a / (and is therefore assumed to be a directory), and
# seeing if it's a prefix of the module path.
#
# Takes the directory path relative to the repository.  If a module matches,
# returns "<name>" followed by whatever part of the path is left over below
# the module directory (with a leading slash); otherwise, or if the modules
# file can't be opened, returns the path unchanged.  The first matching line
# in the modules file wins.
sub find_module {
    my $module = shift;
    if (open (MODULES, "$REPOSITORY/CVSROOT/modules")) {
        local $_;
        while (<MODULES>) {
            next if /^\s*\#/;
            next if /^\s*$/;
            my ($name, @rest) = split;
            my $path = pop @rest;

            # A line with only a module name has no path at all; skip it
            # rather than matching against an undefined value.
            next unless defined $path && $path =~ m%/%;

            # The path from the modules file is a literal directory name, not
            # a pattern, so quote it.  Otherwise characters like . and + in
            # directory names would be treated as regex metacharacters.
            if ($module =~ s%^\Q$path\E(\Z|/)%%) {
                $module = '/' . $module if $module;
                $module = "<$name>$module";
                last;
            }
        }
        close MODULES;
    }
    return $module;
}

##############################################################################
# Multidirectory commit I/O
##############################################################################

# Recalculate the file prefix and module after having loaded a new set of
# data.  We do this by starting with the prefix from the last set of data and
# then stripping off one directory at a time until we find something that is a
# common prefix of every affected file.  Each directory stripped from the
# prefix is also stripped from the end of the repository and local paths.
#
# Takes the data hash and updates its prefix, module, repository, and
# localpath keys in place.
sub recalculate_prefix {
    my $data = shift;
    my $prefix = $$data{prefix};
    for my $file (keys %{ $$data{files} }) {
        while ($prefix && index ($file, $prefix) != 0) {
            # If there's no trailing path component left to remove (the
            # prefix ends in a slash, for instance), we can't shorten it any
            # further.  Give up on having a common prefix rather than looping
            # forever on a prefix that never changes.
            unless ($prefix =~ s%/*([^/]+)$%%) {
                $prefix = '';
                last;
            }
            my $last = $1;
            $$data{repository} =~ s%/*\Q$last\E$%%;
            $$data{localpath} =~ s%/*\Q$last\E$%%;
        }
    }
    $$data{prefix} = $prefix;
    $$data{module} = find_module ($prefix);
}

# Return the name of the directory used to hold data for multidirectory
# commits.  It lives under TMPDIR (or /tmp if TMPDIR isn't set) and is named
# after the real UID and the process group of the current process.
sub build_tmpdir {
    my $base = $ENV{TMPDIR} || '/tmp';
    return join ('', $base, '/cvs.', $<, '.', getpgrp ());
}

# Remove the temporary directory used for multidirectory commits along with
# every file in it.  Warns and gives up if the directory can't be opened, and
# warns if the directory can't be removed afterwards.
sub cleanup_data {
    my $tmpdir = build_tmpdir ();
    if (!opendir (D, $tmpdir)) {
        warn "$0: can't open $tmpdir: $!\n";
        return;
    }
    my @entries = readdir D;
    for my $entry (@entries) {
        next if $entry eq '.' || $entry eq '..';
        unlink "$tmpdir/$entry";
    }
    closedir D;
    rmdir $tmpdir or warn "$0: can't remove $tmpdir: $!\n";
}

# Read the file containing the last directory noticed by the commitinfo script
# and return that directory name.  The file is only trusted if the temporary
# directory is a real directory (not a symlink) owned by the current user.
# Returns undef if the directory isn't acceptable, the file can't be opened,
# or the file is empty.
sub read_lastdir {
    my $tmpdir = build_tmpdir ();
    my $last;
    if (!-l $tmpdir && -d _ && (lstat _)[4] == $<) {
        if (open (LAST, $tmpdir . '/directory')) {
            $last = <LAST>;

            # An empty file yields undef here; don't chomp it, since that
            # would produce an uninitialized value warning under -w.
            chomp $last if defined $last;
            close LAST;
        }
    }
    return $last;
}

# Load a saved list of files with revisions.  Each line of the file has the
# form name,old,new; lines that don't parse that way are skipped.  Takes the
# name of the file to read, the data hash, and a message hash.  Every file
# read is recorded under the files key of the data hash as a pair of old and
# new revisions, and the message hash gets added, removed, and modified keys
# holding the lists of file names in each category (a file whose old revision
# is NONE is added, and one whose new revision is NONE is removed).  Returns
# true on success and warns and returns false if the file can't be opened.
sub read_files {
    my ($file, $data, $message) = @_;
    if (!open (FILES, $file)) {
        warn "$0: can't open $file: $!\n";
        return;
    }
    my %changes = (added => [], removed => [], modified => []);
    while (defined (my $line = <FILES>)) {
        chomp $line;
        my ($name, $old, $new) = ($line =~ /^(.*),([^,]+),([^,]+)$/);
        next unless $new;
        $$data{files}{$name} = [ $old, $new ];
        my $kind = $old eq 'NONE' ? 'added'
                 : $new eq 'NONE' ? 'removed'
                 :                  'modified';
        push (@{ $changes{$kind} }, $name);
    }
    close FILES;
    for my $kind (qw(added removed modified)) {
        $$message{$kind} = [ @{ $changes{$kind} } ];
    }
    return 1;
}

# Load the lines of a saved commit message.  Takes the name of the file to
# read and a message hash, and stores a reference to the list of lines under
# the text key of that hash.  If the file can't be opened, the list is empty.
sub read_text {
    my ($file, $message) = @_;
    my $lines = [];
    if (open (TEXT, $file)) {
        push (@$lines, <TEXT>);
        close TEXT;
    }
    $message->{text} = $lines;
}

# Fold a message hash into a list of message hashes.  Takes a reference to the
# list and the new message.  If an existing message in the list has the same
# commit message text, the new message's added, removed, and modified file
# lists are appended to that existing message's lists.  If no existing message
# has the same text, the new message is appended to the end of the list.
sub merge_message {
    my ($list, $message) = @_;
    my $wanted = "@{ $$message{text} }";
    my $merged = 0;
    for my $existing (@$list) {
        next unless "@{ $existing->{text} }" eq $wanted;
        push (@{ $existing->{added} },    @{ $message->{added} });
        push (@{ $existing->{removed} },  @{ $message->{removed} });
        push (@{ $existing->{modified} }, @{ $message->{modified} });
        $merged = 1;
    }
    push (@$list, $message) unless $merged;
}

# Pull in everything saved by earlier invocations for the same commit.  Takes
# the data hash.  For each numbered files and text pair in the temporary
# directory, starting at 1 and stopping at the first missing files file, the
# affected files are added to the data hash and the commit message is merged
# into the messages list.  The message for the current invocation is merged in
# last.  Finally, the common prefix is recalculated over all files and the
# saved data is deleted.
sub read_data {
    my ($data) = @_;
    my $tmpdir = build_tmpdir ();
    my $messages = $data->{messages} = [];
    my $i = 1;
    while (-f "$tmpdir/files.$i") {
        my %message;
        read_files ("$tmpdir/files.$i", $data, \%message);
        read_text ("$tmpdir/text.$i", \%message);
        merge_message ($messages, \%message);
        $i++;
    }
    merge_message ($messages, $data->{message});
    recalculate_prefix ($data);
    cleanup_data ();
}

# Save data for the files modified in this invocation to be picked up later.
# Takes the data hash and writes the list of files with revisions to files.N
# and the commit message to text.N in the temporary directory, where N is the
# first number not already in use.  The temporary directory must already
# exist, must not be a symlink, and must be owned by the current user.
#
# Returns true on success and undef on any failure (after printing a warning).
# The caller relies on the true return to decide whether it can exit without
# sending mail, so success is reported explicitly.
sub save_data {
    my $data = shift;
    my $tmpdir = build_tmpdir ();
    if (-l $tmpdir || !-d _ || (lstat _)[4] != $<) {
        warn "$0: invalid directory $tmpdir\n";
        return undef;
    }
    my $i = 1;
    $i++ while -f "$tmpdir/files.$i";
    unless (open (FILES, "> $tmpdir/files.$i")) {
        warn "$0: can't save to $tmpdir/files: $!\n";
        return undef;
    }
    unless (open (TEXT, "> $tmpdir/text.$i")) {
        # Save the error before close and unlink can overwrite it, and don't
        # leave behind an open handle or a files.N without a matching text.N.
        my $error = "$!";
        close FILES;
        unlink "$tmpdir/files.$i";
        warn "$0: can't save to $tmpdir/files: $error\n";
        return undef;
    }
    for (keys %{ $$data{files} }) {
        my ($old, $new) = @{ $$data{files}{$_} };
        print FILES join (',', $_, $old, $new), "\n";
    }
    print TEXT @{ $$data{message}{text} };

    # Close both handles even if the first close fails, and report the first
    # error seen.
    my $error;
    $error = "$!" unless close (FILES);
    unless (close (TEXT)) {
        $error = "$!" unless defined $error;
    }
    if (defined $error) {
        warn "$0: can't save to $tmpdir/files: $error\n";
        return undef;
    }
    return 1;
}

##############################################################################
# Parsing functions
##############################################################################

# Break up the single string of affected files that CVS hands to cvslog.  CVS
# passes the directory and all of the files as one space-separated string
# instead of as separate arguments, so file names containing spaces or commas
# can only be recovered by looking for the revision information that ends each
# entry.
#
# Returns a list whose first element is the path in the repository, whose
# second element is the type of change (directory, import, or commit), and for
# commits, whose remaining elements are the files with their old and new
# revisions still attached.  Any trailing text that doesn't end in revision
# information is returned as a final element.
sub split_files {
    my ($files) = @_;

    # Matches the ,old,new revision pair that follows each file name.
    my $revisions = '(?:,(?:\d+(?:\.\d+)*|NONE)){2}';

    # Files at the top level of the repository are reported without a
    # directory in front of the file list.  If the first word is all there is
    # or already looks like a file with revisions, there is no directory to
    # remove and the path is the top of the repository.
    my ($root, $rest) = split (' ', $files, 2);
    if (!$rest || $root =~ /${revisions}$/) {
        $root = '.';
    } else {
        $files = $rest;
    }

    # Directory adds and imports are reported with fixed strings in place of
    # the file list.
    return ($root, 'directory')
        if $files =~ /^- New directory(,NONE,NONE)?$/;
    return ($root, 'import')
        if $files =~ /^- Imported sources(,NONE,NONE)?$/;

    # Peel entries off the front of the string, each one ending with revision
    # information followed by a space or the end of the string.
    my @files;
    while ($files =~ s/^(.*?${revisions})(?: |\z)//) {
        push (@files, $1);
    }
    push (@files, $files) if $files;
    return ($root, 'commit', @files);
}

# Parse the arguments CVS passed on the command line into the data hash.  The
# arguments are either a single string containing the directory and all of the
# files (which is split apart with split_files) or a separate directory
# argument followed by either a marker for a directory add or import or by
# groups of three arguments giving a file name, its previous revision, and its
# current revision.
#
# Sets the prefix and module keys from the directory and the type key to
# directory, import, or commit.  For directory adds and imports, also sets the
# root key to the directory.  For commits, records each file (with the
# directory prepended; the common prefix is pulled off later) under the files
# key with its previous and current revisions and adds it to the added,
# removed, or modified list of the current message.  Dies if CVS gave us no
# directory or no files.
sub parse_files {
    my ($data, @args) = @_;
    my ($directory, $type, @files);
    if (@args != 1) {
        $directory = shift @args;
        if ($args[0] eq '- New directory') {
            $type = 'directory';
        } elsif ($args[0] eq '- Imported sources') {
            $type = 'import';
        } else {
            $type = 'commit';
            push (@files, [ splice (@args, 0, 3) ]) while @args;
        }
    } else {
        ($directory, $type, @files) = split_files ($args[0]);
        if ($type eq 'commit') {
            @files = map { [ /^(.*),([^,]+),([^,]+)$/ ] } @files;
        }
    }
    die "$0: no module given by CVS (no \%{sVv}?)\n" unless $directory;
    $$data{prefix} = $directory;
    $$data{module} = find_module ($directory);
    for my $kind (qw(added modified removed)) {
        $$data{message}{$kind} ||= [];
    }
    if ($type eq 'directory' || $type eq 'import') {
        $$data{type} = $type;
        $$data{root} = $directory;
    } elsif (!@files) {
        die "$0: no files given by CVS (no \%{sVv}?)\n";
    } else {
        $$data{type} = 'commit';
        for my $entry (@files) {
            my ($name, $prev, $cur) = @$entry;
            warn "$0: no version numbers given by CVS (no \%{sVv}?)\n"
                unless defined $cur;
            my $path = "$directory/$name";
            $$data{files}{$path} = [ $prev, $cur ];
            my $kind = $prev eq 'NONE' ? 'added'
                     : $cur  eq 'NONE' ? 'removed'
                     :                   'modified';
            push (@{ $$data{message}{$kind} }, $path);
        }
    }
}

# Read the first three lines of the CVS log message from standard input and
# store the path information found there in the data hash.  The repository key
# gets the first line with the leading "Update of " and trailing whitespace
# removed, and the localpath key gets the word following "directory" on the
# second line.  The third line is read and thrown away.  Each line is echoed
# if debugging is enabled.
sub parse_paths {
    my ($data) = @_;

    # The first line of the log message will be "Update of <path>".
    my $update = <STDIN>;
    print $update if $DEBUG;
    $update =~ s/^Update of //;
    $update =~ s/\s*$//;
    $data->{repository} = $update;

    # Next is the line naming the local working directory.
    my $working = <STDIN>;
    print $working if $DEBUG;
    my ($local) = ($working =~ /directory (\S+)/);
    $data->{localpath} = $local;

    # The line after that is skipped.
    my $blank = <STDIN>;
    print $blank if $DEBUG;
}

# Extract the tag.  We assume that all files will be committed with the same
# tag; probably not the best assumption, but it seems workable.  Note that we
# ignore all of the file lists, since we build those ourself from the version
# information (saving the hard challenge of parsing a whitespace-separated
# list that could contain filenames with whitespace).
#
# Reads standard input up to and including the "Log Message" line, echoing
# each line if debugging is enabled, and stores the tag from the last Tag line
# seen (if any) in the tag key of the data hash.
sub parse_filelist {
    my $data = shift;
    local $_;
    while (<STDIN>) {
        print if $DEBUG;
        last if /^Log Message/;
        if (/^\s*Tag: (\S+)\s*$/) {
            $$data{tag} = $1;
        }
    }
}

# Read the rest of standard input as the commit message and store it, as a
# list of lines with any leading and trailing blank lines dropped, under the
# text key of the current message in the data hash.  The unmodified input is
# echoed if debugging is enabled.
sub parse_message {
    my ($data) = @_;
    my @lines = <STDIN>;
    print @lines if $DEBUG;
    while (@lines && $lines[0] =~ /^\s*$/) {
        shift @lines;
    }
    while (@lines && $lines[-1] =~ /^\s*$/) {
        pop @lines;
    }
    $data->{message}{text} = [ @lines ];
}

##############################################################################
# Formatting functions
##############################################################################

# Determine the From header of the message.  The user is taken from CVSUSER
# if set (meaning we're running from inside a CVS server) and otherwise from
# the system passwd entry for the current UID.  That user is looked up in the
# passwd file in CVSROOT, and if found there, the fourth field is used as the
# full name and the fifth as the e-mail address (the last matching line wins).
# If no name was found there, the full name from the system passwd file is
# used.  If no address was found, the user name is used as the address, or
# "cvs" if we couldn't even determine a user.
#
# Returns the complete From header, including the trailing newline.
sub build_from {
    my $cvsuser = $ENV{CVSUSER} || scalar (getpwuid $<);
    my $name = '';
    my $address = '';
    if ($cvsuser) {
        if (open (PASSWD, "$REPOSITORY/CVSROOT/passwd")) {
            local $_;
            while (<PASSWD>) {
                chomp;
                next unless /:/;
                my @info = split ':';
                if ($info[0] eq $cvsuser) {
                    $name = $info[3];
                    $address = $info[4];
                }
            }
            close PASSWD;
        }
        $name ||= (getpwnam $cvsuser)[6];
    }

    # The name will be undefined if the matching passwd line was short or the
    # user isn't in the system passwd file either.  Treat that the same as
    # not having a name.
    $name = '' unless defined $name;
    $address ||= $cvsuser || 'cvs';

    # Only use the part of the GECOS field before the first comma.
    $name =~ s/,.*//;

    # Anything other than word characters and spaces requires quoting, and
    # inside the quotes, double quotes and backslashes have to be escaped.
    if ($name =~ /[^\w ]/) {
        $name =~ s/(["\\])/\\$1/g;
        $name = '"' . $name . '"';
    }
    return "From: " . ($name ? "$name <$address>" : $address) . "\n";
}

# Form the Subject header of the message.  Takes the data hash, the text to
# put at the start of the subject, and a flag saying to always list every
# file.  The subject consists of the tag (unless it's made up solely of digits
# and periods), the module, and then [new] for a directory add, [import] for
# an import, or the affected files in parentheses.  Unless the flag is set,
# the list of files is replaced with a count of the files when it wouldn't fit
# in what's left of 78 columns after the prefix and module (but we always
# allow at least eight characters).  Returns the header with a trailing
# newline.
sub build_subject {
    my ($data, $prefix, $long) = @_;
    my $header = "Subject: " . $prefix;
    my $room = 78 - length ($header) - length ($data->{module});
    $room = 8 if $room < 8;
    my $type = $data->{type};
    my $subject;
    if ($type eq 'directory') {
        $subject = "[new]";
    } elsif ($type eq 'import') {
        $subject = "[import]";
    } else {
        my @names = sort keys %{ $data->{files} };
        @names = simplify ($data->{prefix}, \@names);
        my $joined = join (' ', @names);

        # Newlines in file names must not break the header.
        $joined =~ tr/\n\r/ /;
        if ($long || length ($joined) <= $room) {
            $subject = "($joined)";
        } else {
            my $count = @names;
            $subject = '(' . $count . ($count > 1 ? " files" : " file") . ')';
        }
    }
    $subject = "$data->{module} $subject" if $data->{module};
    if ($data->{tag} && $data->{tag} =~ /[^\d.]/) {
        $subject = "$data->{tag} $subject";
    }
    return $header . $subject . "\n";
}

# Format a list of files for the body of the message.  Takes the label for the
# list (such as Added) and the files, and returns a single string containing
# the files separated by spaces and wrapped so that there are no more than 64
# characters of files on a line where possible.  Every line is indented ten
# spaces, except that on the first line the indentation is replaced by the
# label, right-aligned in eight columns and followed by a colon and a space.
sub build_filelist {
    my ($prefix, @files) = @_;
    my $pending = join (' ', @files);

    # Take lines off the front of the list for as long as what's left is too
    # long, breaking at whitespace if we can and otherwise taking a whole
    # word no matter how long it is.
    my @wrapped;
    while (length ($pending) > 64) {
        last unless $pending =~ s/^(.{0,64})\s+// || $pending =~ s/^(\S+)//;
        push (@wrapped, $1);
    }
    my $output = join ('', map { (' ' x 10) . $_ . "\n" } @wrapped);
    $output .= (' ' x 10) . $pending;
    $output =~ s/\s*$/\n/;
    my $label = (' ' x (8 - length ($prefix))) . $prefix;
    $output =~ s/^ {10}/$label: /;
    return $output;
}

# Build the lines at the top of the message body that describe the change as a
# whole.  Takes the data hash, a flag saying whether to show the working
# directory, and a flag saying whether to show the author.  The result always
# starts with the date and is followed by the author if requested.  Then, if
# showing the working directory, come the tag (if any), a blank line, and the
# repository and working directory paths; otherwise come the repository path
# and the tag (if any).  Returns the header as a list of lines.
sub build_header {
    my ($data, $showdir, $showauthor) = @_;
    my $user = $ENV{CVSUSER} || (getpwuid $<)[0] || $<;
    my $date = strftime ('%A, %B %e, %Y @ %T', localtime time);
    $date =~ s/  / /;
    my @header = ("    Date: $date\n");
    push (@header, "  Author: $user\n") if $showauthor;

    # Shorten paths that are 70 characters or longer by repeatedly replacing
    # the leading directory with an ellipsis for as long as there's a leading
    # directory to replace.  The working directory path keeps its leading
    # host: intact.
    my ($path, $local) = @$data{qw(repository localpath)};
    while (length ($path) > 69) {
        $path =~ s%^\.\.\.%%;
        $path =~ s%^/[^/]+%...% or last;
    }
    while (length ($local) > 69) {
        $local =~ s%^([\w.-]+:)\.\.\.%$1%;
        $local =~ s%^([\w.-]+:)/[^/]+%$1...% or last;
    }

    my @tag = $data->{tag} ? ("     Tag: $data->{tag}\n") : ();
    if ($showdir) {
        push (@header, @tag, "\n", "Update of $path\n", "     from $local\n");
    } else {
        push (@header, "    Path: $path\n", @tag);
    }
    return @header;
}

# Build a report for a particular commit; this includes the list of affected
# files and the commit message.  Returns the report as a list.  Takes the
# data, the commit message, and a flag saying whether to add version numbers
# to the file names.
#
# With version numbers, added files are followed by their new revision,
# removed files by their old revision, and modified files by both.  The common
# prefix is stripped from all of the file names.  If there is any commit
# message text, it follows the file lists, separated from them by a blank
# line.  Nothing is printed by this function; it only builds the report.
sub build_message {
    my ($data, $message, $versions) = @_;
    my @added    = sort @{ $$message{added} };
    my @modified = sort @{ $$message{modified} };
    my @removed  = sort @{ $$message{removed} };
    if ($versions) {
        @added    = map { "$_ ($$data{files}{$_}[1])" } @added;
        @removed  = map { "$_ ($$data{files}{$_}[0])" } @removed;
        @modified = map {
            "$_ ($$data{files}{$_}[0] -> $$data{files}{$_}[1])"
        } @modified;
    }
    @added    = simplify ($$data{prefix}, \@added);
    @modified = simplify ($$data{prefix}, \@modified);
    @removed  = simplify ($$data{prefix}, \@removed);
    my @message;
    push (@message, build_filelist ('Added',    @added))    if @added;
    push (@message, build_filelist ('Modified', @modified)) if @modified;
    push (@message, build_filelist ('Removed',  @removed))  if @removed;
    if (@{ $$message{text} }) {
        push (@message, "\n") if (@added || @modified || @removed);
        push (@message, @{ $$message{text} });
    }
    return @message;
}

# Work out the -r flags to give to CVS to get the diff for a file.  Takes a
# reference to the data hash and the name of the file and returns a list of
# flags and revisions.  An added file is compared against revision 0.0 and a
# removed file is compared against the revision after the last one it had.
# Returns the empty list if the file has no new revision or if the old and new
# revisions are the same.
sub build_version_flags {
    my ($data, $file) = @_;
    my @revisions = @{ $data->{files}{$file} };
    my ($old, $new) = @revisions;
    return unless $new && ($old ne $new);
    my @flags;
    if ($old eq 'NONE') {
        @flags = ('-r', '0.0', '-r', $new);
    } elsif ($new eq 'NONE') {
        @flags = ('-r', $old, '-r', next_version ($old));
    } else {
        @flags = map { ('-r', $_) } @revisions;
    }
    return @flags;
}

# Build cvsweb diff URLs.  Right now, this is very specific to cvsweb, but
# could probably be extended for other web interfaces to CVS.  Takes the data
# hash and the base URL for cvsweb.
#
# Returns a list of lines, starting with a heading and followed by one URL
# for each affected file in sorted order.  Each URL is the base URL, the
# escaped file name with .diff appended, and a query string asking for
# formatted output (f=h) and giving the old and new revisions as r1 and r2.
sub build_cvsweb {
    my ($data, $cvsweb) = @_;
    my $options = 'f=h';
    my @cvsweb = ("Diff URLs:\n");
    for my $name (sort keys %{ $$data{files} }) {
        my @versions = @{ $$data{files}{$name} };
        next unless @versions;
        my $file = $name;

        # Escape anything that isn't safe in a URL.  Percent escapes must
        # always be two hex digits, so pad with a zero for characters below
        # 0x10 (such as tab or newline).
        for ($file, @versions) {
            s{([^a-zA-Z0-9\$_.+!*\'(),/-])} {sprintf "%%%02x", ord ($1)}ge;
        }
        my $url = "$cvsweb/$file.diff?$options&r1=$versions[0]"
            . "&r2=$versions[1]\n";
        push (@cvsweb, $url);
    }
    return @cvsweb;
}

# Run a cvs rdiff between the old and new versions and return the output.
# This is useful for small changes where you want to see the changes in
# e-mail, but probably creates too large of messages when the changes get
# bigger.  Note that this stores the full diff output in memory.
#
# Takes the data hash and returns the combined output of cvs rdiff -u
# (including anything it wrote to standard error) for every affected file in
# sorted order, as a list of lines.  Files for which no revisions can be
# determined are skipped.  If cvs reports that it couldn't read the diff file
# header, the file is assumed to be binary and its diff is replaced with a
# note saying so.
sub build_diff {
    my $data = shift;
    my @difflines;
    for my $file (sort keys %{ $$data{files} }) {
        my @versions = build_version_flags ($data, $file);
        next unless @versions;

        # Fork with a pipe from the child's standard output and run cvs in
        # the child without going through the shell.
        my $pid = open (CVS, '-|');
        if (!defined $pid) {
            die "$0: can't fork cvs: $!\n";
        } elsif ($pid == 0) {
            open (STDERR, '>&STDOUT') or die "$0: can't reopen stderr: $!\n";
            exec ('cvs', '-fnQq', '-d', $REPOSITORY, 'rdiff', '-u',
                  @versions, $file) or die "$0: can't fork cvs: $!\n";
        } else {
            my @diff = <CVS>;
            close CVS;

            # Only look at the second line if cvs actually gave us one.
            if (@diff > 1 && $diff[1] =~ /failed to read diff file header/) {
                @diff = ($diff[0], "<<Binary file>>\n");
            }
            push (@difflines, @diff);
        }
    }
    return @difflines;
}

# Build a summary of the changes by building the patch it represents in /tmp
# and then running diffstat on it.  This gives a basic idea of the order of
# magnitude of the changes.  Takes the data hash and the path to diffstat as
# arguments.
#
# NOTE(review): despite the description above, nothing visible here writes the
# patch to /tmp; the cvs rdiff output is piped straight into diffstat.
#
# Returns the diffstat output as a list of lines, preceded by a separator line
# of dashes ending in a plus sign.
sub build_summary {
    my ($data, $diffstat) = @_;
    $diffstat ||= 'diffstat';

    # Start diffstat.  We write the patch to IN and read the statistics back
    # from OUT once all of the input has been written and IN has been closed.
    open2 (\*OUT, \*IN, $diffstat, '-w', '78')
        or die "$0: can't fork $diffstat: $!\n";

    # The short names of files that cvs couldn't diff, assumed to be binary.
    my @binary;
    for my $file (sort keys %{ $$data{files} }) {
        my @versions = build_version_flags ($data, $file);
        next unless @versions;

        # Fork with a pipe from the child's standard output and run cvs rdiff
        # in the child, with standard error sent to the same pipe.
        my $pid = open (CVS, '-|');
        if (!defined $pid) {
            die "$0: can't fork cvs: $!\n";
        } elsif ($pid == 0) {
            open (STDERR, '>&STDOUT') or die "$0: can't reopen stderr: $!\n";
            exec ('cvs', '-fnQq', '-d', $REPOSITORY, 'rdiff', '-u',
                  @versions, $file) or die "$0: can't fork cvs: $!\n";
        }
        local $_;
        while (<CVS>) {
            # Strip the common prefix from the file names in the diff headers
            # so that diffstat reports the same short names used elsewhere in
            # the message.
            s%^(\*\*\*|---|\+\+\+) \Q$$data{prefix}\E/*%$1 %;
            s%^Index: \Q$$data{prefix}\E/*%Index: %;
            if (/^diff -c/) { s% \Q$$data{prefix}\E/*% %g }
            if (/: failed to read diff file header/) {
                # cvs couldn't produce a diff.  Feed diffstat a fake one-line
                # patch for the file so that it still shows up in the output,
                # remember the file so that its statistics can be replaced
                # later, and ignore the rest of the cvs output for this file.
                my $short = $file;
                $short =~ s%^\Q$$data{prefix}\E/*%%;
                my $date = localtime;
                print IN "Index: $short\n";
                print IN "--- $short\t$date\n+++ $short\t$date\n";
                print IN "@@ -1,1 +1,1 @@\n+<<Binary file>>\n";
                push (@binary, $short);
                last;
            } else {
                print IN $_;
            }
        }
        close CVS;
    }
    close IN;
    my @stats = <OUT>;
    close OUT;

    # The column of the | in the first line of output determines the length of
    # the separator line added below.
    my $offset = index ($stats[0], '|');

    # Replace the statistics for each binary file with a note that the file is
    # binary, since the numbers came from the fake patch.
    for my $file (@binary) {
        @stats = map {
            s/^( +\Q$file\E +\|  +).*/$1<\<Binary file>>/;
            $_;
        } @stats;
    }
    unshift (@stats, '-' x $offset, "+\n");
    return @stats;
}

##############################################################################
# Configuration file handling
##############################################################################

# Read the defaults from cvslog.conf in CVSROOT, if it exists.  Each line of
# the file is a keyword, a colon, whitespace, and a value that may be enclosed
# in double quotes; blank lines and lines starting with # are ignored.
# Keywords are case-insensitive.  Lines that can't be parsed or that use an
# unknown keyword produce a warning and are otherwise ignored.
#
# Returns a list containing, in order, the address to which to send all
# commits (empty by default), the base URL for cvsweb (empty by default), the
# path to diffstat (just "diffstat" by default), the subject prefix, the
# default host for unqualified e-mail addresses (empty by default), any
# additional headers to add to the mail message as a single string with each
# header terminated by a newline, and the path to sendmail (by default, the
# first of /usr/sbin/sendmail and /usr/lib/sendmail that is executable, or
# /usr/lib/sendmail if neither is).
sub load_config {
    my $file = $REPOSITORY . '/CVSROOT/cvslog.conf';
    my ($sendmail) = grep { -x $_ } qw(/usr/sbin/sendmail /usr/lib/sendmail);
    $sendmail ||= '/usr/lib/sendmail';

    # The settings that simply take the last value given for them.
    my %config = (
        address  => '',
        cvsweb   => '',
        diffstat => 'diffstat',
        mailhost => '',
        sendmail => $sendmail,
        subject  => 'CVS update of ',
    );

    # Headers are different, since each header setting adds another one.
    my $headers = '';
    if (open (CONFIG, $file)) {
        local $_;
        while (<CONFIG>) {
            next if /^\s*\#/;
            next if /^\s*$/;
            chomp;
            my ($key, $value) = /^\s*(\S+):\s+(.*)/;
            unless ($value) {
                warn "$0:$file:$.: invalid config syntax: $_\n";
                next;
            }
            $value =~ s/\s+$//;
            $value =~ s/^\"(.*)\"$/$1/;
            my $setting = lc $key;
            if ($setting eq 'header') {
                $headers .= $value . "\n";
            } elsif (exists $config{$setting}) {
                $config{$setting} = $value;
            } else {
                warn "$0:$file:$.: unrecognized config line: $_\n";
            }
        }
        close CONFIG;
    }
    return (@config{qw(address cvsweb diffstat subject mailhost)}, $headers,
            $config{sendmail});
}

##############################################################################
# Main routine
##############################################################################

# Load the configuration file for defaults.
my ($address, $cvsweburl, $diffstat, $subject, $mailhost, $headers, $sendmail)
    = load_config;

# Parse command-line options.  The require_order setting is presumably there
# so that option parsing stops at the first non-option argument, leaving the
# arguments supplied by CVS in @ARGV for parse_files below.
my (@addresses, $cvsweb, $diff, $help, $longsubject, $merge, $omitauthor,
    $showdir, $summary, $version, $versions);
Getopt::Long::config ('bundling', 'no_ignore_case', 'require_order');
GetOptions ('address|a=s'        => \@addresses,
            'cvsweb|c'           => \$cvsweb,
            'debug|D'            => \$DEBUG,
            'diff|d'             => \$diff,
            'help|h'             => \$help,
            'include-versions|i' => \$versions,
            'long-subject|l'     => \$longsubject,
            'merge|m'            => \$merge,
            'omit-author|o'      => \$omitauthor,
            'show-directory|w'   => \$showdir,
            'summary|s'          => \$summary,
            'version|v'          => \$version) or exit 1;
if ($help) {
    # NOTE(review): $0 had its directory stripped near the top of the script,
    # so perldoc is given only the base name here -- confirm that it can
    # still find the script.
    print "Feeding myself to perldoc, please wait....\n";
    exec ('perldoc', '-t', $0);
} elsif ($version) {
    # Turn the RCS identifier string in $ID into "name version (date)",
    # dropping the ,v suffix from the file name and using dashes rather than
    # slashes in the date.
    my @version = split (' ', $ID);
    shift @version if $ID =~ /^\$Id/;
    my $version = join (' ', @version[0..2]);
    $version =~ s/,v\b//;
    $version =~ s/(\S+)$/($1)/;
    $version =~ tr%/%-%;
    print $version, "\n";
    exit;
}

# Make sure we have everything we need before reading anything from CVS.
die "$0: no addresses specified\n" unless ($address || @addresses);
die "$0: unable to determine the repository path\n" unless $REPOSITORY;
die "$0: no cvsweb URL specified in the configuration file\n"
    if $cvsweb && !$cvsweburl;
my $showauthor = !$omitauthor;

# Parse the input.
print "Options: ", join ('|', @ARGV), "\n" if $DEBUG;
print '-' x 78, "\n" if $DEBUG;
my %data;
parse_files (\%data, @ARGV);
parse_paths (\%data);
parse_filelist (\%data);
parse_message (\%data);
print '-' x 78, "\n" if $DEBUG;

# Check to see if this is part of a multipart commit.  If so, just save the
# data for later.  Otherwise, read in any saved data and add it to our data.
# We know there is more to come if the last directory recorded for this commit
# is not the directory we were just told about.
if ($merge && $data{type} eq 'commit') {
    my $lastdir = read_lastdir;
    if ($lastdir && $data{repository} ne $lastdir) {
        save_data (\%data) and exit 0;
        # Fall through and send a notification if save_data fails.
    } else {
        read_data (\%data);
    }
}
$data{messages} = [ $data{message} ] unless $data{messages};

# Exit if there are no addresses to send the message to.
# NOTE(review): this looks unreachable, since the script has already died
# above if neither $address nor @addresses was set.
exit 0 if (!$address && !@addresses);

# Open our mail program.  Turn on autoflush for the pipe and then restore the
# previously selected file handle.
open (MAIL, "| $sendmail -t -oi -oem")
    or die "$0: can't fork $sendmail: $!\n";
my $oldfh = select MAIL;
$| = 1;
select $oldfh;

# Build the mail headers.  If a mail host was configured, it's appended to
# any address that doesn't already contain an @.  Addresses given on the
# command line go in the To header with the configured address, if any, in the
# Cc header; without command-line addresses, the configured address goes in
# the To header.
if ($mailhost) {
    for ($address, @addresses) {
        if ($_ && !/\@/) {
            $_ .= '@' . $mailhost unless /\@/;
        }
    }
}
if (@addresses) {
    print MAIL "To: ", join (', ', @addresses), "\n";
    print MAIL "Cc: $address\n" if $address;
} else {
    print MAIL "To: $address\n";
}
print MAIL build_from;
print MAIL $headers if $headers;
print MAIL build_subject (\%data, $subject, $longsubject), "\n";

# Build the message and write it out.  The optional diffstat summary, cvsweb
# URLs, and full diffs are only included for regular commits.
print MAIL build_header (\%data, $showdir, $showauthor);
for (@{ $data{messages} }) {
    print MAIL "\n", build_message (\%data, $_, $versions);
}
if ($data{type} eq 'commit') {
    print MAIL "\n\n", build_summary (\%data, $diffstat) if $summary;
    print MAIL "\n\n", build_cvsweb (\%data, $cvsweburl) if $cvsweb;
    print MAIL "\n\n", build_diff (\%data) if $diff;
}

# Make sure sending mail succeeded.
close MAIL;
unless ($? == 0) { die "$0: sendmail exit status " . ($? >> 8) . "\n" }
exit 0;
__END__

##############################################################################
# Documentation
##############################################################################

=head1 NAME

cvslog - Mail CVS commit notifications

=head1 SYNOPSIS

B<cvslog> [B<-cDdhilmosvw>] [B<-a> I<address> ...] %{sVv}

=head1 REQUIREMENTS

CVS 1.10 or later, Perl 5.004 or later, diffstat for the B<-s> option, and a
sendmail command that can accept formatted mail messages for delivery.

=head1 DESCRIPTION

B<cvslog> is intended to be run out of CVS's F<loginfo> administrative file.
It parses the (undocumented) format of CVS's commit notifications, cleans it
up and reformats it, and mails the notification to one or more e-mail
addresses.  Optionally, a diffstat(1) summary of the changes can be added to
the notification, and a CVS commit spanning multiple directories can be
combined into a single notification (by default, CVS generates a separate
notification for each directory).

To combine a commit spanning multiple directories into a single notification,
B<cvslog> needs the help of an additional program run from the F<commitinfo>
administrative file that records the last directory affected by the commit.
See the description in L<"FILES"> for what files and directories must be
created.  One such suitable program is B<cvsprep> by the same author.

For information on how to add B<cvslog> to your CVS repository, see
L<"INSTALLATION"> below.  B<cvslog> also looks for a configuration file named
F<cvslog.conf>; for details on the format of that file, see
L<"CONFIGURATION">.

The From: header of the mail message sent by B<cvslog> is formed from the user
making the commit.  The contents of the environment variable CVSUSER or the
name of the user doing the commit if CVSUSER isn't set is looked up in the
F<passwd> file in the CVS repository, if present, and the fourth field is used
as the full name and the fifth as the user's e-mail address.  If that user
isn't found in F<passwd>, it's looked up in the system password file instead
to try to find a full name.  Otherwise, that user is just used as an e-mail
address.

=head1 OPTIONS

=over 4

=item B<-a> I<address>, B<--address>=I<address>

Send the commit notification to I<address> (possibly in addition to the
address defined in F<cvslog.conf>).  This option may occur more than once,
and all specified addresses will receive a copy of the notification.

=item B<-c>, B<--cvsweb>

Append the cvsweb URLs for all the diffs covered in the commit message to
the message.  The base cvsweb URL must be set in the configuration file.
The file name will be added and the C<r1> and C<r2> parameters will be
appended with the appropriate values, along with C<f=h> to request formatted
diff output.  Currently, the cvsweb URLs are not further configurable.

=item B<-D>, B<--debug>

Prints out the information B<cvslog> got from CVS as it works.  This option
is mostly useful for developing B<cvslog> and checking exactly what data CVS
provides.  The first line of output will be the options passed to B<cvslog>,
separated by C<|>.

=item B<-d>, B<--diff>

Append the full diff output for each change to the notification message.
This is probably only useful if you know that all changes for which
B<cvslog> is run will be small.  Note that the entire diff output is
temporarily stored in memory, so this could result in excessive memory usage
in B<cvslog> for very large changes.

When this option is given, B<cvslog> needs to be able to find B<cvs> in the
user's PATH.

If one of the committed files is binary and this is detected by B<cvs>,
B<cvslog> will suppress the diff and replace it with a note that the file is
binary.

=item B<-h>, B<--help>

Print out this documentation (which is done simply by feeding the script to
C<perldoc -t>).

=item B<-i>, B<--include-versions>

Include version numbers (in parentheses) after the file names in the lists
of added, removed, and changed files.  By default, only the file names are
given.

=item B<-l>, B<--long-subject>

Normally, B<cvslog> will just list the number of changed files rather than the
complete list of them if the subject would otherwise be too long.  This flag
disables that behavior and includes the full list of modified files in the
subject header of the mail, no matter how long it is.

=item B<-m>, B<--merge>

Merge multidirectory commits into a single notification.  This requires that a
program be run from F<commitinfo> to record the last directory affected by the
commit.  Using this option will cause B<cvslog> to temporarily record
information about a commit in progress in TMPDIR or /tmp; see L<"FILES">.

=item B<-o>, B<--omit-author>

Omit the author information from the commit notification.  This is useful
where all commits are done by the same person (so the author information is
just noise) or where the author information isn't actually available.

=item B<-s>, B<--summary>

Append to each commit notification a summary of the changes, produced by
generating diffs and feeding those diffs to diffstat(1).  diffstat(1) must be
installed to use this option; see also the B<diffstat> configuration parameter
in L<"CONFIGURATION">.

When this option is given, B<cvslog> needs to be able to find B<cvs> in the
user's PATH.

If one of the committed files is binary and this is detected by B<cvs>,
B<cvslog> will replace the uninformative B<diffstat> line corresponding to
that file (B<diffstat> will indicate that nothing changed) with a note that
the file is binary.

=item B<-v>, B<--version>

Print out the version of B<cvslog> and exit.

=item B<-w>, B<--show-directory>

Show the working directory from which the commit was made.  This is usually
not enlightening and when running CVS in server mode will always be some
uninteresting directory in /tmp, so the default is to not include this
information.

=back

=head1 CONFIGURATION

B<cvslog> will look for a configuration file named F<cvslog.conf> in the
CVSROOT directory of your repository.  Absence of this file is not an error;
it just means that all of the defaults will be used.  The syntax of this file
is one configuration parameter per line in the format:

    parameter: value

The value may be enclosed in double-quotes and must be enclosed in
double-quotes if there is trailing whitespace that should be part of the
value.  There is no way to continue a line; each parameter must be a single
line.  Lines beginning with C<#> are comments.

The following configuration parameters are supported:

=over 4

=item address

The address or comma-separated list of addresses to which all commit messages
should be sent.  If this parameter is not given, the default is to send the
commit message only to those addresses specified with B<-a> options on the
command line, and there must be at least one B<-a> option on the command line.

=item cvsweb

The base URL for cvsweb diffs for this repository.  Only used if the B<-c>
option is given; see the description of that option for more information
about how the full URL is constructed.

=item diffstat

The full path to the diffstat(1) program.  If this parameter is not given, the
default is to look for diffstat(1) on the user's PATH.  Only used if the B<-s>
option is given.

=item header

The value should be a valid mail header, such as "X-Ticket: cvs".  This header
will be added to the mail message sent.  This configuration parameter may
occur multiple times, and all of those headers will be added to the message.

=item mailhost

The hostname to append to unqualified addresses given on the command line with
B<-a>.  If set, an C<@> and this value will be appended to any address given
with B<-a> that doesn't contain C<@>.  This parameter exists solely to allow
for shorter lines in the F<loginfo> file.

=item sendmail

The full path to the sendmail binary.  If not given, this setting defaults to
either C</usr/sbin/sendmail> or C</usr/lib/sendmail>, whichever is found,
falling back on C</usr/lib/sendmail>.

=item subject

The subject prefix to use for the mailed notifications.  Appended to this
prefix will be the module or path in the repository of the affected directory
and then either a list of files or a count of files depending on the available
space.  The default is C<"CVS update of ">.

=back

=head1 INSTALLATION

Follow these steps to add cvslog to your project:

=over 4

=item 1.

Check out CVSROOT for your repository (see the CVS manual if you're not sure
how to do this), copy this script into that directory, change the first line
to point to your installation of Perl if necessary, and cvs add and commit it.

=item 2.

Add a line like:

    cvslog Unable to check out CVS log notification script cvslog

to F<checkoutlist> in CVSROOT and commit it.

=item 3.

If needed, create a F<cvslog.conf> file as described above and cvs add and
commit it.  Most installations will probably want to set B<address>, since the
most common CVS configuration is a single repository per project with all
commit notifications sent to the same address.  If you don't set B<address>,
you'll need to add B<-a> options to every invocation of B<cvslog> in
F<loginfo>.

=item 4.

If you created a F<cvslog.conf> file, add a line like:

    cvslog.conf Unable to check out cvslog configuration cvslog.conf

to F<checkoutlist> in CVSROOT and commit it.

=item 5.

Set up your rules in F<loginfo> for those portions of the repository you want
to send CVS commit notifications for.  A good starting rule is:

    DEFAULT     $CVSROOT/CVSROOT/cvslog %{sVv}

which will send notifications to the value of B<address> in F<cvslog.conf> for
every commit to your repository that doesn't have a separate, more specific
rule.  You must always invoke B<cvslog> as $CVSROOT/CVSROOT/cvslog; B<cvslog>
uses the path it was invoked as to find the root of the repository.
If you have different portions of your repository that should send
notifications to different places, you can use a series of rules like:

    ^foo/       $CVSROOT/CVSROOT/cvslog -a foo-commit %{sVv}
    ^bar/       $CVSROOT/CVSROOT/cvslog -a bar-commit %{sVv}

This will send notification of commits to anything in the C<foo> directory
tree in the repository to foo-commit (possibly qualified with B<mailhost> from
F<cvslog.conf>) and everything in C<bar> to bar-commit.  No commit
notifications will be sent for any other commits.  The C<%{sVv}> string is
replaced by CVS with information about the committed files and should always
be present.

If you are using CVS version 1.12.6 or later, the format strings for
F<loginfo> rules have changed.  Instead of C<%{sVv}>, use C<-- %p %{sVv}>,
once you've set UseNewInfoFmtStrings=yes in F<config>.  For example:

    DEFAULT     $CVSROOT/CVSROOT/cvslog -- %p %{sVv}

Any options to B<cvslog> should go before C<-->.  See the CVS documentation
for more details on the new F<loginfo> format.

=item 6.

If you want summaries of changes, obtain and compile diffstat and add B<-s> to
the appropriate lines in F<loginfo>.  You may also need to set B<diffstat> in
F<cvslog.conf>.

diffstat is at L<http://dickey.his.com/diffstat/diffstat.html>.

=item 7.

If you want merging of multidirectory commits, add B<-m> to the invocations of
B<cvslog>, copy B<cvsprep> into your checked out copy of CVSROOT, change the
first line of the script if necessary to point to your installation of Perl,
and cvs add and cvs commit it.  Then, add a line like:

    cvsprep Unable to check out CVS log notification script cvsprep

to F<checkoutlist> in CVSROOT and commit it.

See L<"WARNINGS"> for some warnings about the security of multi-directory
commit merging.

=item 8.

If your operating system doesn't pass the full path to the B<cvslog>
executable to this script when it runs, you'll need to edit the beginning of
this script and set $REPOSITORY to the correct path to the root of your
repository.  This should not normally be necessary.  See the comments in this
script for additional explanation.

=back

=head1 EXAMPLES

Send all commits under the baz directory to B<address> defined in
F<cvslog.conf>:

    ^baz/       $CVSROOT/CVSROOT/cvslog -msw %{sVv}

Multidirectory commits will be merged if B<cvsprep> is also installed, a
diffstat(1) summary will be appended to the notification, and the working
directory from which the files were committed will also be included.  This
line should be put in F<loginfo>.

See L<"INSTALLATION"> for more examples.

=head1 DIAGNOSTICS

=over 4

=item can't fork %s: %s

(Fatal) B<cvslog> was unable to run a program that it wanted to run.  This may
result in no notification being sent or in information missing.  Generally
this means that the program in question was missing or B<cvslog> couldn't find
it for some reason.

=item can't open %s: %s

(Warning) B<cvslog> was unable to open a file.  For the modules file, this
means that B<cvslog> won't do any directory to module mapping.  For files
related to multidirectory commits, this means that B<cvslog> can't gather
together information about such a commit and will instead send an individual
notification for the files affected in the current directory.  (This means
that some information may have been lost.)

=item can't remove %s: %s

(Warning) B<cvslog> was unable to clean up after itself for some reason, and
the temporary files from a multidirectory commit have been left behind in
TMPDIR or F</tmp>.

=item can't save to %s: %s

(Warning) B<cvslog> encountered an error saving information about a
multidirectory commit and will instead send an individual notification for the
files affected in the current directory.

=item invalid directory %s

(Warning) Something was strange about the given directory when B<cvslog> went
to use it to store information about a multidirectory commit, so instead a
separate notification for the affected files in the current directory will be
sent.  This means that the directory was actually a symlink, wasn't a
directory, or wasn't owned by the right user.

=item invalid config syntax: %s

(Warning) The given line in F<cvslog.conf> was syntactically invalid.  See
L<"CONFIGURATION"> for the correct syntax.

=item no %s given by CVS (no %{sVv}?)

(Fatal) The arguments CVS passes to B<cvslog> should be the directory within
the repository that's being changed and a list of files being changed with
version information for each file.  Something in that was missing.  This error
generally means that the invocation of B<cvslog> in F<loginfo> doesn't have
the magic C<%{sVv}> variable at the end but instead has no variables or some
other variable like C<%s>, or means that you're using a version of CVS older
than 1.10.

=item no addresses specified

(Fatal) There was no B<address> parameter in F<cvslog.conf> and no B<-a>
options on the command line.  At least one recipient address must be specified
for the CVS commit notification.

=item sendmail exit status %d

(Fatal) sendmail exited with a non-zero status.  This may mean that the
notification message wasn't sent.

=item unable to determine the repository path

(Fatal) B<cvslog> was unable to find the root of your CVS repository from the
path by which it was invoked.  See L<"INSTALLATION"> for hints on how to fix
this.

=item unrecognized config line: %s

(Warning) The given configuration parameter isn't one of the ones that
B<cvslog> knows about.

=back

=head1 FILES

All files relative to $CVSROOT will be found by looking at the full path
B<cvslog> was invoked as and pulling off the path before C<CVSROOT/cvslog>.
If this doesn't work on your operating system, you'll need to edit this script
to set $REPOSITORY.

=over 4

=item $CVSROOT/CVSROOT/cvslog.conf

Read for configuration directives if it exists.  See L<"CONFIGURATION">.

=item $CVSROOT/CVSROOT/modules

Read to find the module a given file is part of.  Rather than always giving
the full path relative to $CVSROOT of the changed files, B<cvslog> tries to
find the module that that directory belongs to and replaces the path of that
module with the name of the module in angle brackets.  Modules are found by
reading this file, looking at the last white-space-separated word on each
line, and if it contains a C</>, checking to see if it is a prefix of the path
to the files affected by a commit.  If so, the first white-space-separated
word on that line of F<modules> is taken to be the affected module.  The first
matching entry is used.

=item $CVSROOT/CVSROOT/passwd

Read to find the full name and e-mail address corresponding to a particular
user.  The full name is expected to be the fourth colon-separated field
and the e-mail address the fifth.  Defaults derived from the system password
file are used if these are not provided.

=item TMPDIR/cvs.%d.%d

Information about multidirectory commits is read from and stored in this
directory.  This script will never create this directory (the helper script
B<cvsprep> that runs from F<commitinfo> has to do that), but it will read and
store information in it and when the commit message is sent, it will delete
everything in this directory and remove the directory.

The first %d is the numeric UID of the user running B<cvslog>.  The second %d
is the process group B<cvslog> is part of.  The process group is included in
the directory name so that if you're running a shell that calls setpgrp() (any
modern shell with job control should), multiple commits won't collide with
each other even when done from the same shell.

If TMPDIR isn't set in the environment, F</tmp> is used for TMPDIR.

=item TMPDIR/cvs.%d.%d/directory

B<cvslog> expects this file to contain the name of the final directory
affected by a multidirectory commit.  Each B<cvslog> invocation will save the
data that it's given until B<cvslog> is invoked for this directory, and then
all of the saved data will be combined with the data for that directory and
sent out as a single notification.

This file must be created by a script such as B<cvsprep> run from
F<commitinfo>.  If it isn't present, B<cvslog> doesn't attempt to combine
multidirectory commits, even if B<-m> is used.

=back

=head1 ENVIRONMENT

=over 4

=item PATH

Used to find cvs when the B<-d> or B<-s> option is in effect, and to find
diffstat when the B<-s> option is in effect.  If the B<diffstat>
configuration option is set, diffstat isn't searched for on the user's PATH,
but cvs must always be found on the user's PATH in order for diffs and
diffstat summaries to work.

=item TMPDIR

If set, specifies the temporary directory to use instead of F</tmp> for
storing information about multidirectory commits.  Setting this to some
private directory is recommended if you're doing CVS commits on a multiuser
machine with other untrusted users due to the standard troubles with safely
creating files in F</tmp>.  (Note that other programs besides B<cvslog> also
use TMPDIR.)

=back

=head1 WARNINGS

Merging multidirectory commits requires creating predictably-named files to
communicate information between different processes.  By default, those files
are created in F</tmp> in a directory created for that purpose.  While this
should be reasonably safe on systems that don't allow one to remove
directories owned by other people in F</tmp>, since a directory is used rather
than an individual file and since various sanity checks are made on the
directory before using it, this is still inherently risky on a multiuser
machine with a world-writeable F</tmp> directory if any of the other users
aren't trusted.

For this reason, I highly recommend setting TMPDIR to some directory, perhaps
in your home directory, that only you have access to if you're in that
situation.  Not only will this make B<cvslog> more secure, it may make some of
the other programs you run somewhat more secure (lots of programs will use the
value of TMPDIR if set).  I really don't trust the security of creating any
predictably-named files or directories in F</tmp> and neither should you.

Multiple separate B<cvslog> invocations in F<loginfo> interact oddly with
merging of multidirectory commits.  The commit notification will be sent to
the addresses and in the style configured for the last invocation of
B<cvslog>, even if some of the earlier directories had different notification
configurations.  As a general rule, it's best not to merge multidirectory
commits that span separate portions of the repository with different
notification policies.

B<cvslog> doesn't support using B<commit_prep> (which comes with CVS) as a
F<commitinfo> script to provide information about multidirectory commits
because it writes files directly in F</tmp> rather than using a subdirectory.

Some file names simply cannot be supported correctly in CVS versions prior
to 1.12.6 (with new-style info format strings turned on) because of
ambiguities in the output from CVS.  For example, file names beginning with
spaces are unlikely to produce the correct output, and file names containing
newlines will likely result in odd-looking mail messages.

=head1 BUGS

There probably should be a way to specify the path to cvs for generating
summaries and diffs, to turn off the automatic module detection stuff, to
provide for transformations of the working directory (stripping the domain
off the hostname, shortening directory paths in AFS), and to configure the
maximum subject length.  The cvsweb support could stand to be more
customizable.

Many of the logging scripts out there are based on B<log_accum>, which comes
with CVS and uses a different output format for multidirectory commits.  I
prefer the one in B<cvslog>, but it would be nice if B<cvslog> could support
either.

File names containing spaces may be wrapped at the space in the lists of
files added, modified, or removed.  The lists may also be wrapped in the
middle of the appended version information if B<-i> is used.

Multi-directory commit merging may mishandle file names that contain
embedded newlines even with CVS version 1.12.6 or later due to the file
format that B<cvslog> uses to save the intermediate data.

=head1 NOTES

Some parts of this script are horrible hacks because the entirety of commit
notification handling in CVS is a horrible, undocumented hack.  Better commit
notification support in CVS proper would be welcome, even if it would make
this script obsolete.

=head1 SEE ALSO

cvs(1), diffstat(1), cvsprep(1).

diffstat is at L<http://dickey.his.com/diffstat/diffstat.html>.

Current versions of this program are available from its web site at
L<http://www.eyrie.org/~eagle/software/cvslog/>.  B<cvsprep> is available
from this same location.

=head1 AUTHOR

Russ Allbery <rra@stanford.edu>.

=head1 COPYRIGHT AND LICENSE

Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004 Board of Trustees, Leland
Stanford Jr. University.

This program is free software; you can redistribute it and/or modify it
under the same terms as Perl itself.

=cut