#!/usr/bin/perl
# Copyright 2000-2005 Vlado Keselj www.cs.dal.ca/~vlado

# help - print the usage summary on the currently selected output handle.
# The text is a double-quoted here-document, so $VERSION is interpolated at
# call time; the leading "# " on each line is part of the printed text.
sub help {
    my $usage = <<"END_OF_USAGE";
# Report new material on a web page, version $VERSION
#
# Uses diff, lynx, sendmail (if option -e is used)
#
# Usage: report-new.pl [switches] URL
#  -h    Print help and exit.
#  -v    Print version of the program and exit.
#  -e email Sends output, if not empty, to email.
END_OF_USAGE
    print $usage;
}

########################### Customization ###########################
# $dump is a reference to the sub that fetches the page.  The script calls
# it with the URL as its only argument and stores whatever it returns as the
# new snapshot of the page, which is then compared with the previous one.
my $dump = \&dump_lynx_txt;  # choose one or write your own dump sub
#my $dump = \&dump_wget;
#####################################################################
use strict;
use POSIX qw(strftime);

use vars qw( $VERSION );
$VERSION = sprintf "%d.%d", q$Revision: 1.4 $ =~ /(\d+)/g;

use Getopt::Std;
use vars qw($opt_v $opt_h $opt_e);
getopts("hve:");

if ($opt_v) { print "$VERSION\n"; exit; }
elsif ($opt_h || !@ARGV) { help(); exit; }

# Exactly one argument is accepted and it must be an http or https URL.
(@ARGV == 1 && $ARGV[0] =~ m{^https?://})
    or die "Format: report-new.pl http://... or https://...\n";

my ($urlbase, $url);
$urlbase = $url = shift;   # E.g.: http://www.cs.dal.ca/~vlado/srcperl
# Everything up to and including the first "/" after the host part.
# $urlbase is not read anywhere else in this file.
if ( $url =~ m{\A(.*?//[^/]*/)}s )
{ $urlbase = $1 }          # E.g.: http://www.cs.dal.ca/

# The encoded URL serves as the file-name stem for this page's files.
# The "&" call form is kept because encode_w1 is defined further down, so
# its prototype cannot be applied at this point anyway.
my $urlId = &encode_w1($url);
my $timestamp = strftime("%Y-%m-%d-%T", localtime(time));

# Working directories are created in the current directory on first use.
if (! -d 'tmp')
{ mkdir 'tmp', 0700 or die "can't mkdir tmp: $!" }
if (! -d 'report-new.pl.d')
{ mkdir 'report-new.pl.d', 0700 or die "can't mkdir report-new.pl.d: $!" }

my $TmpBase  = "tmp/$urlId-$timestamp";
my $TmpFile1 = "$TmpBase-1";                       # fresh dump of the page
my $lastFile = "report-new.pl.d/$urlId.last";      # dump kept from last run
# On the first run for a URL start from an empty snapshot, so that the whole
# page counts as new.
-e $lastFile or putfile($lastFile,'');

# First step: grab the page.
my $material = $dump->($url);
# An empty dump is treated as a failed fetch.  Stop before the stored
# snapshot is replaced; otherwise the next successful run would report the
# entire page as new.
(defined $material && length $material)
    or die "report-new.pl: empty dump for $url, previous snapshot kept\n";
putfile($TmpFile1, $material);

# Second step: keep only the lines diff marks with "< ", i.e. lines of the
# fresh dump that have no counterpart in the previous one, and strip the
# marker.  Anything else diff prints (including its error messages, which
# arrive through 2>&1) is discarded.
$material = `diff $TmpFile1 $lastFile 2>&1`;
$material =~ s/^[^<].*\n//mg;
$material =~ s/^< //mg;

# Third step: deliver the report, by mail with -e, otherwise on stdout.
if ($material) {
    if ($opt_e) {
        # List-form pipe open: sendmail is started without a shell.
        open(my $out, '|-', 'sendmail', '-t')
            or die "report-new.pl: cannot run sendmail: $!\n";
        print $out "To: $opt_e\n".
             "Subject: [report-new.pl] $url\n\n$material";
        # close() on a pipe waits for sendmail and is false when it exits
        # with a non-zero status.  Dying here leaves the old snapshot in
        # place (and the fresh dump in tmp/), so the material is reported
        # again on the next run instead of being lost.
        close($out)
            or die "report-new.pl: sendmail failed (wait status $?)\n";
    }
    else { print $material }
}

# Last step: the fresh dump becomes the snapshot for the next run.  rename
# replaces an existing target, so no separate unlink is needed, and on
# failure the old snapshot is still there.
rename($TmpFile1, $lastFile)
    or die "report-new.pl: cannot rename $TmpFile1 to $lastFile: $!\n";

# putfile(FILE, LIST) - create or truncate FILE and write LIST to it.
#   FILE  path of the file to (over)write; used exactly as given
#   LIST  zero or more strings, written in the order given; with an empty
#         LIST the file is left empty
# Dies if FILE cannot be opened, written or closed.  Returns 1.
sub putfile($@) {
    my ($f, @chunks) = @_;
    # Three-argument open takes the name literally: leading or trailing
    # whitespace and characters such as ">" or "&" in $f cannot alter the
    # open mode or the file that gets written.
    open(my $fh, '>', $f) or die "putfile:cannot open $f:$!";
    for my $chunk (@chunks) {
        print {$fh} $chunk or die "putfile:cannot write $f:$!";
    }
    # Buffered write errors (e.g. a full disk) only show up at close time.
    close($fh) or die "putfile:cannot close $f:$!";
    return 1;
}

# encode_w1(STRING) - return a copy of STRING in which every underscore and
# every non-word character is replaced by "_" followed by the two hex
# digits, in upper case, that unpack("H2") gives for that character
# (":" becomes "_3A", "_" becomes "_5F").  Word characters other than the
# underscore pass through unchanged.
sub encode_w1( $ ) {
    my ($text) = @_;
    $text =~ s{([\W_])}{ '_' . uc(unpack('H2', $1)) }ge;
    return $text;
}

# dump_lynx_txt(URL) - run "lynx -dump -nolist" on URL and return what the
# command writes to standard output.
# The command line goes through the shell, so URL is wrapped in single
# quotes and each single quote inside it is rewritten as '"'"' (close the
# quoted string, emit a double-quoted single quote, reopen the string).
sub dump_lynx_txt {
    my ($target) = @_;
    $target =~ s/'/'"'"'/g;
    my $command = "lynx -dump -nolist '$target'";
    return qx{$command};
}

# dump_wget(URL) - run "wget --quiet -O -" on URL and return what the
# command writes to standard output.
# The command line goes through the shell, so URL is wrapped in single
# quotes and each single quote inside it is rewritten as '"'"' (close the
# quoted string, emit a double-quoted single quote, reopen the string).
sub dump_wget {
    my ($target) = @_;
    $target =~ s/'/'"'"'/g;
    my $command = "wget --quiet -O - '$target'";
    return qx{$command};
}

__END__
=head1 NAME

report-new.pl - Report new material on a web page

=head1 SYNOPSIS

  report-new.pl [switches] URL

=head1 DESCRIPTION

Reports new material on a web page.  Typically used as a cron job with
the -e option.

  -h    Print help and exit.
  -v    Print version of the program and exit.
  -e email Sends output, if not empty, to email.
   justify [input files]

=head1 PREREQUISITES

POSIX qw(strftime); uses diff, lynx or wget, sendmail (if option -e is
used).

=head1 SCRIPT CATEGORIES

Web

=head1 README

Reports new material on a web page.

=head1 SEE ALSO

Scripts:
wget

=head1 COPYRIGHT

Copyright 2000-5 Vlado Keselj F<http://www.cs.dal.ca/~vlado>

This script is provided "as is" without expressed or implied warranty.
This is free software; you can redistribute it and/or modify it under
the same terms as Perl itself.

The latest version can be found at F<http://www.cs.dal.ca/~vlado/srcperl/>.

=cut
