#! /usr/bin/perl

    #	    	    	    	  Page Visit Counter
    #	    	    	    	     Version 1.0
    
    #	    	    	    by John Walker  --  October MMIV
    #	    	    	       http://www.fourmilab.ch/
    
    #	If you have received this script without documentation, please consult
    #	the Web page:
    #
    #	    	    http://www.fourmilab.ch/webtools/PageVisits/
    #
    #	for installation instructions and to download the latest version.
    
    #	Directory Configuration
    my $CGI_Directory = '/var/www/cgi-bin';
    my $NETPBM_Directory = '/usr/bin'; 	    # Where PPM tools are found

    #	This Perl CGI program maintains a graphical Web counter assembled
    #	from a set of digits in Portable Pixmap (.ppm) format in a
    #	subdirectory of your CGI directory.  The counter is maintained in
    #	a little ASCII data file.  Every time the script is run, it
    #	increments the counter (using flock() to guard against race
    #	conditions), then manufactures a PNG image by concatenating
    #	the digit images to assemble the number.
    
    #	(Bonehead versions of Netscape have a bug which causes them, once
    #	they get an image in the cache, to re-display it regardless of whether
    #	the page is reloaded, the HTTP headers specify no cache, or anything
    #	else.  If you just reload the page repeatedly, the counter will not
    #	appear to update, but if you open the counter image in a new browser
    #	window and reload it, you'll see that it really has.  Mozilla and
    #	Explorer do not have this dumb bug, which has been in Netscape
    #	for years and affects any page with images that change, not just
    #	scripts like this counter.)
    
    use strict;
    use warnings;
    use FileHandle;
    use Fcntl ':flock';     	# Import flock() LOCK_ constants
    
    my $prog = "PageVisits";
    
    #	If called with an argument of "test", verify that directories
    #	are properly configured and that required utilities are present.
    
    if (($#ARGV >= 0) && ($ARGV[0] eq 'test')) {
    	my $errors = 0;
	
    	if (!(-d $CGI_Directory)) {
	    print(STDERR '** $CGI_Directory' . " $CGI_Directory does not exist.\n");
	    $errors++;
	}
    	if (!(-d $NETPBM_Directory)) {
	    print(STDERR '** $NETPBM_Directory' . " $NETPBM_Directory does not exist.\n");
	    $errors++;
	} else {
    	    if (!(-f "$NETPBM_Directory/pnmcat")) {
		print(STDERR "Netpbm utility pnmcat not found in $NETPBM_Directory.\n");
		$errors++;
	    }
    	    if (!(-f "$NETPBM_Directory/pnmtopng")) {
		print(STDERR "Netpbm utility pnmtopng not found in $NETPBM_Directory.\n");
		$errors++;
	    }
	}
	if ($errors == 0) {
    	    print(STDERR "$prog configuration tests passed.\n");
	} else {
	    print(STDERR "$errors errors found in PageVisits configuration tests.\n");
	}
	exit(0);
    }
        
    my $Output_Filter = "$NETPBM_Directory/pnmtopng 2>/dev/null";
    my $MIME_type = 'image/png';
    
    parse_query_string();
    my $CounterDirectory = $ENV{'WWW_dir'};
    my $CounterFileName = $ENV{'WWW_pageid'};
    my $FontPrefix = 'times';
    if (defined($ENV{'WWW_font'})) {
    	$FontPrefix = $ENV{'WWW_font'};
    }
    
    #	Now we want to be *really* paranoid about the contents of
    #	these directory and file name specifications.  A malicious
    #	user may try to embed path specifications to escape from the
    #	CGI pigpen and scribble into other directories.  The
    #	following tr statement deletes all characters from the
    #	$CounterDirectory, $CounterFileName, and $FontPrefix other
    #	than letters, numbers, hyphen, and underscore.
    
    $CounterDirectory =~ tr/A-Za-z0-9_-//cd;
    $CounterFileName =~ tr/A-Za-z0-9_-//cd;
    $FontPrefix =~ tr/A-Za-z0-9_-//cd;
    
    my $CounterFile = "$CounterDirectory/$CounterFileName.dat";
    
    if (!(-d $CounterDirectory)) {
    	die("$prog: Invalid counter directory \"$CounterDirectory\" in CGI request");
    }
   
    #	A malicious user may have pointed us at a subdirectory of the
    #	CGI directory which wasn't intended as a PageVisits directory
    #	with the intent of clobbering a "$CounterFileName.dat" belonging
    #	to a different application or creating such a file with
    #	malicious intent.  Verify that this is a counter directory
    #	by checking for the presence of the counter data file and all
    #	of the digit image files.  Unless all are present, it's a no-go.
     
    for (my $n = 0; $n <= 9; $n++) {
    	my $di = "$CounterDirectory/$FontPrefix$n.ppm";
	if (!(-f $di)) {
    	    die("$prog: Digit image \"$di\" does not exist");
	}
    }

    #	Open the counter data file, read the current value, increment it,
    #	and write back to file.  flock() is used to make this a primitive
    #	operation.
    
    my $count;
    my $validate = '';
    my $validated = 1;
    if (defined($ENV{'WWW_value'})) {
    	$count = $ENV{'WWW_value'};
    } else {
	if (!(-f $CounterFile)) {
    	    die("$prog: Counter file \"$CounterFile\" does not exist");
	}
	open(COUNTER, "+<$CounterFile") || die("$prog: Unable to open counter file $CounterFile");;
	flock(COUNTER, LOCK_EX);
	seek(COUNTER, 0, 0);
	$count = <COUNTER>;
	chop($count);
	
	#   If the counter data file contains a line following the
	#   count, it is taken to be an environment variable validation
	#   test.  This will usually be a test of the HTTP_REFERER
	#   variable to prevent denial of service attacks originating
	#   at other sites from "hijacking" the counter, both consuming
	#   resources on the server and inflating the count to ridiculous
	#   values.
	
	if (!eof(COUNTER)) {
	    $validate = <COUNTER>;
	    my $v = $validate;
	    $v =~ s/\s+$//;
	    if (!($v =~ m/^\s*(\w+)\s*=(~?)\s*(.*)$/)) {
	    	print(STDERR "$prog: Failed to parse validation request \"$v\"\n");
		$validated = 0;
	    } elsif (!defined($ENV{$1})) {
	    	print(STDERR "$prog: Validation variable \"$1\" not defined in environment.\n");
		$validated = 0;
    	    } elsif (($2 eq '~') ? ($ENV{$1} !~ m/$3/)
	    	    	    	 : ($ENV{$1} ne $3)) {
	    	print(STDERR "$prog Validation variable \"$1\"=$2\"$ENV{$1}\" does not match \"$3\".\n");
		$validated = 0;
	    }
	}
	
	#   Now, if the request passed validation, increment the counter
	#   and write it, along with the validation string, back to the
	#   counter data file.  If the counter value is being overriden
	#   by a "value=" specification, however, we do not write back the
	#   incremented counter value.  Even if the counter value is being
	#   overriden, we need to come this far so that validation is
	#   performed.
	
	if ($validated && (!defined($ENV{'WWW_value'}))) {
	    seek(COUNTER, 0, 0);
	    $count++;
	    print(COUNTER "$count\n");
	    if ($validate ne '') {
	        print(COUNTER $validate);
	    }
	}
	
	#   Unlock and close the counter data file
	
	flock(COUNTER, LOCK_UN);
	close(COUNTER);
    }
    
    #	If the requesting URL has specified a "value=<value>" argument,
    #	override the counter value from the file.
    
    if (defined($ENV{'WWW_value'})) {
    	$count = $ENV{'WWW_value'};
    }
    
    if (!$validated) {
    	die("$prog: Validation failed for request from HTTP_REFERER = " . $ENV{'HTTP_REFERER'} . "\n");
    }
    
    my $cmd = '';
    while ($count > 0) {
    	my $digit = $count % 10;
	$count = int($count / 10);
	$cmd = " $CounterDirectory/$FontPrefix$digit.ppm $cmd";
    }
    $cmd = "$NETPBM_Directory/pnmcat -lr $cmd | $Output_Filter\n";

    STDOUT->autoflush(1);
    print("Content-type: $MIME_type\r\n");
    print("Pragma: no-cache\r\n");
    print("Cache-control: no-cache\r\n");
    print("\r\n");
    chdir($CGI_Directory);
    system($cmd);
    
    #	Parse the CGI QUERY_STRING argument (placed in the environment when
    #	the Web server launches this script).  This function sets environment
    #	variables named WWW_varname for each field in the form with value the
    #	unquoted contents of each field.  This rather odd way of returning
    #	results ("Why not just use a regular hash?") is used to maintain
    #	compatibility with the UNCGI program.  A Perl script which previously
    #	required execution under the control of UNCGI may be run stand-alone
    #	by calling this function at the top.  This halves the number of processes
    #	the server must fork in order to process the request, and is one
    #	fewer thing for the beleaguered system administrator to install.
    
    sub parse_query_string {
    	my ($rmeth, @args, $instream, $var, $varname, $value);
	
	$rmeth = $ENV{'REQUEST_METHOD'};    # Request method ("GET" or "POST")
	$rmeth =~ tr/a-z/A-Z/;	    	    # Just in case server doesn't shout
	if ($rmeth eq 'GET') {
	    @args = split(/&/, $ENV{'QUERY_STRING'});
	} elsif ($rmeth eq 'POST') {
	    read(STDIN, $instream, $ENV{'CONTENT_LENGTH'});
	    @args = split(/&/, $instream);
	}
	foreach $var (@args) {
	    ($varname, $value) = split(/=/, $var);
	    $varname = unescape_URL($varname);
	    $value = unescape_URL($value);
	    #	Elide any server-side includes a mischevious user may have
	    #	embedded in a value string.  This protects against mayhem if
	    #	the value is included in an HTML reply.
	    $value =~ s/<!--(.|\n)*-->//g;
	    
	    #	Define an "uncgi" compatible environment variable for
	    #	each name/value pair.
	    
	    $ENV{"WWW_$varname"} = $value;
	}
    }
    
    #	Unescape a URL-encoded string,  There is no error checking for
    #	malformed "%xx" quoted sequences; if the percent sign isn't
    #	followed by two hex digits, the whole mess will be left in
    #	the string unchanged.
    
    sub unescape_URL {
    	my ($s) = @_;
	
	$s =~ tr/+/ /;
	$s =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
	
	return $s;
    }
