
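    #	Tabulate, hour by hour, the mix of attack hits between direct
    #	hits and those forwarded by proxy servers.  The table is printed
    #	on standard output and also written to proxyhist.csv so that it
    #	can be plotted.  This program must be fed an HTTP log in
    #	"forensic format" as defined by: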
    #
    #	LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\" \"%{Cache-Control}i\" \"%{Pragma}i\" \"%{X-Forwarded-For}i\"" forensic
    #	CustomLog /files/server/logs/http/forensic_log forensic

    use Time::Local;
    
    #   Request line that marks a record as an attack hit; a record is
    #   tallied only when its request string equals this value exactly.
    $target_page = 'GET / HTTP/1.1';

    #   Counts of tallied and non-matching records, plus the hour bin of
    #   the most recently tallied record (0 until the first one is seen).
    ($num_lines, $ignored_lines, $ltimebin) = (0, 0, 0);

    #   Month abbreviation as it appears in log timestamps => month number.
    %mnames = (
        Jan => 1,  Feb => 2,  Mar => 3,  Apr => 4,
        May => 5,  Jun => 6,  Jul => 7,  Aug => 8,
        Sep => 9,  Oct => 10, Nov => 11, Dec => 12,
    );
    
    #   A double-quoted log field; a doubled quote is accepted inside it.
    my $quoted_re = qr/"((?:[^"]|"")*)"/;

    #   One record of the "forensic" log format.  The capture groups keep
    #   their positions although only groups 1, 4, 5 and 12 are used below.
    my $record_re = qr/
        ^(\d+\.\d+\.\d+\.\d+)   #  1: client IPv4 address
        \s+(\S+)                #  2: remote logname
        \s+(\S+)                #  3: remote user
        \s+\[(.*)\]             #  4: timestamp
        \s+"(.*)"               #  5: request line
        \s+(\d+)                #  6: status code
        \s+([\-\d]+)            #  7: response size or a dash
        \s$quoted_re            #  8: Referer
        \s$quoted_re            #  9: User-Agent
        \s$quoted_re            # 10: Cache-Control
        \s$quoted_re            # 11: Pragma
        \s$quoted_re            # 12: X-Forwarded-For
    /x;

    #   Read the log (files named on the command line, else standard
    #   input) and tally every record whose request equals $target_page:
    #   per-host hit counts and first/last times, hourly totals split into
    #   direct and proxied hits, and per-hour host counts.
    while (my $line = <>) {
        $line =~ s/\s+$//;

        #   Lines that do not look like a forensic record are dropped
        #   without being counted.
        next unless $line =~ $record_re;
        my ($ip, $time_date, $request, $proxy) = ($1, $4, $5, $12);

        #   Anything other than the target request is merely counted.  The
        #   check comes first so no date arithmetic is spent on such lines.
        if ($request ne $target_page) {
            $ignored_lines++;
            next;
        }

        #   Parse date and time field.  A failed match would leave the
        #   record captures in place of the date parts, so it is checked.
        unless ($time_date =~ m{(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s[+\-]\d+$}) {
            warn("Skipping record with unparsable timestamp: $time_date\n");
            next;
        }
        my ($mday, $mon, $year, $hour, $minute, $second) = ($1, $2, $3, $4, $5, $6);

        my $mindex = $mnames{$mon};
        unless (defined($mindex)) {
            warn("Skipping record with unknown month: $time_date\n");
            next;
        }

        my $iso_date = sprintf("%04d-%02d-%02d %02d:%02d:%02d",
            $year, $mindex, $mday, $hour, $minute, $second);

        #   timelocal croaks on out-of-range fields; trap that so a single
        #   bad record cannot abort the whole run.
        #   NOTE(review): the fields are interpreted in this process's time
        #   zone and the UTC offset in the log is not applied -- confirm
        #   the script runs in the same zone as the server.
        my $utime = eval { timelocal($second, $minute, $hour, $mday, $mindex - 1, $year) };
        unless (defined($utime)) {
            warn("Skipping record with invalid date: $time_date\n");
            next;
        }

        $num_lines++;

        #   Per-host totals and first/last sighting.
        $hits{$ip}++;
        unless (defined($first{$ip})) {
            $first{$ip} = $iso_date;
            $ufirst{$ip} = $utime;
        }
        $last{$ip} = $iso_date;
        $ulast{$ip} = $utime;

        #   Hourly histogram.  $timebin is deliberately left a global: the
        #   code following this loop reads it to flush the final bin.
        $timebin = int($utime / 3600) * 3600;
        $timehist{$timebin}++;
        if ($proxy eq '-') {
            #   No X-Forwarded-For header was logged: a direct hit.
            $directhist{$timebin}++;
        }
        else {
            $proxyhist{$timebin}++;
        }

        #   On entering a different hour bin, tally the hosts gathered for
        #   the previous one.  A host counts only if it made at least 10
        #   target hits in that bin, and is "new" the first time it ever
        #   qualifies.
        #   NOTE(review): only the previous bin is compared, so this assumes
        #   records arrive in time order; a bin that reappears later would
        #   have its hosts tallied a second time.
        if ($ltimebin != $timebin) {
            if ($ltimebin != 0) {
                for my $host (keys %uhosts) {
                    next if $uhosts{$host} < 10;
                    $unique_hosts{$ltimebin}++;
                    unless (defined($ohosts{$host})) {
                        $new_hosts{$ltimebin}++;
                        $ohosts{$host} = 1;
                    }
                }
            }
            %uhosts = ();
            $ltimebin = $timebin;
        }
        $uhosts{$ip}++;
    }
    
    #   Tally the hosts of the last time bin, which the loop above never
    #   flushes because it only does so when a different bin follows.  A
    #   host counts towards the bin only if it made at least 10 target
    #   hits in it, and is also a "new" host if it never qualified before.
    for my $host (grep { $uhosts{$_} >= 10 } keys %uhosts) {
        $unique_hosts{$timebin}++;
        next if defined($ohosts{$host});
        $new_hosts{$timebin}++;
        $ohosts{$host} = 1;
    }
    
    print("Total records processed: $num_lines\n");
    print("Total records ignored:   $ignored_lines\n");

    #   Prepare time evolution report: one row per hour bin, printed as an
    #   aligned table on standard output and mirrored as CSV in
    #   proxyhist.csv in the current directory.

    print("    \n");
    print("  Hourly  Bin           Hits/Hour    Hits/Sec   Hosts    New Hosts  Direct    Proxy\n");

    #   Three-argument open on a lexical handle, checked so an unwritable
    #   output file is reported instead of silently producing no CSV.
    my $csv_name = 'proxyhist.csv';
    open(my $csv, '>', $csv_name) or die("Cannot create $csv_name: $!\n");

    #   Bin keys are epoch seconds and must be ordered numerically; the
    #   default string sort misorders keys whose digit counts differ.
    for my $bin (sort { $a <=> $b } keys %timehist) {
        my @lt = localtime($bin);
        my @stamp = ($lt[5] + 1900, $lt[4] + 1, $lt[3], $lt[2]);
        my $hits = $timehist{$bin};

        #   A bin with no entry in one of these tables is reported as 0.
        my @counts = map { $_->{$bin} || 0 }
            (\%unique_hosts, \%new_hosts, \%directhist, \%proxyhist);

        printf("%04d-%02d-%02d %02d:00   %12d  %9.2f  %8d  %8d   %8d  %8d\n",
            @stamp, $hits, $hits / 3600, @counts);
        printf {$csv} "%04d-%02d-%02d %02d:00,%d,%.4f,%d,%d,%d,%d\n",
            @stamp, $hits, $hits / 3600, @counts;
    }

    #   Buffered write errors only surface at close, so check it too.
    close($csv) or die("Error writing $csv_name: $!\n");
