
    #	Look for onset of the attack.
    
    #	The input log is scanned searching for IP addresses which
    #	hit the $target_page $attack_criterion times or more without
    #	an intervening hit to another page.  Once marked as a possible
    #	attack, all subsequent hits are counted, using separate counters
    #	for the target page and all others.  A report is then printed
    #	showing the candidate events sorted in descending order of number
    #	of hits on the home page.

    use Time::Local;
    
    #   Request strings, exactly as they appear between the quotes of a
    #   log record, which count as a hit on the target (home) page.
    ($target_page, $aux_target_page) = ('GET / HTTP/1.1', 'HEAD / HTTP/1.1');

    #   Number of consecutive target-page hits from one IP address,
    #   with no hit on any other page in between, needed before that
    #   address is treated as a possible attacker.
    $attack_criterion = 5;

    #   Running counters.  $num_lines counts target-page records and
    #   $ignored_lines counts all other records; $ltimebin is only
    #   initialised here and is not referenced again in this section.
    $num_lines = $ignored_lines = $ltimebin = 0;

    #   Month abbreviation (as written in the log time stamp) to
    #   month number, 1 to 12.
    %mnames = (
        Jan => 1,  Feb => 2,  Mar => 3,  Apr => 4,
        May => 5,  Jun => 6,  Jul => 7,  Aug => 8,
        Sep => 9,  Oct => 10, Nov => 11, Dec => 12,
    );

    #   Load the host database if it exists.  You can create the host
    #   database from the HTTP log file of incidents with:
    #
    #       cut -d' ' -f1 <log.txt >/tmp/a
    #       paste -d' ' /tmp/a /tmp/a | sort | uniq >/tmp/b
    #       /usr/sbin/logresolve </tmp/b >hostdb.txt
    #       rm /tmp/a /tmp/b
    #
    #   See "updateHostDatabase" for a script which adds newly
    #   encountered IP addresses to an existing host database.
    #
    #   Each line of hostdb.txt holds two whitespace-separated fields:
    #   the resolved name followed by the IP address.  The result is
    #   %hostdb, keyed by the second field, whose value is the first
    #   field, or '?' when both fields are equal (the address was
    #   looked up but did not resolve).  A missing or unreadable file
    #   is not an error; %hostdb is simply left as it was.
    #
    if (open(my $hostdb_fh, '<', 'hostdb.txt')) {
        while (my $entry = <$hostdb_fh>) {
            chomp($entry);

            #   Skip blank or malformed lines rather than storing an
            #   entry built from missing fields.
            my ($name, $addr) = ($entry =~ m/^(\S+)\s+(\S+)/);
            next if (!defined($addr));

            $hostdb{$addr} = ($name eq $addr) ? '?' : $name;
        }
        close($hostdb_fh);
    }
    
    #   Scan the access log (the files named on the command line, or
    #   standard input) one record at a time.
    #
    #   For every IP address the loop maintains:
    #       $hits{$ip}      target-page hits in the current run
    #       $first{$ip}     time of the first hit of that run (ISO text)
    #       $ufirst{$ip}    the same, as a Unix time
    #       $last{$ip}      time of the most recent target-page hit
    #       $ulast{$ip}     the same, as a Unix time
    #       $nhits{$ip}     hits on other pages once the run has
    #                       reached $attack_criterion
    #
    #   A run which is interrupted by a hit on another page before it
    #   reaches $attack_criterion is discarded completely, so that the
    #   first-hit time always belongs to the run being counted.
    #
    #   Records which cannot be parsed are skipped and tallied instead
    #   of being passed on with missing fields (timelocal() dies when
    #   handed an out-of-range field, which would abort the whole scan).
    my $unparsed_lines = 0;

    while (my $line = <>) {
        $line =~ s/\s+$//;
#print("$line\n");

        #   Parse request record
        my @fields = ($line =~
            m/(\d+\.\d+\.\d+\.\d+)\s+(\S+)\s+(\S+)\s+\[(.*)\]\s+"(.*)"\s+(\d+)\s+([\-\d]+)/);
        if (!@fields) {
            $unparsed_lines++;
            next;
        }
        ($ip, $ident, $userid, $time_date, $request, $status, $length) = @fields;

        #   Parse date and time field
        my @stamp = ($time_date =~
            m-(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s([\+\-]\d+)$-);
        if (!@stamp) {
            $unparsed_lines++;
            next;
        }
        ($mday, $mon, $year, $hour, $minute, $second, $timezone) = @stamp;

        $mindex = $mnames{$mon};
        if (!defined($mindex)) {
            #   Month abbreviation not in the table
            $unparsed_lines++;
            next;
        }
        $iso_date = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $year, $mindex, $mday,
                            $hour, $minute, $second);

        #   NOTE: the zone offset in $timezone is parsed but not applied;
        #   timelocal() interprets the fields in the time zone of the
        #   machine running this script.  The eval traps the exception
        #   timelocal() raises for an impossible date such as 31/Feb.
        $utime = eval {
            timelocal($second, $minute, $hour, $mday, $mindex - 1, $year);
        };
        if (!defined($utime)) {
            $unparsed_lines++;
            next;
        }
#print ("$ip,$ident,$userid,$time_date,$request,$status,$length\n");

        if (($request eq $target_page) ||
            ($request eq $aux_target_page)) {
            $num_lines++;

            #   No count for this address means a new run starts here.
            if (!defined($hits{$ip})) {
                $first{$ip} = $iso_date;
                $ufirst{$ip} = $utime;
            }
            $hits{$ip}++;
            $last{$ip} = $iso_date;
            $ulast{$ip} = $utime;

#           if ($hits{$ip} == $attack_criterion) {
#               printf("%15s  %s  %s  %s\n", $ip, $first{$ip}, $last{$ip}, $hostdb{$ip});
#           }

        } else {
            #   Hit to page other than attack criterion
            $ignored_lines++;
            if (defined($hits{$ip})) {
                if ($hits{$ip} < $attack_criterion) {
                    #   Run too short to be an attack: forget it entirely,
                    #   including its time stamps.
                    delete($hits{$ip});
                    delete($first{$ip});
                    delete($ufirst{$ip});
                    delete($last{$ip});
                    delete($ulast{$ip});
                } else {
                    #   Record non-target hits once attack criterion met
                    $nhits{$ip}++;
                }
            }
        }
    }

    if ($unparsed_lines > 0) {
        warn("Skipped $unparsed_lines unparsable log record(s)\n");
    }

    print("Total records processed: $num_lines\n");
    print("Total records ignored:   $ignored_lines\n");
        
    #   Sort comparator for "sort byhits LIST": orders keys of %hits
    #   by descending hit count.  $a and $b are the two keys supplied
    #   by sort.
    sub byhits {
        my ($count_a, $count_b) = ($hits{$a}, $hits{$b});
        return $count_b <=> $count_a;
    }
    
    #   Print the report: one line per IP address with at least
    #   $hit_cutoff target-page hits, in descending order of hits.
    #   Addresses which have no entry in %hostdb are also written to
    #   newips.txt as "address address" lines.  $tothosts counts the
    #   addresses reported.
    $tothosts = 0;
    $hit_cutoff = 100;

    #   Failure to create newips.txt is reported but does not stop
    #   the report itself from being printed.
    my $newips_fh;
    if (!open($newips_fh, '>', 'newips.txt')) {
        warn("Cannot create newips.txt: $!\n");
        undef($newips_fh);
    }
    
    print << "EOL";
    
                                                                      Hours   Seconds        Other
     Hits     IP Address       First Hit           Latest Hit         Active  per hit (avg)   Hits
EOL
    for my $host (sort byhits keys %hits) {
        #   %hits may hold undefined counts for discarded runs
        next if (!defined($hits{$host}) || ($hits{$host} < $hit_cutoff));

        #   Seconds between first and latest target-page hit, and the
        #   average interval per hit.  $hits{$host} is at least
        #   $hit_cutoff here, so the division is safe.
        my $duration = $ulast{$host} - $ufirst{$host};
        my $rate = $duration / $hits{$host};

        if (!defined($hostdb{$host}) && defined($newips_fh)) {
            print {$newips_fh} "$host $host\n";
        }
        printf("%9d %15s  %s  %s  %4d  %6d           %6d  %s\n", $hits{$host}, $host,
            $first{$host}, $last{$host},
            $duration / 3600, $rate,
            $nhits{$host} || 0,         # no other-page hits recorded
            defined($hostdb{$host}) ? $hostdb{$host} : '');
        $tothosts++;
    }
    if (defined($newips_fh)) {
        close($newips_fh) or warn("Error writing newips.txt: $!\n");
    }
    
#   The script ends here.  Everything after this exit is unreachable
#   as the file stands: it prints the host total and an hourly
#   histogram from %timehist, %unique_hosts and %new_hosts, none of
#   which is filled in by the code above.
exit(0);
    
    print("\nTotal hosts with $hit_cutoff or more hits: $tothosts\n");
    print << "EOL";
    
  Hourly  Bin           Hits/Hour    Hits/Sec   Hosts    New Hosts
EOL
    #   The keys of %timehist are passed to localtime(), i.e. they are
    #   Unix times, so they are sorted numerically rather than as text.
    open(my $csv_fh, '>', 'timehist.csv')
        or die("Cannot create timehist.csv: $!\n");
    for my $bin (sort { $a <=> $b } keys %timehist) {
        my @lt = localtime($bin);
        printf("%04d-%02d-%02d %02d:00   %12d  %9.2f  %8d  %8d\n",
            $lt[5] + 1900, $lt[4] + 1, $lt[3], $lt[2], $timehist{$bin},
            $timehist{$bin} / 3600, $unique_hosts{$bin}, $new_hosts{$bin});
        printf {$csv_fh} ("%04d-%02d-%02d %02d:00,%d,%.4f,%d,%d\n",
            $lt[5] + 1900, $lt[4] + 1, $lt[3], $lt[2], $timehist{$bin},
            $timehist{$bin} / 3600, $unique_hosts{$bin}, $new_hosts{$bin});
    }
    close($csv_fh) or die("Error writing timehist.csv: $!\n");
