
    #	Analyse attacking hosts
    
    #	An HTTP access log (CERN/NCSA Common Log File format) is
    #	analysed and a table of hits on the $target_page by IP
    #	address is prepared, as well as total number of target
    #	page hits binned into one hour windows.  A report is printed
    #	of both the attacking hosts (in descending order of number of
    #	target hits) and the time evolution of the attack.
    
    #	The log file analysed by this program is assumed to have been
    #	pre-filtered for the attack hits to be analysed.  In the case
    #	of the attack for which it was developed, this is done by
    #	running:
    #
    #	    egrep '"GET / HTTP/1.1" (200 646|573 \-)' /files/server/logs/http/access_log >/tmp/ddos.txt
    #
    #	and then analysing the resulting "ddos.txt" file.  (The alternation
    #	is required to handle log entries made by the mitigation patch
    #	to Apache.)

    use Time::Local;
    
    #   Pattern recognising a request for the target page: a GET of
    #   the root document using any HTTP/1.x protocol version.
    $target_page = qr{GET / HTTP/1\.};

    #   Counters for accepted and rejected log lines, plus the hour
    #   bin of the most recent target hit (zero means "none yet").
    ($num_lines, $ignored_lines, $ltimebin) = (0, 0, 0);

    #   Month abbreviations as they appear in log time stamps, mapped
    #   to month numbers 1 through 12.
    %mnames = ();
    @mnames{qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)} = (1 .. 12);
    
    #   Main pass over the log.  Every line read from the files named
    #   on the command line (or standard input) is parsed as a Common
    #   Log Format record.  Requests for the target page update the
    #   per-host tables (%hits, %first, %ufirst, %last, %ulast) and the
    #   hourly tables (%timehist, %unique_hosts, %new_hosts); every
    #   other line, including one which cannot be parsed, is echoed to
    #   standard error and counted in $ignored_lines.
    #
    #   The hourly host counts are closed out whenever the hour bin
    #   changes, so the log is expected to be in chronological order.
    while ($l = <>) {
        $l =~ s/\s+$//;

        #   Parse request record.  The match is evaluated in list
        #   context so that a line which does not fit the pattern
        #   leaves every field undefined rather than reusing whatever
        #   an earlier match captured.
        ($ip, $ident, $userid, $time_date, $request, $status, $length) =
            $l =~ m/(\d+\.\d+\.\d+\.\d+)\s+(\S+)\s+(\S+)\s+\[(.*)\]\s+"(.*)"\s+(\d+)\s+([\d\-]+)/;

        #   $utime stays undefined unless the whole record, including
        #   its time stamp, is usable.
        undef($utime);
        if (defined($ip)) {
            if ($length eq '-') {
                $length = 0;
            }

            #   Parse date and time field
            ($mday, $mon, $year, $hour, $minute, $second, $timezone) =
                $time_date =~ m-(\d+)/(\w+)/(\d+):(\d+):(\d+):(\d+)\s([\+\-]\d+)$-;
            $mindex = defined($mon) ? $mnames{$mon} : undef;
            if (defined($mindex)) {
                #   timelocal() croaks on out-of-range fields (for
                #   example day 31 of a 30 day month); trap that so one
                #   bad record cannot abort the whole analysis.  The
                #   zone offset in the log is not applied: the stamp is
                #   interpreted in this machine's local time zone.
                $utime = eval {
                    timelocal($second, $minute, $hour, $mday, $mindex - 1, $year);
                };
            }
        }

        #   Reject anything unparseable or not aimed at the target page
        if (!defined($utime) || $request !~ m/$target_page/) {
            print(STDERR "DOOOOH!  $l\n");
            $ignored_lines++;
            next;
        }

        $iso_date = sprintf("%04d-%02d-%02d %02d:%02d:%02d", $year, $mindex, $mday,
                            $hour, $minute, $second);
        $num_lines++;

        #   Per-host totals and first / latest hit times
        $hits{$ip}++;
        if (!defined($first{$ip})) {
            $first{$ip} = $iso_date;
            $ufirst{$ip} = $utime;
        }
        $last{$ip} = $iso_date;
        $ulast{$ip} = $utime;

        #   Hourly histogram, keyed by the Unix time of the start of
        #   the hour containing the hit
        $timebin = int($utime / 3600) * 3600;
        $timehist{$timebin}++;
        if ($ltimebin != $timebin) {
            #   The hour has changed: close out the previous bin by
            #   counting the hosts which made 10 or more hits in it,
            #   and how many of those had not been counted in any
            #   earlier bin.
            if ($ltimebin != 0) {
                foreach $uh (keys %uhosts) {
                    if ($uhosts{$uh} >= 10) {
                        $unique_hosts{$ltimebin}++;
                        if (!defined($ohosts{$uh})) {
                            $new_hosts{$ltimebin}++;
                            $ohosts{$uh} = 1;
                        }
                    }
                }
            }
            undef(%uhosts);
            $ltimebin = $timebin;
        }
        $uhosts{$ip}++;
    }
    
    #   The loop above only closes out an hour bin when the next one
    #   starts, so the final bin is still pending here.  Count its
    #   hosts with 10 or more hits, and those among them not counted
    #   in any earlier bin.
    for $uh (grep { $uhosts{$_} >= 10 } keys %uhosts) {
        $unique_hosts{$timebin}++;
        next if defined($ohosts{$uh});
        $new_hosts{$timebin}++;
        $ohosts{$uh} = 1;
    }

    #   Summary of how many log lines were used and how many rejected
    print("Total records processed: $num_lines\n");
    print("Total records ignored:   $ignored_lines\n");
    
    #   Load the host database if it exists.  You can create the host
    #   database from the HTTP log file of incidents with:
    #
    #       cut -d' ' -f1 <log.txt >/tmp/a
    #       paste -d' ' /tmp/a /tmp/a | sort | uniq >/tmp/b
    #       /usr/sbin/logresolve </tmp/b >hostdb.txt
    #       rm /tmp/a /tmp/b
    #
    #   See "updateHostDatabase" for a script which adds newly
    #   encountered IP addresses to an existing host database.
    #
    #   Each usable line holds two whitespace separated fields: the
    #   resolved host name followed by the IP address.  %hostdb maps
    #   the IP address to the host name, or to '?' when both fields
    #   are identical.  A missing or unreadable file is not an error:
    #   the reports are then simply printed without host names.
    if (open(my $hostdb_fh, '<', 'hostdb.txt')) {
        while (my $entry = <$hostdb_fh>) {
            chomp($entry);

            #   Skip blank or malformed lines instead of storing an
            #   entry built from undefined or stale captures.
            next unless $entry =~ m/^(\S+)\s+(\S+)/;

            #   Identical fields: IP address tried, but did not resolve
            $hostdb{$2} = ($1 eq $2) ? '?' : $1;
        }
        close($hostdb_fh);
    }

    #   Sort comparator ordering IP addresses by descending number of
    #   hits recorded in %hits.  Operands arrive in sort's package
    #   variables $a and $b.
    sub byhits {
        my ($hits_a, $hits_b) = ($hits{$a}, $hits{$b});
        return $hits_b <=> $hits_a;
    }
    
    #   Report hosts in order of total number of hits.
    #
    #   Only hosts with at least $hit_cutoff hits are listed.  Any such
    #   host which has no entry in the host database is also written
    #   to "newips.txt" as an "address address" pair.

    $tothosts = 0;
    $hit_cutoff = 100;

    #   Failure to create newips.txt is reported but does not stop the
    #   report; the file is a by-product, not the main output.
    my $newips_fh;
    if (!open($newips_fh, '>', 'newips.txt')) {
        warn("Cannot create newips.txt: $!\n");
        undef($newips_fh);
    }

    print << "EOL";
    
                                                                      Hours   Seconds
     Hits     IP Address       First Hit           Latest Hit         Active  per hit (avg)
EOL
    for $k (sort byhits keys %hits) {
        next if $hits{$k} < $hit_cutoff;

        if (!defined($hostdb{$k}) && defined($newips_fh)) {
            print {$newips_fh} "$k $k\n";
        }

        #   Span between first and latest hit in seconds, and the mean
        #   interval per hit over that span
        $duration = $ulast{$k} - $ufirst{$k};
        $rate = $duration / $hits{$k};
        printf("%9d %15s  %s  %s  %4d  %6d  %s\n", $hits{$k}, $k, $first{$k}, $last{$k},
            $duration / 3600, $rate,
            $hostdb{$k});
        $tothosts++;
    }

    #   Buffered write errors only surface when the file is closed
    if (defined($newips_fh)) {
        close($newips_fh) or warn("Error writing newips.txt: $!\n");
    }

    print("\nTotal hosts with $hit_cutoff or more hits: $tothosts\n");
    
    #   Sort comparator ordering IP addresses by the Unix time of their
    #   latest hit in %ulast, most recent first.  Operands arrive in
    #   sort's package variables $a and $b.
    sub bylast {
        my ($last_a, $last_b) = ($ulast{$a}, $ulast{$b});
        return $last_b <=> $last_a;
    }
    
    #   Report hosts in order of date of last hit.
    #
    #   Hosts with at least $hit_cutoff hits are listed, most recently
    #   seen first, and the list stops at the first host whose latest
    #   hit is more than $duration_to_print seconds older than the
    #   latest hit of the first host listed.

    $tothosts = 0;

    print << "EOL";
    
                                                                      Hours   Seconds
     Hits     IP Address       First Hit           Latest Hit         Active  per hit (avg)
EOL

    $duration_to_print = 10 * 60;       # Age before last hit in seconds to cut off recent hist report
    $vlast = -1;                        # Latest hit time of first host listed; -1 until known
    for $k (sort bylast keys %hits) {
        next if $hits{$k} < $hit_cutoff;

        if ($vlast < 0) {
            $vlast = $ulast{$k};
        }

        #   Hosts arrive newest first, so the first one outside the
        #   window ends the report.  The test is made before printing
        #   so that a stale host is not listed as recently active.
        last if ($vlast - $ulast{$k}) > $duration_to_print;

        #   Span between first and latest hit in seconds, and the mean
        #   interval per hit over that span
        $duration = $ulast{$k} - $ufirst{$k};
        $rate = $duration / $hits{$k};
        printf("%9d %15s  %s  %s  %4d  %6d  %s\n", $hits{$k}, $k, $first{$k}, $last{$k},
            $duration / 3600, $rate,
            $hostdb{$k});
        $tothosts++;
    }

    #   Prepare time evolution report.
    #
    #   One row is printed per hour bin in %timehist, oldest first:
    #   the local date and hour, the hits in that hour, the same figure
    #   expressed per second, the number of hosts counted for the bin
    #   and how many of those were new.  The same rows are written in
    #   comma separated form to "timehist.csv".

    print << "EOL";
    
  Hourly  Bin           Hits/Hour    Hits/Sec   Hosts    New Hosts
EOL

    #   Failure to create the CSV file is reported but does not stop
    #   the printed report.
    my $csv_fh;
    if (!open($csv_fh, '>', 'timehist.csv')) {
        warn("Cannot create timehist.csv: $!\n");
        undef($csv_fh);
    }

    #   The keys are Unix times, so they must be compared as numbers;
    #   the default string comparison orders them correctly only while
    #   they all have the same number of digits.
    for $k (sort { $a <=> $b } keys %timehist) {
        @lt = localtime($k);

        #   A bin in which no host reached the counting threshold has
        #   no entry in the host tables; show it as zero explicitly.
        my @row = ($lt[5] + 1900, $lt[4] + 1, $lt[3], $lt[2], $timehist{$k},
                   $timehist{$k} / 3600, $unique_hosts{$k} || 0, $new_hosts{$k} || 0);

        printf("%04d-%02d-%02d %02d:00   %12d  %9.2f  %8d  %8d\n", @row);
        if (defined($csv_fh)) {
            printf {$csv_fh} ("%04d-%02d-%02d %02d:00,%d,%.4f,%d,%d\n", @row);
        }
    }

    #   Buffered write errors only surface when the file is closed
    if (defined($csv_fh)) {
        close($csv_fh) or warn("Error writing timehist.csv: $!\n");
    }
