#! /usr/bin/perl

#                             T o p 4 0
#
#                Show largest files in directory tree
#
#                           by John Walker
#                      http://www.fourmilab.ch/
#
#               This program is in the public domain.

my $Version =           '1.1 -- October 2017';

    use strict;
    use warnings;

#   User option defaults

    my $mount = 1;                    # Restrict search to specified filesystem
    my $threshold = 0;                # Size threshold (bytes) to include file as a candidate
    my $TopN = 40;                    # Number of largest files to display
    my $primate_size = 0;             # Interpret size with scaled units ?
    my $thousands_size = 0;           # Edit size with thousands separator ?

#   Localisation options

    my $Thousands = ',';              # Thousands separator or '' for none
    my $Decimal = '.';                # Decimal character

#   Configuration for the particulars of the system on which we're running

    my $find_blocksize = 512;         # Block size used by -size filter on find command
    my $find_command = "find";        # Command to obtain list of over-threshold files
    my $find_size = '-type f -size +';# Find size filter arguments
    my $find_action = '-print';       # Find action arguments
    my $find_mount = '-mount';        # Find argument to restrict to current file system
    my $expat = '(\d+)\D';            # Pattern to extract percentage used

#   You shouldn't need to change anything after this line.

    my ($opt_f, $opt_h, $opt_n, $opt_s, $opt_t, $opt_u);
    Getopts('fg:hn:s:tu');

    if (defined $opt_f) {             # -f  Follow mount points to other file systems
        $mount = 0;
    }

    if (defined $opt_h) {             # -h  Edit sizes in human-readable form
        $primate_size = 1;
    }

    if (defined $opt_n) {             # -n count  Show count largest files
        $TopN = $opt_n;
    }

    if (defined $opt_s) {             # -s size  Minimum size of files to scan
        $threshold = ScanSize($opt_s);
    }

    if (defined $opt_t) {             # -t  Edit sizes with thousands separator
        $thousands_size = 1;
    }

    if (defined $opt_u) {             # -u  Print how to call information
        print STDERR << "EOF";
Usage: Top40.pl [ options ] rootdir ...
    Options:
        -f                   Follow mount points to other file systems
        -h                   Edit sizes in human-readable form
        -n count             Show count largest files
        -s size              Minimum size of files to scan
        -t                   Edit sizes with thousands separator
        -u                   Print this message
Version $Version
EOF
        exit(0);
    }

    if ($#ARGV < 0 ) {
        $ARGV[0] = '.';
    }

    my $nfiles = 0;
    my (@Files, @Fss, %Fs);

    for (my $i = 0; $i <= $#ARGV; $i++) {
        my $pathname = $ARGV[$i];
        my ($fmt, $cmd, $n, $f, $filesize);

        if ($mount) {
            $fmt = $find_mount;
        }
        $b = $find_size . int($threshold / $find_blocksize);
        $cmd = "$find_command $pathname $fmt $b $find_action |";
        open(DF, "$cmd") || die("Cannot open pipe to $find_command");

        $n = 0;

fs:     while ($f = <DF>) {
            chop($f);

            $filesize = (-s $f);

            if ($nfiles >= $TopN) {
                @Files = sort(bySize @Files);
                if ($Fs{$Files[$nfiles - 1]} < $filesize) {
                    undef($Fs{$Files[$nfiles - 1]});
                    $nfiles--;
                } else {
                    next fs;
                }
            }

            $Fs{$f} = $filesize;
            $Files[$nfiles++] = $f;

        }
        close(DF);
    }

    @Fss = sort(bySize @Files);

    for (my $n = 0; $n <= $#Files; $n++) {
        $a = $Fss[$n];
        $b = $Fs{$a};
        printf("%16s\t%s\n", edSize($b), $a);
    }

#   Sort by size

sub bySize
{
    -($Fs{$a} <=> $Fs{$b});
}

#   Scan a size quantity

sub ScanSize {
    my ($sizespec) = @_;
    my ($num, $suffix, $unit);

    $unit = 1;
    $sizespec =~ m/(\d+)(\w)/;
    $num = $1;
    $suffix = $2;
    $suffix =~ tr/A-Z/a-z/;

    if ($suffix eq 'b') {
        $unit = 512;
    } elsif ($suffix eq 'k') {
        $unit = 1000;
    } elsif ($suffix eq 'm') {
        $unit = 1000000;
    } elsif ($suffix eq 'g') {
        $unit = 1000000000;
    } elsif ($suffix eq 't') {
        $unit = 1000000000000;
    }

    return ($num * $unit);
}

#   Edit a decimal number with commas

sub comma {
    my ($v, $o);

    $v = sprintf("%.0f", $_[0]);
    $o = "";
    if ($Thousands ne '') {
        while (length($v) >= 4) {
            $o = $Thousands . substr($v, -3, 3) . $o;
            $v = substr($v, 0, length($v) - 3);
        }
    }
    return ($v . $o);
}

#   Edit a size quantity into a human-readable form

sub primate {
    my ($u, $ul, $n, $i, $w);

    $n = $_[0];
    $n = sprintf("%.0f", $_[0]);
    $ul = 1;
    for ($i = 0, $u = 1000; $u <= 1000000000000; $u *= 1000, $i++) {
        if ($n < $u) {
            last;
        }
        $ul = $u;
    }
    $w = int($n / $ul);
    if ($w < 10) {
        $w = sprintf("%.1f", $n / $ul);
        $w =~ s/\./$Decimal/;
    }
    $w .= substr('bKMGT', $i, 1);
    return $w;
}

#   Edit size based on display options

sub edSize {
    my ($s);

    $s = $_[0];
    if ($primate_size) {
        $s = primate($s);
    } elsif ($thousands_size) {
        $s = comma($s);
    }
    return $s;
}

# getopts.pl - a better getopt.pl

# Usage:
#      Getopts('a:bc');  # -a takes arg. -b & -c not. Sets opt_* as a
#                        #  side effect.

#   This is equivalent to the Perl core module Getopt::Std's
#   getopts() function.  It was embedded to avoid path lookup
#   problems when Perl is run from cron jobs or by root.

sub Getopts {
    my ($argumentative) = @_;
    my (@args, $first, $rest, $pos);
    my ($errs) = 0;

    @args = split( / */, $argumentative );
    while (@ARGV && ($_ = $ARGV[0]) =~ /^-(.)(.*)/) {
        ($first, $rest) = ($1, $2);
        $pos = index($argumentative, $first);
        if ($pos >= 0) {
            if (($args[$pos + 1]) && ($args[$pos + 1] eq ':')) {
                shift(@ARGV);
                if ($rest eq '') {
                    ++$errs unless @ARGV;
                    $rest = shift(@ARGV);
                }
                eval "\$opt_$first = \$rest;";
            } else {
                eval "\$opt_$first = 1";
                if ($rest eq '') {
                    shift(@ARGV);
                } else {
                    $ARGV[0] = "-$rest";
                }
            }
        } else {
            print(STDERR "Unknown option: $first\n");
            ++$errs;
            if ($rest ne '') {
                $ARGV[0] = "-$rest";
            } else {
                shift(@ARGV);
            }
        }
    }
    return ($errs == 0);
}
