#!/usr/bin/perl -w
#
# freqcount - Run a frequency count on lines or fields.
#             Perl, Unix/Linux/Windows...
#
# 23-Jan-2006, ver 1.25
#
# USAGE:    freqcount [-h] [-d delim] [-f field[,field...]] [filename]
#        
#           -h         # help
#           -f1        # print field 1 only (starts at 1)
#           -d:        # input delimiter (default is whitespace)
#   eg,
#           freqcount /var/tmp/mylog         # freq count of file lines
#           freqcount -f6 /var/adm/sulog     # field 6 only
#           freqcount -d: -f7 /etc/passwd    # field 7 with delimiter ":"
#           cmd | freqcount                  # freq count of command output
#           cmd | freqcount -f5              # field 5 only
#           cmd | freqcount -f3,4            # field 3 and 4
#
# This is especially useful for processing large log files.
#
# COPYRIGHT: Copyright (c) 2006 Brendan Gregg.
#
#  This program is free software; you can redistribute it and/or
#  modify it under the terms of the GNU General Public License
#  as published by the Free Software Foundation; either version 2
#  of the License, or (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software Foundation,
#  Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#
#  (http://www.gnu.org/copyleft/gpl.html)
#
# 08-May-2003    Brendan Gregg     Created this.
# 23-Jan-2006       "       "      Tweaked style.

use strict;
use Getopt::Std;

#
#  Check Arguments
#
# Parse the switches into a lexical hash instead of the $main::opt_* package
# globals, so no variable has to be assigned just to silence the
# "used only once" warning under -w.
my %Opt;
getopts('hd:f:', \%Opt) or usage();
my $filename = defined $ARGV[0] ? $ARGV[0] : "";   # first non-switch argument
my $delim    = defined $Opt{d}  ? $Opt{d}  : "";   # "" selects whitespace
my $fieldnum = defined $Opt{f}  ? $Opt{f}  : "";   # "" selects whole lines
usage() if defined $Opt{h} or $filename eq "--help";

# Field numbers start at 1, so only a comma separated list of positive
# integers is valid.  A 0 would turn into index -1 after the conversion
# below and silently pick the LAST field, and an empty entry ("1,,2", ",")
# is not a field number at all; both are rejected with the usage message.
usage() unless $fieldnum =~ /\A(?:0*[1-9][0-9]*(?:,0*[1-9][0-9]*)*)?\z/;

# Convert the 1-based field numbers to 0-based array indexes.
my @Fieldnum = map { $_ - 1 } split(/,/, $fieldnum);
my @Lines;                                         # raw input lines
my %Hash;                                          # text => occurrence count

#
#  Read input file
#
if ($filename ne "" and $filename ne "-") {
    ### Read from file
    # Three-argument open with an explicit '<' mode: the name is always
    # treated as a plain file to read, so characters such as '>', '|' or
    # surrounding whitespace in the argument can never be interpreted as an
    # open mode or a command pipe.
    open my $in, '<', $filename
        or die "ERROR1: Can't open $filename: $!\n";
    while (my $line = <$in>) {
        push @Lines, $line;
    }
    close $in;
}
else {
    ### Read from STDIN
    # Used when no filename was given, or when it is "-" (which the old
    # two-argument open also mapped to standard input).
    while (my $line = <STDIN>) {
        push @Lines, $line;
    }
}

#
#  Process fields
#
if ($fieldnum ne "") {

    # Compile the delimiter once, outside the per-line loop.  A delimiter
    # of exactly one character is matched literally, so that common choices
    # such as "|", "." or "*" split on that character instead of being read
    # as regex syntax (where they either match the wrong thing or fail to
    # compile).  Longer delimiters keep their regular expression meaning.
    my $delim_re;
    if ($delim ne "") {
        $delim_re = length($delim) == 1 ? qr/\Q$delim\E/ : qr/$delim/;
    }

    ### Grab fields
    foreach my $line (@Lines) {
        chomp $line;
        my (@Fields, @Spaces);

        # Split fields
        if (defined $delim_re) {
            @Fields = split $delim_re, $line;
        }
        else {
            # @Spaces holds the whitespace runs around the words: element 0
            # is whatever precedes the first word, so the run that follows
            # field index N is $Spaces[N+1].
            @Fields = split ' ', $line;
            @Spaces = split /\S+/, $line;
        }

        # Rejoin selected fields
        my $data = "";
        foreach my $pos (0 .. $#Fieldnum) {
            my $num = $Fieldnum[$pos];

            # Add field data (a field missing from this line adds nothing)
            $data .= $Fields[$num] if defined $Fields[$num];

            # Append a separator after every selected field but the last
            next if $pos == $#Fieldnum;
            if (defined $delim_re) {
                $data .= $delim;
            }
            elsif (defined $Spaces[$num+1] and $Spaces[$num+1] ne "") {
                # reinsert original spacing,
                $data .= $Spaces[$num+1];
            }
            else {
                $data .= " ";
            }
        }

        # Store
        $Hash{"$data\n"}++;
    }
}
else {

    ### Grab line
    # Normalise the line ending the same way the field branch does, so a
    # final line without a trailing newline is counted together with
    # identical newline-terminated lines and is printed on its own line.
    foreach my $line (@Lines) {
        chomp $line;
        $Hash{"$line\n"}++;
    }
}

#
#  Process frequency count
#
# Print one "count:text" record per distinct key, highest count first.  The
# keys normally carry their own trailing newline, so none is added here.
# Keys with equal counts are ordered by string comparison; without this
# tie-break their relative order would follow the hash's internal order,
# which is not guaranteed to be the same from one run to the next.
foreach my $key (sort { $Hash{$b} <=> $Hash{$a} or $a cmp $b } keys %Hash) {
    print "$Hash{$key}:$key";
}

#
#  Subroutines
#
sub usage {
    # Write the usage summary to STDERR and terminate with exit status 1.
    # Takes no arguments and never returns.  The text contains nothing to
    # interpolate, so it is held in a single-quoted here-document.
    my $summary = <<'USAGE_TEXT';
USAGE:    freqcount [-h] [-d delim] [-f field[,field...]] [filename]

           -h         # help
           -f1        # print field 1 only (starts at 1)
           -d:        # input delimiter (default is whitespace)
   eg,
           freqcount /var/tmp/mylog         # freq count of file lines
           freqcount -f6 /var/adm/sulog     # field 6 only
           freqcount -d: -f7 /etc/passwd    # field 7 with delimiter ":"
           cmd | freqcount                  # freq count of command output
           cmd | freqcount -f5              # field 5 only
           cmd | freqcount -f3,4            # field 3 and 4
USAGE_TEXT

    print STDERR $summary;
    exit 1;
}
