#!/usr/bin/perl -w
#-
# Copyright (c) 2003 Dag-Erling Coïdan Smørgrav
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer
#    in this position and unchanged.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
#    derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# $Id$
#

use strict;
use POSIX;
use vars qw($LOGPATH %LOGIN %TREE %DAY %REVISION $CUTOFF $CATMIN);

# Path of the CVS commit log directory.  Nothing in the code visible here
# reads it; the log files to scan are taken from the command line.
$LOGPATH = "/home/ncvs/CVSROOT/commitlogs";
# Percentage threshold for "show committers for": committers whose share
# is below it are lumped together as "others".  Changed with "set cutoff".
$CUTOFF = 0;
# Minimum commit count used by categorize(), both for listing a committer
# at all and for awarding a secondary category.  Changed with "set catmin".
$CATMIN = 1;

# register(login, date, file, revision, plus, minus)
#
# Account for one file revision.  Updates four global tables:
#   %REVISION  file -> revision -> login (used to drop duplicates)
#   %LOGIN     login -> file -> count, plus a ' total' count per login
#   %DAY       date -> top-level directory -> count ('sys' for paths
#              starting with src/sys), plus a ' total' count per date
#   %TREE      nested hash mirroring the path; every node on the way from
#              the root to the file carries " <login>" and ' total' counts
# Keys starting with a space are counters, so they cannot clash with path
# components.  The plus and minus arguments are accepted but not used.
sub register($$$$$$) {
    my ($who, $date, $path, $rev, $added, $removed) = @_;

    # A (file, revision) pair is only ever counted once.
    return if (exists($REVISION{$path}->{$rev}));
    $REVISION{$path}->{$rev} = $who;

    # Per-committer counters.
    $LOGIN{$who}->{$path}++;
    $LOGIN{$who}->{' total'}++;

    # Per-day counters; the kernel tree gets its own column.
    my $today = ($DAY{$date} ||= {});
    $today->{' total'}++;
    if ($path =~ m|^src/sys|) {
	$today->{'sys'}++;
    } elsif ($path =~ m|^([^/]+)/|) {
	$today->{$1}++;
    }

    # Walk down the tree one path component at a time, crediting every
    # node that is visited (the root first, the file itself last).
    my $node = \%TREE;
    my $rest = $path;
    for (;;) {
	$node->{" $who"}++;
	$node->{' total'}++;
	last unless ($rest =~ s|^([^/]+)/?||);
	my $component = $1;
	$node->{$component} = {}
	    unless (exists($node->{$component}));
	$node = $node->{$component};
    }
}

# scan_log(filename)
#
# Read one commit log and pass every file revision found to register().
# Files whose name ends in "gz" are read through /usr/bin/gunzip.
#
# A commit starts at a line of the form "login yyyy/mm/dd".  Everything up
# to the "Revision  Changes  Path" header is skipped, and the lines after
# it of the form "rev +N -M path" are registered.  A commit is ignored
# altogether if an "Imported sources" line or a "Modified files:" line
# naming a "Branch:" appears before that header.
#
# Dies if the file cannot be opened; warns if closing it fails, which for
# a compressed log means the decompressor reported an error.
sub scan_log($) {
    my $fn = shift;

    my $fh;

    if ($fn =~ /gz$/) {
	# List-form pipe open: the file name reaches gunzip as one argument
	# and is never parsed by a shell, so spaces and metacharacters in
	# it are harmless.  "--" keeps a leading "-" from being read as an
	# option.
	open($fh, '-|', '/usr/bin/gunzip', '-cd', '--', $fn)
	    or die("$fn: $!\n");
    } else {
	# Three-argument open: the name is never interpreted as a mode.
	open($fh, '<', $fn)
	    or die("$fn: $!\n");
    }
  COMMIT:
    while (defined(my $line = <$fh>)) {
	next unless ($line =~ m|^(\w+)\s+(\d\d\d\d)/(\d\d)/(\d\d)|);
	my ($login, $yy, $mm, $dd) = ($1, $2, $3, $4);
	# Skip the log message, looking for the start of the file table.
	while (defined($line = <$fh>)) {
	    last if ($line =~ m/^\s+Revision\s+Changes\s+Path/);
	    if ($line =~ m/^\s+\S+ - Imported sources$/ ||
		$line =~ m/^\s+Modified files:.*Branch:/) {
		# Skip vendor imports and commits made on a branch
		next COMMIT;
	    }
	}
	# One table row per file; the first line that does not look like a
	# row ends the commit.
	while (defined($line = <$fh>)) {
	    last unless ($line =~ m/([\d\.]+)\s+\+(\d+)\s+-(\d+)\s+(\S+)/);
	    my ($revision, $plus, $minus, $file) = ($1, $2, $3, $4);
	    register($login, "$yy-$mm-$dd", $file, $revision, $plus, $minus);
	}
    }
    # Closing a pipe also reaps the child; a non-zero exit status (for
    # example a corrupt archive) shows up here with $! set to 0.
    close($fh)
	or warn("$fn: ",
		($! ? "$!" : "decompressor exited with status " . ($? >> 8)),
		"\n");
}

# scan_logs(files...)
#
# Scan each of the given commit logs in turn with scan_log(), showing the
# name of the file being processed on stderr.  The name is followed by a
# carriage return so that each one overwrites the previous; a newline is
# written once all files are done.
sub scan_logs(@) {
    for my $path (@_) {
	print(STDERR $path . "        \r");
	scan_log($path);
    }
    print(STDERR "\n");
}

# show_stats()
#
# Print a two-line summary on stdout: the number of commits registered
# together with the first and last date seen, and the number of distinct
# committers.  When nothing has been registered the date range is left out
# and the commit count is reported as 0.
sub show_stats() {

    my @days = sort(keys(%DAY));
    # The root of the tree holds the grand total; it does not exist until
    # the first revision has been registered.
    my $commits = $TREE{" total"} || 0;

    if (@days) {
	print("// ", $commits, " commits registered between $days[0] and $days[-1]\n");
    } else {
	# No dates to report, and $days[0] would be undefined.
	print("// ", $commits, " commits registered\n");
    }
    print("// ", scalar(keys(%LOGIN)), " active committers\n");
}

# dump_days()
#
# Print one row per day on stdout, in date order, with the total number of
# commits and the counts for the sys, src, ports, doc and www columns,
# followed by a "// total" row summing each column.  Missing counts are
# printed as 0.  The header and total rows start with "//".
sub dump_days() {

    my @columns = (' total', 'sys', 'src', 'ports', 'doc', 'www');
    my $row = "%-10.10s %6d %6d %6d %6d %6d %6d\n";
    my %sum;

    print("// date     total    sys    src  ports    doc    www\n");
    foreach my $date (sort(keys(%DAY))) {
	my $counts = $DAY{$date};
	# Every category is summed, not only the ones that are displayed.
	$sum{$_} += $counts->{$_}
	    foreach (keys(%{$counts}));
	printf($row, $date, map($counts->{$_} || 0, @columns));
    }
    printf($row, "// total", map($sum{$_} || 0, @columns));
}

# dump_ploticus()
#
# Print a fixed ploticus script on stdout.  The script plots stacked bars
# from data fields 3, 4 and 5 (labelled sys, src and ports) against the
# date in field 1, and ends with a "data:" line so that data rows can be
# appended directly after it.
sub dump_ploticus() {

    # Single-quoted heredoc: the text is emitted verbatim, including the
    # leading blank line.
    print(<<'END_PLOTICUS');

#proc getdata
#intrailer

#set unittype=datetime

#proc areadef
  title: FreeBSD commits
  rectangle: 1 1 4 4
  xscaletype: date yyyy-mm-dd
  xautorange: datafield=1 nearest=month
  yautorange: datafield=2 lowfix=0 nearest=20
  autowidth: 0.5
  autoheight: 0.5
  xaxis.stubs: incremental 1 month
  xaxis.stubformat: MMM
  xaxis.autoyears: yyyy
  yaxis.stubs: incremental 100
  yaxis.tics: yes

#proc bars
  outline: no
  barwidth: 0.02
  locfield: 1
  lenfield: 3
  legendlabel: sys
  color: red
  stackfields: *
  #saveas bar

#proc bars
  #clone: bar
  lenfield: 4
  legendlabel: src
  color: blue

#proc bars
  #clone: bar
  lenfield: 5
  legendlabel: ports
  color: green

#proc trailer
data:
END_PLOTICUS
}

# dump_gnuplot()
#
# Placeholder: the body is empty, so nothing is printed and nothing is
# returned.
sub dump_gnuplot() {
    return;
}

# show_tree(name, tree, prefix, depth)
#
# Recursively print a node of the commit tree and its children on stdout,
# one line per node, with the node's share of all registered commits as a
# percentage of $TREE{' total'}.
#
#   name    label to print for this node
#   tree    the node itself: a hash reference as built by register(), or a
#           plain number, which is printed as a leaf
#   prefix  text printed in front of the label; carries the tree-drawing
#           characters ("+--", "\--") of the enclosing levels
#   depth   number of levels to print; 0 prints nothing, and a negative
#           value never reaches 0 so the whole tree is printed
#
# A node is marked with "+" when it has children that will be shown.
# Percentages are reported as 0 when no commits have been registered.
sub show_tree($$$$);
sub show_tree($$$$) {
    my $name = shift;
    my $tree = shift;
    my $prefix = shift;
    my $depth = shift;

    return if ($depth == 0);
    $depth--;

    # Avoid dividing by zero (or undef) on an empty data set.
    my $grand_total = $TREE{' total'} || 0;

    # Prefix and name are passed as printf arguments rather than being
    # interpolated into the format, so a "%" in a file name is harmless.
    if (!ref($tree)) {
	printf("%s %s (%.2f%%)\n", $prefix, $name,
	       ($grand_total ? $tree * 100.0 / $grand_total : 0));
	return;
    }

    # Keys starting with a space are counters, not children.
    my @subtrees = grep(!m/^ /, sort(keys(%{$tree})));

    printf("%s%s %s (%.2f%%)\n", $prefix,
	   (@subtrees && $depth ? '+' : ''), $name,
	   ($grand_total ?
	    ($tree->{' total'} || 0) * 100.0 / $grand_total : 0));

    # Turn this node's own connector into the continuation drawn in front
    # of its children: a vertical bar if siblings follow, blank otherwise.
    $prefix =~ s/\+--/\|  /;
    $prefix =~ s/\\--/   /;

    # Test the list, not the shifted value: an entry named "0" is false
    # and must not end the loop.
    while (@subtrees) {
	my $subtree = shift(@subtrees);
	show_tree($subtree, $tree->{$subtree}, $prefix .
		  (@subtrees ? "+--" : "\\--"), $depth);
    }
}

# categorize()
#
# Print a table on stdout with one row per committer who has at least
# $CATMIN commits: login, category string, total commits, and the commits
# counted under src outside src/sys ("user"), src/sys, doc, ports and www.
#
# The category string starts with an upper-case letter for the area that
# holds more than half of the committer's commits, if any (D doc, P ports,
# W www, Y src/sys, U rest of src), followed by a lower-case letter for
# every other area with at least $CATMIN commits.
#
# The tables are only read: areas or committers without data are looked
# up without creating entries in %TREE.
sub categorize() {

    # Number of commits by a committer below the given top-level path, or
    # 0 if any part of the path is missing.  Each level is tested before
    # being dereferenced so that no empty node is autovivified in %TREE.
    my $commits_in = sub {
	my ($login, @path) = @_;
	my $node = \%TREE;
	foreach my $dir (@path) {
	    return 0 unless (ref($node->{$dir}));
	    $node = $node->{$dir};
	}
	return $node->{" $login"} || 0;
    };

    printf("%-10.10s %-6.6s %6s %6s %6s %6s %6s %6s\n",
	   'login', 'CAT', 'total', 'user', 'sys', 'doc', 'ports', 'www');
    foreach my $login (sort(keys(%LOGIN))) {
	my $total = $LOGIN{$login}->{' total'} || 0;

	next unless $total >= $CATMIN;

	my $doc = $commits_in->($login, 'doc');
	my $ports = $commits_in->($login, 'ports');
	my $www = $commits_in->($login, 'www');
	my $src = $commits_in->($login, 'src');
	my $sys = $commits_in->($login, 'src', 'sys');
	my $user = $src - $sys;
	my $half = $total / 2;

	# Areas in the order in which their letters are reported.
	my @areas = (
	    [ 'D', $doc ],
	    [ 'P', $ports ],
	    [ 'W', $www ],
	    [ 'Y', $sys ],
	    [ 'U', $user ],
	);

	# Principal category: the first area with more than half
	my $cat = '';
	foreach my $area (@areas) {
	    my ($letter, $count) = @{$area};
	    if ($count > $half) {
		$cat = $letter;
		last;
	    }
	}

	# Secondary categories: every non-principal area that reaches the
	# minimum
	foreach my $area (@areas) {
	    my ($letter, $count) = @{$area};
	    if ($count >= $CATMIN && $count <= $half) {
		$cat .= lc($letter);
	    }
	}

	printf("%-10.10s %-6.6s %6d %6d %6d %6d %6d %6d\n",
	       $login, $cat, $total, $user, $sys, $doc, $ports, $www);
    }
}

# Main program: scan the commit logs named on the command line, print a
# summary, then read commands from stdin until end of file or "quit":
#
#   cat[egorize]                      per-committer category table
#   set cutoff N[%]  / sho[w] cutoff  threshold for the "others" row
#   set catmin N     / sho[w] catmin  minimum count used by categorize()
#   sho[w] com[mit[ters]] for PATH    committers below PATH, by count
#   sho[w] com[mit[s]] by LOGIN [in PREFIX]
#                                     files committed by LOGIN, by count
#   q[uit]                            leave
#
# Unknown paths and unknown committers are answered with "No data.",
# anything unrecognised with "Syntax error.".
MAIN:{
    scan_logs(@ARGV);
    show_stats();
    #show_tree(".", \%TREE, " ", 3);
    #dump_ploticus();
    #dump_days();

    # Unbuffered stdout, so the prompt shows up before input is read
    $| = 1;
  COMMAND:
    for (;;) {
	print("> ");
	last unless defined($_ = <STDIN>);
	chomp;
	s/^\s*//;
	s/\s*$//;
	last if (m/^q(uit)?$/i);
	if (m/^cat(?:egorize)?$/) {
	    categorize();
	} elsif (m/^set\s+cutoff\s+(\d+(?:\.\d+)?)%?$/) {
	    $CUTOFF = $1;
	} elsif (m/^sho(?:w)?\s+cutoff$/) {
	    printf("%6.2f%%\n", $CUTOFF);
	} elsif (m/^set\s+catmin\s+(\d+(?:\.\d+)?)$/) {
	    $CATMIN = $1;
	} elsif (m/^sho(?:w)?\s+catmin$/) {
	    printf("%d\n", $CATMIN);
	} elsif (m/^sho(?:w)?\s+com(?:mit(?:ters)?)?\s+for\s+(\S+)$/) {
	    # Descend to the requested node of the tree
	    my $file = $1;
	    my $tree = \%TREE;
	    while ($file =~ s|^([^/]+)/?||) {
		if (!defined($tree = $tree->{$1})) {
		    print("No data.\n");
		    next COMMAND;
		}
	    }
	    # Committer counters are the keys that start with a space,
	    # apart from the node total; the other keys are subtrees and
	    # must not take part in the numeric sort.
	    my @logins = grep($_ ne ' total' && m/^ /, keys(%{$tree}));
	    my $total = $tree->{' total'};
	    my $others;
	    # Highest count first, ties broken by name for stable output
	    foreach my $login (sort({$tree->{$b} <=> $tree->{$a} ||
				     $a cmp $b} @logins)) {
		if (($tree->{$login} * 100) / $total < $CUTOFF) {
		    $others += $tree->{$login};
		    next;
		}
		printf("%12.12s %6d %6.2f%%\n",
		       $login, $tree->{$login},
		       ($tree->{$login} * 100.0) / $total);
	    }
	    if ($others) {
		printf("%12.12s %6d %6.2f%%\n", 'others',
		       $others, ($others * 100.0) / $total);
	    }
	} elsif (m/^sho(?:w)?\s+com(?:mit(?:s)?)?\s+by\s+(\S+)(?:\s+in\s+(\S+))?$/) {
	    my ($who, $restrict) = ($1, $2);
	    my $login = $LOGIN{$who};
	    # Same answer as for an unknown path
	    if (!defined($login)) {
		print("No data.\n");
		next COMMAND;
	    }
	    my @files = grep($_ ne ' total', keys(%{$login}));
	    # Highest count first, ties broken by name for stable output
	    foreach my $file (sort({$login->{$b} <=> $login->{$a} ||
				    $a cmp $b} @files)) {
		# defined(), not truth: a prefix of "0" is a valid filter
		next if defined($restrict) &&
		    substr($file, 0, length($restrict)) ne $restrict;
		printf("%12d %s\n", $login->{$file}, $file);
	    }
	} else {
	    print("Syntax error.\n");
	}
    }
}
