#!/usr/bin/perl

use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
use Marpa::UrHTML;
use List::Util qw(sum);
use Fatal qw(open close);
use Getopt::Long;

# Command line: an optional --html flag switches the report from plain text
# to an HTML table; the single positional argument (the "locator") names the
# document to analyze, either a URL or a local file path.
my $html;
my $usage = "usage: $0 [--html] file-or-url";

# Carp is not imported explicitly by this file; load it here rather than
# relying on some other module having pulled it in.
require Carp;
Carp::croak($usage) unless GetOptions( 'html' => \$html );

# Fail early with the usage message instead of letting an undefined locator
# reach the regex match and open() below.
my $locator = shift;
Carp::croak($usage) if not defined $locator;

my $document;
if ( $locator =~ /^[a-zA-Z0-9]+[:]/ ) {

    # Anything starting with "word:" is treated as a URL and fetched.
    # WWW::Mechanize is loaded lazily so local-file runs do not need it;
    # autocheck makes a failed fetch fatal.
    require WWW::Mechanize;
    my $mech = WWW::Mechanize->new( autocheck => 1 );
    $mech->get($locator);
    $document = $mech->content;
} else {

    # Otherwise the locator is a file path; slurp the whole file at once.
    # open/close are wrapped by Fatal (see the use lines), so failures die.
    local $RS = undef;
    open my $fh, q{<}, $locator;
    $document = <$fh>;
    close $fh;
}

# Merge the per-tag nesting depths reported by a node's children.
#
# $child_data is an array ref of per-child array refs whose first element is
# the child's value.  Children whose value is not a reference are skipped;
# the others are expected to carry a {depths} hash mapping tag name to depth.
#
# Returns a hash ref holding, for each tag name seen, the largest depth found
# in any child, plus the key ANY with the largest depth overall (0 when no
# child reports a depth).
sub calculate_max_depths {
    my ($child_data) = @_;
    my %deepest = ( ANY => 0 );
    CHILD: for my $child ( @{$child_data} ) {
        my $child_value = $child->[0];
        next CHILD if not ref $child_value;
        my $depths = $child_value->{depths};
        for my $tagname ( keys %{$depths} ) {
            my $depth = $depths->{$tagname};

            # Only positive improvements are recorded, so a tag never gets
            # an entry with depth 0.
            $deepest{$tagname} = $depth
                if $depth > ( $deepest{$tagname} // 0 );
            $deepest{ANY} = $depth if $depth > $deepest{ANY};
        }
    }
    return \%deepest;
} ## end sub calculate_max_depths

# Compute the size, in characters, of a node from its children.
#
# $child_data is an array ref of [ value, literal ] pairs.  A child with a
# defined value contributes that value's {length}; any other child
# contributes the number of characters in its literal text, not counting
# tab, form feed, space and zero-width space (U+200B).
#
# Returns the total as a number (0 for an empty child list).
sub calculate_length {
    my ($child_data) = @_;
    my $total = 0;
    for my $child ( @{$child_data} ) {
        my ( $value, $literal ) = @{$child};

        # tr///c with an empty replacement only counts; $literal is unchanged.
        $total +=
            defined $value
            ? $value->{length}
            : ( $literal =~ tr/\t\f \x{200B}//c );
    }
    return $total;
} ## end sub calculate_length

# Parse $document with Marpa::UrHTML, gathering statistics as the handlers
# fire.  The :TOP handler returns a two-element array ref which is unpacked
# into:
#   $instance - the $Marpa::UrHTML::INSTANCE hash, in which the element
#               handler accumulates {count} and {length} per tag name;
#   $value    - { depths => ..., length => ... } computed from the top-level
#               children.
# NOTE(review): the exact meaning of the handler specifiers and of
# child_data('value,literal') is defined by Marpa::UrHTML; the code below
# only relies on each child being a [ value, literal ] pair.
my ( $instance, $value ) = do {

    # Comments add neither nesting depth nor length.
    my $on_comment = sub {
        return { depths => {}, length => 0 };
    };

    # Handler registered under "*": tally this element under its tag name
    # and report its own depths and length upwards.
    my $on_element = sub {
        my $children = Marpa::UrHTML::child_data('value,literal');
        my $tagname  = Marpa::UrHTML::tagname();
        my $length   = calculate_length($children);

        $Marpa::UrHTML::INSTANCE->{count}->{$tagname}++;
        $Marpa::UrHTML::INSTANCE->{length}->{$tagname} += $length;

        # This element sits one level above its deepest children, both for
        # its own tag name and for the ANY (tag-independent) depth.
        my $depths = calculate_max_depths($children);
        $depths->{$tagname} = ( $depths->{$tagname} // 0 ) + 1;
        $depths->{ANY} += 1;

        return {
            depths => $depths,
            length => $length,
        };
    };

    # Top of the parse: hand back the accumulated per-instance data along
    # with the summary computed from the top-level children.
    my $on_top = sub {
        my $children = Marpa::UrHTML::child_data('value,literal');
        my %summary  = (
            depths => calculate_max_depths($children),
            length => calculate_length($children),
        );
        return [ $Marpa::UrHTML::INSTANCE, \%summary ];
    };

    my $parser = Marpa::UrHTML->new(
        {   handlers => [
                [ ':COMMENT' => $on_comment ],
                [ q{*}       => $on_element ],
                [ ':TOP'     => $on_top ],
            ],
        }
    );

    @{ $parser->parse( \$document ) };
};

# Derive the report's headline numbers from the parse results.
#   $length_by_element / $count_by_element - per-tag totals accumulated
#       during the parse (empty hashes if nothing was recorded);
#   $html_length   - accumulated length recorded for the "html" element;
#   $total_lengths - sum of the accumulated lengths of all tag names.
my $length_by_element = $instance->{length} // {};
my $count_by_element  = $instance->{count}  // {};
my $html_length       = $length_by_element->{html} // 0;
my $total_lengths = List::Util::sum( values %{$length_by_element} ) // 0;

# Complexity score: total element length divided by N*log(N), where N is the
# html element's length.  log(0) is fatal and log(1) is 0 (division by
# zero), so for N <= 1 the score is undefined and is reported as "-", the
# same placeholder this script uses for an unavailable average.
my $complexity =
    $html_length > 1
    ? sprintf( '%.3f',
        $total_lengths / ( $html_length * log($html_length) ) )
    : q{-};

# ANY holds the overall maximum nesting depth; remove it from the hash so
# that only real tag names remain for the per-element rows.
my $max_depths        = $value->{depths};
my $max_element_depth = delete $max_depths->{ANY};

# Emit the report header: the locator, the complexity score, the overall
# maximum depth and the column headings, either as the opening of an HTML
# table (--html) or as plain text.
if ($html) {

    # The locator comes straight from the command line and URLs routinely
    # contain "&", so escape HTML metacharacters before embedding it.
    my %entity_for = (
        q{&} => '&amp;',
        q{<} => '&lt;',
        q{>} => '&gt;',
        q{"} => '&quot;',
    );
    ( my $locator_html = $locator ) =~ s/([&<>"])/$entity_for{$1}/g;

    print qq{<table cellpadding="3" border="1">}
        . qq{<thead>\n}
        . qq{<tr><th colspan="5">$locator_html</tr>\n}
        . qq{<tr><th colspan="5">Complexity Score = $complexity</tr>\n}
        . qq{<tr><th colspan="5">Maximum Depth = $max_element_depth</tr>\n}
        . qq{<tr>}
        . qq{<th>Element}
        . qq{<th>Maximum<br>Nesting}
        . qq{<th>Number of<br>Elements}
        . qq{<th>Size in<br>Characters</th>}
        . qq{<th>Average<br>Size</th>}
        . qq{</tr>\n}
        . qq{</thead>\n};
} else {
    say $locator;
    say "Complexity Score = ", $complexity;
    say "Maximum Depth = ", $max_element_depth;

    # Two-line column headings, each column 11 characters wide to line up
    # with the data rows.
    printf "%11s%11s%11s%11s%11s\n", q{}, 'Maximum ', 'Number of', 'Size in  ', 'Average';
    printf "%11s%11s%11s%11s%11s\n", 'Element ', 'Nesting ', 'Elements ', 'Characters', 'Size  ';
}

# Emit one row per tag name, in alphabetical order: maximum nesting depth,
# number of elements, total size and average size per element.
for my $element ( sort keys %{$max_depths} ) {
    my $count = $count_by_element->{$element}  // 0;
    my $size  = $length_by_element->{$element} // 0;

    # The average is an integer, or "-" when there is nothing to divide by.
    my $average = $count ? int( $size / $count ) : q{-};
    if ($html) {
        print join q{},
            q{<tr>},
            qq{<td>$element</td>},
            q{<td align="right">}, $max_depths->{$element}, q{</td>},
            qq{<td align="right">$count</td>},
            qq{<td align="right">$size</td>},
            qq{<td align="right">$average</td>},
            "</tr>\n";
    } else {

        # The last column uses %11s, not %11d: $average may be the "-"
        # placeholder, which %d would turn into 0 plus a warning.  Integers
        # print identically under either conversion.
        printf "%-11s%11d%11d%11d%11s\n", $element, $max_depths->{$element}, $count, $size, $average;
    }
} ## end for my $element ( sort keys %{$max_depths} )

# Close the table opened by the HTML header.
print qq{</table>\n} if $html;

exit 0;

__END__
