#!/usr/bin/perl

use 5.010;
use strict;
use warnings;
use English qw( -no_match_vars );
use Marpa::UrHTML;
use HTML::Tagset;
use Fatal qw(open close);

my $locator = shift;
my $document;
if ($locator =~ /^[a-zA-Z0-9]+[:]/) {
    require WWW::Mechanize;
    my $mech = WWW::Mechanize->new( autocheck => 1 );
    $mech->get( $locator );
    $document = $mech->content;
    undef $mech;
} else {
    local $RS = undef;
    open my $fh, q{<}, $locator;
    $document = <$fh>;
    close $fh;
}

my $urhtml_args = {
    handlers => [
        [   'pre' => sub {
                my @new_line_data      = ();
                my @following_comments = ();
                CHILD:
                for my $child_data ( @{ Marpa::UrHTML::child_data('value') } )
                {
                    next CHILD if not defined( my $value = $child_data->[0] );
                    for my $line_data ( @{$value} ) {
                        given ( $line_data->[0] ) {    # depending on the type
                            when ( [ 'cruft', 'missing start tag', 'missing end tag' ] ) {
                                push @new_line_data,
                                    [
                                    $_, 0,
                                    'following pre',
                                    @{$line_data}[ 3 .. $#{$line_data} ]
                                    ];
                            } ## end when ( [ 'cruft', 'missing start tag' ] )
                        } ## end given
                    } ## end for my $line_data ( @{$value} )
                } ## end for my $child_data ( @{ Marpa::UrHTML::child_data(...)})
                my $original = Marpa::UrHTML::original();
                push @new_line_data, [ 'line', 0, $original ];
                push @new_line_data, @following_comments;
                return \@new_line_data;
            },
        ],
        [   ':CRUFT' => sub {
                my $literal = Marpa::UrHTML::literal();
                my @new_line_data = ( [ 'cruft', 0, 'following', $literal ] );
                $literal =~ s/^\s+//gxms;
                $literal =~ s/\s+$//gxms;
                $literal =~ s/\s+/ /gxms;
                push @new_line_data, [ 'line', 0, $literal ];
                return \@new_line_data;
            },
        ],
        [   q{*} => sub {
                my $tagname = Marpa::UrHTML::tagname();
                my @new_line_data = ();
                my @child_data = @{ Marpa::UrHTML::child_data('token_type,value,original') };
                my $first_child = $child_data[0];
                my $first_content_child = 0;

                if ( defined $first_child->[0] and $first_child->[0] eq 'S' )
                {
                    push @new_line_data, [ 'line', 0, $first_child->[2] ];
                    $first_content_child = 1;
                }
                else {
                    push @new_line_data, [ 'missing start tag', 0, 'following', $tagname ];
                } ## end else [ if ( defined $first_child->[0] and $first_child->[0]...)]

                my $last_child = $child_data[-1];
                my $last_content_child = $#child_data;
                my $end_tag_child;
                if ( defined $last_child->[0] and $last_child->[0] eq 'E' )
                {
                    $end_tag_child = $last_child;
                    $last_content_child -= 1;
                }

                CHILD:
                for my $child_data_ix ( $first_content_child .. $last_content_child ) {
                    my ( $token_type, $value, $original ) =
                        @{ $child_data[$child_data_ix] };
                    if ( defined $value ) {
                        for my $line_data ( @{$value} ) {
                            my ( $type, $indent, @data ) = @{$line_data};
                            push @new_line_data, [ $type, $indent + 1, @data ];
                        }
                        next CHILD;
                    } ## end if ( defined $value )
                    for my $line ( split /\n/xms, $original ) {
                        $line =~ s/^\s+//gxms;
                        $line =~ s/\s+$//gxms;
                        $line =~ s/\s+/ /gxms;
                        push @new_line_data, [ 'line', 1, $line ];
                    }
                } ## end for my $child_data ( @{ Marpa::UrHTML::child_data(...)})

                given (1) {
                    when ( defined $end_tag_child ) {
                        push @new_line_data,
                            [ 'line', 0, $end_tag_child->[2] ];
                    }
                    when ( not $HTML::Tagset::emptyElement{$tagname} ) {
                        push @new_line_data,
                            [ 'missing end tag', 0, 'preceding', $tagname ];
                    }
                } ## end given

                return \@new_line_data;
            },
        ],
        [   ':TOP' => sub {
                my $result = q{};
                CHILD:
                for my $child_data (
                    @{ Marpa::UrHTML::child_data('value,original') } )
                {
                    my ( $value, $original ) = @{$child_data};
                    if ( defined $value ) {
                        LINE: for my $line_data ( @{$value} ) {
                            my $type = shift @{$line_data};
                            my $indent = shift @{$line_data};
                            my $line_prefix = q{  } x $indent;
                            if ( $type eq 'line' ) {
                                my ( $line ) = @{$line_data};
                                next LINE if $line =~ /^\s*$/;
                                $result .= "$line_prefix$line\n";
                                next LINE;
                            } ## end if ( $type eq 'line' )
                            if ( $type eq 'missing start tag' ) {
                                my ( $location, $tagname ) = @{$line_data};
                                given ($location) {
                                    when ('following') {
                                        $result
                                            .= $line_prefix
                                            . qq{<!-- Following start tag is replacement for a missing one -->\n}
                                            . $line_prefix
                                            . "<$tagname>\n";
                                    } ## end when ('following')
                                    when ('following pre') {
                                        $result .= $line_prefix
                                            . qq{<!-- Inside following <pre>, a start tag is missing: <$tagname> -->\n};
                                    }
                                    default {
                                        Carp::croak(
                                            "Internal error: unprovided-for missing start tag location: $_"
                                        );
                                    }
                                } ## end given
                                next LINE;
                            } ## end if ( $type eq 'missing start tag' )
                            if ( $type eq 'missing end tag' ) {
                                my ( $location, $tagname ) = @{$line_data};
                                given ($location) {
                                    when ('preceding') {
                                        $result
                                            .= "$line_prefix</$tagname>\n"
                                            . $line_prefix
                                            . qq{<!-- Preceding end tag is replacement for a missing one -->\n};
                                    } ## end when ('preceding')
                                    when ('following pre') {
                                        $result .= $line_prefix
                                            . qq{<!-- Inside following <pre>, an end tag is missing: <$tagname> -->\n};
                                    }
                                    default {
                                        Carp::croak(
                                            "Internal error: unprovided-for missing end tag location: $_"
                                        );
                                    }
                                } ## end given
                                next LINE;
                            } ## end if ( $type eq 'missing end tag' )
                            if ( $type eq 'cruft' ) {
                                my ( $location, $cruft ) = @{$line_data};
                                given ($location) {
                                    when ('following') {
                                        $result
                                            .= "$line_prefix<!-- Next line is cruft -->\n";
                                    }
                                    when ('following pre') {

                                        # Make sure the cruft quoted inside
                                        # the HTML comment does not
                                        # disrupt the comment.
                                        ( my $safe_cruft = $cruft )
                                            =~ s/--/- -/xms;
                                        $safe_cruft
                                            =~ s/^/  $line_prefix/gxms;
                                        $result
                                            .= qq{$line_prefix<!-- Inside the following <pre>, there is this cruft:\n}
                                            . qq{$safe_cruft\n}
                                            . qq{$line_prefix-->\n};
                                    } ## end when ('following pre')
                                    default {
                                        Carp::croak(
                                            "Internal error: unprovided-for cruft location: $_"
                                        );
                                    }
                                } ## end given
                                next LINE;
                            } ## end if ( $type eq 'cruft' )
                            Carp::croak(qq{Internal error: unknown line data type: "$type"});
                        } ## end for my $line_data ( @{$value} )
                        next CHILD;
                    } ## end if ( defined $value )
                    LINE: for my $line ( split /\n/xms, $original ) {
                        next LINE if $line =~ /^\s*$/;
                        $line =~ s/^\s+//gxms;
                        $line =~ s/\s+$//gxms;
                        $line =~ s/\s+/ /gxms;
                        $result .= "$line\n";
                    } ## end for my $line ( split /\n/xms, $original )
                } ## end for my $child_data ( @{ Marpa::UrHTML::child_data(...)})
                return $result;
            },
        ],
    ]
};

print Marpa::UrHTML->new($urhtml_args)->parse( \$document );

exit 0;

__END__
