#!/local/bin/perl
#
# Recursive-descent parser for an RFC 2234 ABNF grammar
# It's probably silly to do this in Perl, but wottehell....
#
use English;
use ParserIO;

inputfile();

if (rulelist() && EoF()) {
    print "OK\n";
    print "Maximum jumpback: $maxback\n";
# Symbol table - check for undefinded rules
    for $name (sort(keys(%used))) {
	if (!$defined{$name}) {
	    print "$name: Used $used{$name} times, but not defined\n";
	}
    }
    for $name (sort(keys(%defined))) {
	if ($used{$name} == 1) {
	    print "$name: Defined but not used\n";
	}
    }
} else {
    print "Not OK\n";
    print "Syntax error: ";
    PrintTrailer();
}

# General: Return 1 for OK, 0 for failure.

sub rulelist {
    my $point;
    my $stack = enter(rulelist);
    $stat = 1;
    while ($stat == 1) {
	$point = mark();
	if (rule()) {
	    # OK - rule eaten
	} else {
	    rollback($point);
	    star_c_wsp();
	    $stat = c_nl();
	}
    }
    leave($stack);
}

sub rule {
    my $stack = enter(rule);
    my $startpos = $ParserIO::pos;
    my $name = rulename();
    # much more elegant, but handles no symbols:
    # my $stat = rulename() && defined_as() && elements() && c_nl();
    if ($name && defined_as()) {
	$definition = elements();
	if ($definition && c_nl()) {
	    ++ $defined{$name};
	    $definition{$name} = $definition;
	    $stat = 1;
	    print "Rule: ", @ParserIO::buffer[$startpos..$ParserIO::pos-1];
	    # note that rules are constrained to end with an NL....
	} else {
	    $stat = 0;
	}
    } else {
	$stat = 0;
    }
    leave($stack);
    return $stat;
}

sub rulename {
    my $stack = enter(rulename);
    my $first = mark();
    ALPHA() || return 0;
    while (ALPHA() || DIGIT() || Character("-")) {}
    # hacky....
    my $rulename = join("", @ParserIO::buffer[$first..$ParserIO::pos-1]);
    #print "Rulename: ", $rulename, "\n";
    ++ $used{$rulename}; # note - not rolled back on parse rollback
    leave($stack);
    return $rulename;
}

sub defined_as {
    my $stack = enter(defined_as);
    star_c_wsp();
    Character("=") || return 0;
    Character("/"); # and ignore error code
    my $status = star_c_wsp();
    leave($stack);
    return $status;
}

sub elements {
    my $stack = enter(elements);
    alternation() || return 0;
    star_c_wsp();
    return 1;
}

sub star_c_wsp {
    my $stack = enter(star_c_wsp);
    while (c_wsp()){};
    leave($stack);
}

sub c_wsp {
    my $stack = enter(c_wsp);
    if (WSP()) { # note: WSP eats nothing on failure, so needs no rollback
	leave($stack);
	return 1;
    } else {
	my $pos = mark();
	if (c_nl() && WSP()) {
	    leave($stack);
	    return 1;
	} else {
	    rollback($pos);
	    leave($stack);
	    return 0;
	}
    }
}

sub c_nl {
    my $stack = enter(c_nl);
    my $pos = mark();
    if (comment()) {
	return 1;
    } else {
	rollback($pos);
	return CRLF();
    }
}

sub comment {
    my $stack = enter(comment);
    Character(";") || return 0;
    while (WSP() || VCHAR()){}
    CRLF() || return 0;
    return 1;
}

sub alternation {
    my $stack = enter(alternation);
    concatenation() || return 0;
    while (star_c_wsp() && Character("/") && star_c_wsp() && concatenation()){}
    return 1;
}

sub concatenation {
    my $stack = enter(concatenation);
    repetition() || return 0;
    while (c_wsp() && star_c_wsp() && repetition()){};
    return 1;
}

sub repetition {
    my $stack = enter(repetition);
    repeat(); # and ignore result - optional
    my $stat = element();
    leave($stack);
    return $stat;
}

sub repeat {
    my $stack = enter(repeat);
    my $pos = mark();
    # First try the *digit * *digit
    while (DIGIT()){};
    if (Character("*")) {
	while (DIGIT()){};
    } else {
	rollback($pos);
	DIGIT() || return 0;
	while (DIGIT()){};
    }
}

sub element {
    my $stack = enter(element);
    my $pos = mark();
    if (rulename()) {
	return 1;
    } elsif (rollback($pos) && group()) {
	return 1;
    } elsif (rollback($pos) && option()) {
	return 1;
    } elsif (rollback($pos) && char_val()) {
	return 1;
    } elsif (rollback($pos) && num_val()) {
	return 1;
    } elsif (rollback($pos) &&  prose_val()) {
	return 1;
    }
}

sub group {
    my $stack = enter(group);
    return Character("(") && star_c_wsp()
	&& alternation() && star_c_wsp && Character(")");
}
sub option {
    my $stack = enter(option);
    return Character("[") && star_c_wsp()
	&& alternation() && star_c_wsp && Character("]");
}
sub char_val {
    my $stack = enter(char_val);
    DQUOTE() || return 0;
    while (CharRange(0x20, 0x21) || CharRange(0x23, 0x7E)) {}
    DQUOTE() || return 0;
}

sub num_val {
    Character("%") || return 0;
    my $pos = mark();
    if (bin_val()) {
	return 1;
    } elsif (rollback($pos) && dec_val()) {
	return 1;
    } elsif (rollback($pos) && hex_val()) {
	return 1;
    } else {
	rollback($pos);
	return 0;
    }
}

sub bin_val {
    Character("b") || return 0;
    BIT() || return 0;
    while (BIT()){}
    opt: {
	my $pos = mark();
	if (Character(".")) {
	    BIT() || (rollback($pos) , last opt);
	    while (BIT()){}
	} elsif (Character("-")) {
	    BIT() || (rollback($pos), last opt);
	    while (BIT()){}
	}
    }
    return 1;
}
sub dec_val {
    Character("d") || return 0;
    DIGIT() || return 0;
    while (DIGIT()){}
    opt: {
	my $pos = mark();
	if (Character(".")) {
	    DIGIT() || (rollback($pos), last opt);
	    while (DIGIT()){}
	} elsif (Character("-")) {
	    DIGIT() || (rollback($pos), last opt);
	    while (DIGIT()){}
	}
    }
    return 1;
}

sub hex_val {
    Character("x") || return 0;
    HEXDIG() || return 0;
    while (HEXDIG()){}
    opt: {
	my $pos = mark();
	if (Character(".")) {
	    HEXDIG() || (rollback($pos), last opt);
	    while (HEXDIG()){}
	} elsif (Character("-")) {
	    HEXDIG() || (rollback($pos), last opt);
	    while (HEXDIG()){}
	}
    }
    return 1;
}

sub prose_val {
    my $pos = mark();
    Character("<") || return 0;
    while (CharRange(0x20, 0x3d) || CharRange(0x3f, 0x7e)) {};
    Character(">") || (rollback($pos), return 0);
}
 
sub ALPHA {
    return (CharRange(0x41, 0x5A) || CharRange(0x61, 0x7A));
}
sub BIT {
    return(Character("0") || Character("1"));
}
sub CHAR {
    return (CharRange(0x01, 0x7f));
}
sub CR {
    return(Character("\n"));
}
sub CRLF {
    #return (CR() && LF());
    return LF(); # bowing to Unix reality....
}
sub CTL {
    return (CharRange(0, 0x1F) || CharRange(0x7f, 0x7f));
}
sub DIGIT {
    return(CharRange(0x30, 0x39));
}
sub DQUOTE {
    return(CharRange(0x22, 0x22));
}
sub HEXDIG {
    return(DIGIT()
	   || Character("A")
	   || Character("B")
	   || Character("C")
	   || Character("D")
	   || Character("E")
	   || Character("F"));
}
sub HTAB {
    return(CharRange(0x09, 0x09));
}
sub LF {
    return(CharRange(0x0A, 0x0A));
}
sub LWSP {
    while (WSP() || (CRLF() && WSP())) {};
}
sub OCTET {
    return(CharRange(0x00, 0xFF));
}
sub SP {
    return(CharRange(0x20, 0x20));
}
sub VCHAR {
    return(CharRange(0x21, 0x7E));
}
sub WSP {
    return(SP() || HTAB());
}
#-----------------------------
# Special-rules
#-----------------------------
sub Character {
    my $Ch = shift;
    
    if (nextchar() eq $Ch) {
	eatchar();
	return 1;
    } else {
	return 0;
    }
}

sub CharRange {
    my ($lower, $upper) = @_;
    my $nc = ord(nextchar());
    if ($nc >= $lower && $nc <= $upper) {
	eatchar();
	return 1;
    } else {
	return 0;
    }
}
