grammar Perl:ver<6.0.0.alpha>:auth;

=begin things todo

    add more suppositions and figure out exact error continuation semantics
    finish out all the {*} #= hookage
    think about longest-token-defeating {*} that maybe should be
    add parsing this file to sanity tests :)

=end things todo

=begin comment overview

This file is designed to be either preprocessed into a grammar with
action statements or used as-is without any preprocessing.  The {*}
notation is a no-op action block, but can be identified uniquely via a
combination of the preceding token or rule name plus any additional text
following a #= comment.  We put this into a comment rather than using
a macro so that bootstrap compilers don't have to worry about macros
yet, and to keep the main grammar relatively uncluttered by action
statements.  Note that the preprocessor can certainly generate accesses
to $/ within the action block, so we need not mention it explicitly.

Also, some rules are named by syntactic category plus an additional symbol
specified in adverbial form, either in bare :name form or in :sym<name>
form.  (It does not matter which form you use for identifier symbols,
except that to specify a symbol "sym" you must use the :sym<sym> form
of adverb.)  If you use the <sym> rule within the rule, it will parse the
symbol at that point.  At the final reduction point of a rule, if $<sym>
has been set, that is used as the final symbol name for the rule.  This
need not match the symbol specified as part of the rule name; that is just
for disambiguating the name.  However, if no $<sym> is set, the original
symbol will be used by default.

Note that rules with only one action need no #= comment, so the identifier
of the following stub is just "TOP".

=end comment overview

token TOP { {*} }

# This grammar also assumes transitive longest-token semantics, though
# we make a feeble attempt to order rules so a procedural | can usually
# produce a correct parse.
=begin comment linkage

XXX random inconsistent ideas

From the viewpoint of the user:

complete replacement of grammar till end of file:

    use only MyGrammar;

turns into something like:

    use MyGrammar :my<$?PARSER>;    # XXX how to export this?

which ends up looking like something below...

    use MyGrammar;
    BEGIN {
        temp COMPILING::<$?PARSER> := MyGrammar;
        my $more = m:p/ <$?PARSER::UNIT(/$/)>/;
        # XXX how do we attach $more to the prior tree?
    }

complete replacement of grammar till end of scope:

    use MyGrammar;
    BEGIN {
        temp COMPILING::<$?PARSER> := MyGrammar;
        my $more = m:p/ <$?PARSER::UNIT(//)>/;
        # XXX how do we attach $more to the prior tree?
    }

mutating existing grammar by derivation:

    # (Presumably normal subroutine defs don't need to mutate the grammar)
    BEGIN {
        temp COMPILING::<$?PARSER> := grammar is OUTER::<$?PARSER> {
            token infix:plus returns Additive { }
        }
        my $more = m:p/ <$?PARSER::UNIT(//)>/;
        # XXX or some such ...
    }

Note that these BEGIN blocks parse the rest of a scope as kind of a
"compilation continuation".  The temp restores the old parser at the
end of the begin block, which is presumably coincident with the end of
the user's current scope if the rule ended up in the right spot.

Note also that, within the BEGIN block, $_ might be the current program
being parsed!

These also assume a cooperative subgrammar that knows how to quit
on other stoppers than just /$/.  In the absence of that we might
need to snip out a substring to feed the subgrammar.  Of course, this
means finding a delimiter that can't occur in the substring, or
preparsing the subgrammar somehow to find the right closer, neither
of which is exactly optimal.

=end comment linkage

# The internal precedence levels are *not* part of the public interface.
# The current values are mere implementation; they may change at any time.
# Users should specify precedence only in relation to existing levels.
# Operator property tables, one per internal precedence-level name.  Each
# table (except %LOOSEST) is handed to the like-named PrecOp class below.
# NOTE(review): every adverb here appears without a value (bare :prec,
# :assoc, :assign, ...); the values may have been lost from this copy of
# the file -- confirm against the upstream grammar before relying on them.
constant %term            = { :prec };
constant %methodcall      = { :prec };
constant %autoincrement   = { :prec, :lvalue };
constant %exponentiation  = { :prec, :assoc, :assign };
constant %symbolic_unary  = { :prec };
constant %multiplicative  = { :prec, :assoc, :assign };
constant %additive        = { :prec, :assoc, :assign };
constant %junctive_and    = { :prec, :assoc, :assign };
constant %junctive_or     = { :prec, :assoc, :assign };
constant %named_unary     = { :prec, };
constant %nonchaining     = { :prec, :assoc };
constant %chaining        = { :prec, :assoc, :bool };
constant %tight_and       = { :prec, :assoc, :assign };
constant %tight_or        = { :prec, :assoc, :assign };
constant %conditional     = { :prec, :assoc, };
constant %item_assignment = { :prec, :assoc, :lvalue };
constant %loose_unary     = { :prec, };
constant %comma           = { :prec, :assoc, };
constant %list_infix      = { :prec, :assoc, };
constant %list_prefix     = { :prec, };
constant %loose_and       = { :prec, :assoc, };
constant %loose_or        = { :prec, :assoc, };
constant %LOOSEST         = { :prec, };
constant %terminator      = { :prec, :assoc };

# "epsilon" tighter than terminator
#constant $LOOSEST = %LOOSEST;
constant $LOOSEST = "a=!"; # XXX preceding line is busted

# Parametric role shared by all operator classes below.  %defaults is a
# slurpy named-argument hash, so it holds whatever named arguments appear
# in a class's `does PrecOp[...]` clause.
role PrecOp[*%defaults] {

    # This is hopefully called on a match to mix in operator info by type.
    # As written it: mixes the composing class into $m, fills in every
    # %defaults key that $m does not already define, records into the
    # contextual %+thisop, and returns $m.
    method &.(Match $m) {
        $m but= ::?CLASS;
        # //= only assigns when the existing entry is undefined, so values
        # already present on the match win over the class defaults.
        for %defaults.kv -> $k, $v { $m{$k} //= $v };
        # NOTE(review): the three %+thisop assignments below are textually
        # identical and the condition tests $m itself; this looks like hash
        # subscripts were lost from this copy -- verify against upstream.
        %+thisop = $m;
        if not $m {
            %+thisop = $m;
            %+thisop = $m;
        }
        return $m;
    }
}

# One class per operator level.  Each composes PrecOp with the matching
# property table flattened (|%table) into the role's named arguments;
# Hyper instead passes the single adverb :transparent.
class Hyper           does PrecOp[:transparent]       {}
class Term            does PrecOp[|%term]             {}
class Methodcall      does PrecOp[|%methodcall]       {}
class Autoincrement   does PrecOp[|%autoincrement]    {}
class Exponentiation  does PrecOp[|%exponentiation]   {}
class Symbolic_unary  does PrecOp[|%symbolic_unary]   {}
class Multiplicative  does PrecOp[|%multiplicative]   {}
class Additive        does PrecOp[|%additive]         {}
class Junctive_and    does PrecOp[|%junctive_and]     {}
class Junctive_or     does PrecOp[|%junctive_or]      {}
class Named_unary     does PrecOp[|%named_unary]      {}
class Nonchaining     does PrecOp[|%nonchaining]      {}
class Chaining        does PrecOp[|%chaining]         {}
class Tight_and       does PrecOp[|%tight_and]        {}
class Tight_or        does PrecOp[|%tight_or]         {}
class Conditional     does PrecOp[|%conditional]      {}
class Item_assignment does PrecOp[|%item_assignment]  {}
class Loose_unary     does PrecOp[|%loose_unary]      {}
class Comma           does PrecOp[|%comma]            {}
class List_infix      does PrecOp[|%list_infix]       {}
class List_prefix     does PrecOp[|%list_prefix]      {}
class Loose_and       does PrecOp[|%loose_and]        {}
class Loose_or        does PrecOp[|%loose_or]         {}
class Terminator      does PrecOp[|%terminator]       {}

# Categories are designed to be easily extensible in derived grammars
# by merely adding more rules in the same category.  The rules within
# a given category start with the category name followed by a differentiating
# adverbial qualifier to serve (along with the category) as the longer name.

# The endsym context, if specified, says what to implicitly check for in each
# rule right after the initial <sym>.  Normally this is used to make sure
# there's appropriate whitespace, though Perl 6 also uses it to rule out
# the => (fatarrow) construct.  Note that endsym isn't called if <sym>
# isn't called.

# Default contextual endsym pattern.
# NOTE(review): the pattern body is blank in this copy; it may have held an
# assertion that was lost -- confirm against upstream.
my $endsym is context = / /;

# XXX the only magic we're assuming here is that someone set up $+sym for us.
# (well, and endsym, but that's set explicitly in the proto sigs below,
# (which theoretically propagate to the sigs of the multis they control...))

# Match the current symbol and then the contextual endsym pattern.
# Both variants default $pat to the contextual $+sym and match with m:p.
# Str variant: $pat is interpolated into the pattern as a plain variable.
multi method sym (Str $pat = $+sym) {
    m:p/ $pat <$+endsym> /;
}

# Untyped variant: $pat is interpolated through the <$pat> assertion form.
multi method sym ($pat = $+sym) {
    m:p/ <$pat> <$+endsym> /;
}

# Category declarations.  Each category NAME gets a `token category:NAME`
# entry plus a `proto` for NAME itself.
# NOTE(review): every body below is empty in this copy, and the
# category:NAME adverbs carry no explicit symbol; text may have been lost --
# confirm against the upstream grammar.
proto token category { }

token category:category { }

token category:sigil { }
proto token sigil { }

token category:twigil { }
proto token twigil { }

token category:special_variable { }
proto token special_variable { }

token category:version { }
proto token version { }

token category:term { }
proto token term { }

token category:quote { }
proto token quote { }

# The prefix/infix/postfix protos carry an `is defequiv(...)` trait naming
# one of the operator property tables.
token category:prefix { }
proto token prefix is defequiv(%symbolic_unary) { }

token category:infix { }
proto token infix is defequiv(%additive) { }

token category:postfix { }
proto token postfix is defequiv(%autoincrement) { }

# From here on, some protos override the contextual endsym through a named
# parameter in their signature.
token category:dotty { }
proto token dotty (:$endsym is context = / ? /) { }

token category:circumfix { }
proto token circumfix { }

token category:postcircumfix { }
proto token postcircumfix { }

token category:regex_metachar { }
proto token regex_metachar { }

token category:regex_backslash { }
proto token regex_backslash { }

token category:regex_assertion { }
proto token regex_assertion { }

token category:regex_mod_internal { }
proto token regex_mod_internal { }

#token category:regex_mod_external { }
#proto token regex_mod_external
#    (:$endsym is context = / /) { }

token category:quote_mod { }
proto token quote_mod { }

token category:q_backslash { }
proto token q_backslash { }

token category:qq_backslash { }
proto token qq_backslash { }

# Trait keywords: endsym is overridden to \s+.
token category:trait_verb { }
proto token trait_verb (:$endsym is context = / \s+ /) { }

token category:trait_auxiliary { }
proto token trait_auxiliary (:$endsym is context = / \s+ /) { }

# Declarators: endsym is overridden to a pattern containing >>.
token category:type_declarator { }
proto token type_declarator (:$endsym is context = / >> /) { }

token category:scope_declarator { }
proto token scope_declarator (:$endsym is context = / >> /) { }
# Remaining category declarations: a `token category:NAME` entry plus a
# `proto` for NAME.  The statement_* protos are declared with `rule`
# rather than `token`.
# NOTE(review): bodies are empty in this copy; text may have been lost --
# confirm against the upstream grammar.
token category:package_declarator { }
proto token package_declarator (:$endsym is context = / >> /) { }

token category:routine_declarator { }
proto token routine_declarator (:$endsym is context = / >> /) { }

token category:statement_prefix { }
proto rule statement_prefix (:$endsym is context = / >> /) { }

# statement_control is the only proto here whose endsym is a single \s.
token category:statement_control { }
proto rule statement_control (:$endsym is context = / \s /) { }

token category:statement_mod_cond { }
proto rule statement_mod_cond (:$endsym is context = / >> /) { }

token category:statement_mod_loop { }
proto rule statement_mod_loop (:$endsym is context = / >> /) { }

token category:infix_prefix_meta_operator { }
proto token infix_prefix_meta_operator { }

token category:infix_postfix_meta_operator { }
proto token infix_postfix_meta_operator { }

token category:postfix_prefix_meta_operator { }
proto token postfix_prefix_meta_operator { }

token category:prefix_postfix_meta_operator { }
proto token prefix_postfix_meta_operator { }

token category:prefix_circumfix_meta_operator { }
proto token prefix_circumfix_meta_operator { }

# Lexical routines

# NOTE(review): throughout the rules below, several alternatives consist
# only of an action stub ({*}) and some quantifiers/closers (`?`, `>`) have
# nothing visible to attach to.  Subrule calls and assertions appear to be
# missing from this copy; the #= tags hint at what each branch matched.
# Confirm against the upstream grammar.

# make sure we're not an autoquoted identifier
regex nofat { ? '=>' > }

# Whitespace.  Two ordered (||) alternatives: the first is the
# between-words check described by its inline comment; the second repeats
# over the unsp / vwhite / unv branches and then fires the "all" action.
token ws {
    || ::: # must \s+ between words
    || [
       | {*} #= unsp
       | \v {*} #= vwhite
       | {*} #= unv
       ]* {*} #= all
}

# Starts with a literal backslash, followed by any number of
# vwhite / unv branches.
token unsp {
    \\
    [
    | \v {*} #= vwhite
    | {*} #= unv
    ]* {*} #= all
}

# Three branches: horizontal whitespace; a line-anchored (^^) '#' comment
# or pod; or a '#' that is either an inline comment or runs to end of line.
token unv {
    | \h+ {*} #= hwhite
    | ^^ [
         | '#' \N* {*} #= line
         | {*} #= pod
         ]
    | '#' [
          # assuming defaults to standard set
          | {*} #= inline
          | \N* {*} #= end
          ]
}

# XXX We need to parse the pod eventually to support $= variables.

# A line-anchored '=' introduces pod: either a begin ... '=' ... 'end'
# block, or a single directive running to the end of its line.
token pod_comment {
    ^^ '=' ?
    [
    | begin .*? \n
      '=' ? 'end' $ \N* \n? {*} #= block
    | \N* \n? {*} #= misc
    ]
    {*}
}

# Top-level rules

# Entry point.  $unitstop (default /$/) is declared contextual, which is
# how comp_unit below can read it as $+unitstop.
# NOTE(review): the m:p pattern is blank here -- presumably a lost subrule
# call; confirm against upstream.
method UNIT ($unitstop is context = /$/) {
    UNIT: do {
        m:p/ /;
    }
}

# Note: we only check for the unitstopper.  We don't check for ^ because
# we might be embedded in something else.
rule comp_unit (:$begin_compunit is context = 1) {
    [ <$+unitstop> || ]
    {*}
}

# A block with an optional part introduced by a literal '->'.
token pblock {
    [ '->' ]?
}

# Brace-delimited block.  After the closing-brace check, one of three
# tagged outcomes applies: normal, endline, or endlist; the latter two
# also run a `let` binding to 1.
# NOTE(review): the binding target after `let $` is missing in this copy.
token block {
    '{'
    [ '}' || ]
    [
    | \h* ? > {*} #= normal
    | ? \n > {*} { let $ := 1; } #= endline
    | {*} { let $ := 1; } #= endlist
    ]
    {*}
}

# Same shape as `block`, except the "normal" branch has no leading \h*.
token regex_block { # perhaps parameterize and combine with block someday
    '{'
    [ '}' || ]
    [
    | ? > {*} #= normal
    | ? \n > {*} { let $ := 1; } #= endline
    | {*} { let $ := 1; } #= endlist
    ]
    {*}
}

rule statement_list {
    * {*}
}

# A label ends in ':' followed by whitespace.  The optional bracketed group
# looks like the remains of a check that rejects an existing name used as
# a label; the closure then registers the label in COMPILING:: as a Label.
# NOTE(review): the identifier capture and the bracketed check are garbled
# in this copy -- confirm against upstream.
token label {
    ':' \s
    [ ) }> as a label> ]?

    # add label as a pseudo type
    { COMPILING::{"::$"} = Label.new($) } # XXX need statement ref too?

    {*}
}

rule statement {