This commit is contained in:
Frédéric Bour 2021-05-26 12:35:09 +09:00
parent e956fce0c6
commit 7e518ea261
8 changed files with 471 additions and 225 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
_build

View File

@ -1,2 +1,7 @@
TARGETS=stat_lexer.exe stat_parser.exe
all:
dune build menhir_stat.exe
dune build $(TARGETS)
$(TARGETS):
dune build $@

25
dune
View File

@ -1,9 +1,26 @@
(executable
(name stat_parser)
(modules stat_parser)
(libraries menhirLib menhirSdk menhir_stat raw_parser))
(executable
(name stat_lexer)
(modules stat_lexer raw_lexer)
(libraries raw_parser))
(library
(name menhir_stat)
(modules menhir_stat)
(libraries menhirLib menhirSdk))
(executable
(name ocamllexer)
(modules ocamllexer)
(libraries compilerlibs))
(library
(name raw_parser)
(flags :standard -w -26-27)
(modules raw_parser)
(libraries menhirLib))
(menhir
(modules raw_parser)
(flags --table --inspection))
(ocamllex raw_lexer)

View File

@ -1 +1,2 @@
(lang dune 2.8)
(using menhir 2.1)

View File

@ -35,10 +35,6 @@ struct
| (env', f') :: _ when M.equal env env' -> f', stack
| _ :: tl -> pop_until tl env
let top_frame = function
| (_, f) :: _ -> f
| _ -> assert false
let reduce_to state stack prod env =
match M.pop env with
| Some env' ->

View File

@ -19,19 +19,30 @@
open Lexing
open Raw_parser
type location = position * position
let location_error ~loc fmt =
let start, stop = loc in
Format.eprintf "Error at %d:%d-%d:%d: "
start.pos_lnum (start.pos_cnum - start.pos_bol)
stop.pos_lnum (stop.pos_cnum - stop.pos_bol);
Format.eprintf fmt
type error =
| Illegal_character of char
| Illegal_escape of string * string option
| Reserved_sequence of string * string option
| Unterminated_comment of Location.t
| Unterminated_comment of location
| Unterminated_string
| Unterminated_string_in_comment of Location.t * Location.t
| Unterminated_string_in_comment of location * location
| Keyword_as_label of string
| Invalid_literal of string
| Invalid_directive of string * string option
;;
exception Error of error * Location.t;;
exception Error of error * location
let location_curr lexbuf = (lexbuf.lex_start_p, lexbuf.lex_curr_p)
(* The table of keywords *)
@ -89,13 +100,13 @@ let keyword_table =
"while", WHILE;
"with", WITH;
"lor", INFIXOP3("lor"); (* Should be INFIXOP2 *)
"lxor", INFIXOP3("lxor"); (* Should be INFIXOP2 *)
"mod", INFIXOP3("mod");
"land", INFIXOP3("land");
"lsl", INFIXOP4("lsl");
"lsr", INFIXOP4("lsr");
"asr", INFIXOP4("asr")
"lor", INFIXOP3; (* Should be INFIXOP2 *)
"lxor", INFIXOP3; (* Should be INFIXOP2 *)
"mod", INFIXOP3;
"land", INFIXOP3;
"lsl", INFIXOP4;
"lsr", INFIXOP4;
"asr", INFIXOP4
];
table
@ -111,7 +122,7 @@ let store_string s = Buffer.add_string string_buffer s
let store_lexeme lexbuf = store_string (Lexing.lexeme lexbuf)
(* To store the position of the beginning of a string and comment *)
let string_start_loc = ref Location.none;;
let string_start_loc = ref (dummy_pos, dummy_pos);;
let comment_start_loc = ref [];;
let in_comment () = !comment_start_loc <> [];;
let is_in_string = ref false
@ -126,16 +137,16 @@ let store_escaped_uchar lexbuf u =
if in_comment () then store_lexeme lexbuf else store_string_utf_8_uchar u
let with_comment_buffer comment lexbuf =
let start_loc = Location.curr lexbuf in
let start_loc = location_curr lexbuf in
comment_start_loc := [start_loc];
reset_string_buffer ();
let end_loc = comment lexbuf in
let s = get_stored_string () in
reset_string_buffer ();
let loc = { start_loc with Location.loc_end = end_loc.Location.loc_end } in
let loc = (fst start_loc, snd end_loc) in
s, loc
let error lexbuf e = raise (Error(e, Location.curr lexbuf))
let error lexbuf e = raise (Error(e, location_curr lexbuf))
let error_loc loc e = raise (Error(e, loc))
(* to translate escape sequences *)
@ -165,7 +176,7 @@ let char_for_backslash = function
let illegal_escape lexbuf reason =
let error = Illegal_escape (Lexing.lexeme lexbuf, Some reason) in
raise (Error (error, Location.curr lexbuf))
raise (Error (error, location_curr lexbuf))
let char_for_decimal_code lexbuf i =
let c = num_value lexbuf ~base:10 ~first:i ~last:(i+2) in
@ -209,9 +220,6 @@ let uchar_for_uchar_escape lexbuf =
let is_keyword name = Hashtbl.mem keyword_table name
let check_label_name lexbuf name =
if is_keyword name then error lexbuf (Keyword_as_label name)
(* Update the current location with file name and line number. *)
let update_loc lexbuf file line absolute chars =
@ -233,59 +241,39 @@ let escaped_newlines = ref false
(* Warn about Latin-1 characters used in idents *)
let warn_latin1 lexbuf = ignore lexbuf
(*Location.deprecated
(Location.curr lexbuf)
"ISO-Latin1 characters in identifiers"*)
let handle_docstrings = ref true
let comment_list = ref []
let add_comment com =
comment_list := com :: !comment_list
let add_docstring_comment ds =
let com =
("*" ^ Docstrings.docstring_body ds, Docstrings.docstring_loc ds)
in
add_comment com
let comments () = List.rev !comment_list
(* Error report *)
open Format
let prepare_error loc = function
let print_error loc = function
| Illegal_character c ->
Location.errorf ~loc "Illegal character (%s)" (Char.escaped c)
location_error ~loc "Illegal character (%s)" (Char.escaped c)
| Illegal_escape (s, explanation) ->
Location.errorf ~loc
location_error ~loc
"Illegal backslash escape in string or character (%s)%t" s
(fun ppf -> match explanation with
| None -> ()
| Some expl -> fprintf ppf ": %s" expl)
| Reserved_sequence (s, explanation) ->
Location.errorf ~loc
location_error ~loc
"Reserved character sequence: %s%t" s
(fun ppf -> match explanation with
| None -> ()
| Some expl -> fprintf ppf " %s" expl)
| Unterminated_comment _ ->
Location.errorf ~loc "Comment not terminated"
location_error ~loc "Comment not terminated"
| Unterminated_string ->
Location.errorf ~loc "String literal not terminated"
| Unterminated_string_in_comment (_, literal_loc) ->
Location.errorf ~loc
location_error ~loc "String literal not terminated"
| Unterminated_string_in_comment (_, _) ->
location_error ~loc
"This comment contains an unterminated string literal"
~sub:[Location.msg ~loc:literal_loc "String literal begins here"]
| Keyword_as_label kwd ->
Location.errorf ~loc
location_error ~loc
"`%s' is a keyword, it cannot be used as label name" kwd
| Invalid_literal s ->
Location.errorf ~loc "Invalid literal %s" s
location_error ~loc "Invalid literal %s" s
| Invalid_directive (dir, explanation) ->
Location.errorf ~loc "Invalid lexer directive %S%t" dir
location_error ~loc "Invalid lexer directive %S%t" dir
(fun ppf -> match explanation with
| None -> ()
| Some expl -> fprintf ppf ": %s" expl)
@ -348,102 +336,90 @@ rule token = parse
| ".~"
{ error lexbuf
(Reserved_sequence (".~", Some "is reserved for use in MetaOCaml")) }
| "~" (lowercase identchar * as name) ':'
{ check_label_name lexbuf name;
LABEL name }
| "~" (lowercase_latin1 identchar_latin1 * as name) ':'
{ warn_latin1 lexbuf;
LABEL name }
| "~" (lowercase identchar *) ':'
{ LABEL }
| "~" (lowercase_latin1 identchar_latin1 *) ':'
{ LABEL }
| "?"
{ QUESTION }
| "?" (lowercase identchar * as name) ':'
{ check_label_name lexbuf name;
OPTLABEL name }
| "?" (lowercase_latin1 identchar_latin1 * as name) ':'
{ warn_latin1 lexbuf;
OPTLABEL name }
| "?" (lowercase identchar *) ':'
{ OPTLABEL }
| "?" (lowercase_latin1 identchar_latin1 *) ':'
{ OPTLABEL }
| lowercase identchar * as name
{ try Hashtbl.find keyword_table name
with Not_found -> LIDENT name }
| lowercase_latin1 identchar_latin1 * as name
{ warn_latin1 lexbuf; LIDENT name }
| uppercase identchar * as name
{ UIDENT name } (* No capitalized keywords *)
| uppercase_latin1 identchar_latin1 * as name
{ warn_latin1 lexbuf; UIDENT name }
| int_literal as lit { INT (lit, None) }
| (int_literal as lit) (literal_modifier as modif)
{ INT (lit, Some modif) }
| float_literal | hex_float_literal as lit
{ FLOAT (lit, None) }
| (float_literal | hex_float_literal as lit) (literal_modifier as modif)
{ FLOAT (lit, Some modif) }
with Not_found -> LIDENT }
| lowercase_latin1 identchar_latin1 *
{ LIDENT }
| uppercase identchar *
{ UIDENT } (* No capitalized keywords *)
| uppercase_latin1 identchar_latin1 *
{ UIDENT }
| int_literal { INT }
| int_literal literal_modifier { INT }
| float_literal | hex_float_literal
{ FLOAT }
| (float_literal | hex_float_literal) literal_modifier
{ FLOAT }
| (float_literal | hex_float_literal | int_literal) identchar+ as invalid
{ error lexbuf (Invalid_literal invalid) }
| "\""
{ reset_string_buffer();
is_in_string := true;
let string_start = lexbuf.lex_start_p in
string_start_loc := Location.curr lexbuf;
string_start_loc := location_curr lexbuf;
string lexbuf;
is_in_string := false;
lexbuf.lex_start_p <- string_start;
STRING (get_stored_string(), None) }
STRING }
| "{" (lowercase* as delim) "|"
{ reset_string_buffer();
is_in_string := true;
let string_start = lexbuf.lex_start_p in
string_start_loc := Location.curr lexbuf;
string_start_loc := location_curr lexbuf;
quoted_string delim lexbuf;
is_in_string := false;
lexbuf.lex_start_p <- string_start;
STRING (get_stored_string(), Some delim) }
STRING }
| "\'" newline "\'"
{ update_loc lexbuf None 1 false 1;
(* newline is ('\013'* '\010') *)
CHAR '\n' }
| "\'" ([^ '\\' '\'' '\010' '\013'] as c) "\'"
{ CHAR c }
| "\'\\" (['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] as c) "\'"
{ CHAR (char_for_backslash c) }
CHAR }
| "\'" [^ '\\' '\'' '\010' '\013'] "\'"
{ CHAR }
| "\'\\" ['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] "\'"
{ CHAR }
| "\'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "\'"
{ CHAR(char_for_decimal_code lexbuf 2) }
{ CHAR }
| "\'\\" 'o' ['0'-'7'] ['0'-'7'] ['0'-'7'] "\'"
{ CHAR(char_for_octal_code lexbuf 3) }
{ CHAR }
| "\'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "\'"
{ CHAR(char_for_hexadecimal_code lexbuf 3) }
{ CHAR }
| "\'" ("\\" _ as esc)
{ error lexbuf (Illegal_escape (esc, None)) }
| "(*"
{ let s, loc = with_comment_buffer comment lexbuf in
COMMENT (s, loc) }
{ let _ = with_comment_buffer comment lexbuf in
COMMENT }
| "(**"
{ let s, loc = with_comment_buffer comment lexbuf in
if !handle_docstrings then
DOCSTRING (Docstrings.docstring s loc)
else
COMMENT ("*" ^ s, loc)
{ let _ = with_comment_buffer comment lexbuf in
COMMENT
}
| "(**" (('*'+) as stars)
{ let s, loc =
{ let _ =
with_comment_buffer
(fun lexbuf ->
store_string ("*" ^ stars);
comment lexbuf)
lexbuf
in
COMMENT (s, loc) }
COMMENT }
| "(*)"
{ (*if !print_warnings then
Location.prerr_warning (Location.curr lexbuf) Warnings.Comment_start;*)
let s, loc = with_comment_buffer comment lexbuf in
COMMENT (s, loc) }
| "(*" (('*'*) as stars) "*)"
{ if !handle_docstrings && stars="" then
(* (**) is an empty docstring *)
DOCSTRING(Docstrings.docstring "" (Location.curr lexbuf))
else
COMMENT (stars, Location.curr lexbuf) }
let _ = with_comment_buffer comment lexbuf in
COMMENT }
| "(*" ('*'*) "*)"
{ COMMENT }
| "*)"
{ (*let loc = Location.curr lexbuf in
Location.prerr_warning loc Warnings.Comment_not_end;*)
@ -469,7 +445,7 @@ rule token = parse
| "->" { MINUSGREATER }
| "." { DOT }
| ".." { DOTDOT }
| "." (dotsymbolchar symbolchar* as op) { DOTOP op }
| "." (dotsymbolchar symbolchar*) { DOTOP }
| ":" { COLON }
| "::" { COLONCOLON }
| ":=" { COLONEQUAL }
@ -499,34 +475,34 @@ rule token = parse
| "[%" { LBRACKETPERCENT }
| "[%%" { LBRACKETPERCENTPERCENT }
| "!" { BANG }
| "!=" { INFIXOP0 "!=" }
| "!=" { INFIXOP0 }
| "+" { PLUS }
| "+." { PLUSDOT }
| "+=" { PLUSEQ }
| "-" { MINUS }
| "-." { MINUSDOT }
| "!" symbolchar + as op
{ PREFIXOP op }
| ['~' '?'] symbolchar + as op
{ PREFIXOP op }
| ['=' '<' '>' '|' '&' '$'] symbolchar * as op
{ INFIXOP0 op }
| ['@' '^'] symbolchar * as op
{ INFIXOP1 op }
| ['+' '-'] symbolchar * as op
{ INFIXOP2 op }
| "**" symbolchar * as op
{ INFIXOP4 op }
| "!" symbolchar +
{ PREFIXOP }
| ['~' '?'] symbolchar +
{ PREFIXOP }
| ['=' '<' '>' '|' '&' '$'] symbolchar *
{ INFIXOP0 }
| ['@' '^'] symbolchar *
{ INFIXOP1 }
| ['+' '-'] symbolchar *
{ INFIXOP2 }
| "**" symbolchar *
{ INFIXOP4 }
| '%' { PERCENT }
| ['*' '/' '%'] symbolchar * as op
{ INFIXOP3 op }
| '#' (symbolchar | '#') + as op
{ HASHOP op }
| "let" kwdopchar dotsymbolchar * as op
{ LETOP op }
| "and" kwdopchar dotsymbolchar * as op
{ ANDOP op }
| ['*' '/' '%'] symbolchar *
{ INFIXOP3 }
| '#' (symbolchar | '#') +
{ HASHOP }
| "let" kwdopchar dotsymbolchar *
{ LETOP }
| "and" kwdopchar dotsymbolchar *
{ ANDOP }
| eof { EOF }
| (_ as illegal_char)
{ error lexbuf (Illegal_character illegal_char) }
@ -550,21 +526,21 @@ and directive = parse
}
and comment = parse
"(*"
{ comment_start_loc := (Location.curr lexbuf) :: !comment_start_loc;
{ comment_start_loc := (location_curr lexbuf) :: !comment_start_loc;
store_lexeme lexbuf;
comment lexbuf
}
| "*)"
{ match !comment_start_loc with
| [] -> assert false
| [_] -> comment_start_loc := []; Location.curr lexbuf
| [_] -> comment_start_loc := []; location_curr lexbuf
| _ :: l -> comment_start_loc := l;
store_lexeme lexbuf;
comment lexbuf
}
| "\""
{
string_start_loc := Location.curr lexbuf;
string_start_loc := location_curr lexbuf;
store_string_char '\"';
is_in_string := true;
begin try string lexbuf
@ -581,7 +557,7 @@ and comment = parse
comment lexbuf }
| "{" (lowercase* as delim) "|"
{
string_start_loc := Location.curr lexbuf;
string_start_loc := location_curr lexbuf;
store_lexeme lexbuf;
is_in_string := true;
begin try quoted_string delim lexbuf
@ -714,99 +690,18 @@ and skip_hash_bang = parse
| None -> token lexbuf
| Some (_init, preprocess) -> preprocess token lexbuf
type newline_state =
| NoLine (* There have been no blank lines yet. *)
| NewLine
(* There have been no blank lines, and the previous
token was a newline. *)
| BlankLine (* There have been blank lines. *)
type doc_state =
| Initial (* There have been no docstrings yet *)
| After of docstring list
(* There have been docstrings, none of which were
preceded by a blank line *)
| Before of docstring list * docstring list * docstring list
(* There have been docstrings, some of which were
preceded by a blank line *)
and docstring = Docstrings.docstring
let token lexbuf =
let post_pos = lexeme_end_p lexbuf in
let attach lines docs pre_pos =
let open Docstrings in
match docs, lines with
| Initial, _ -> ()
| After a, (NoLine | NewLine) ->
set_post_docstrings post_pos (List.rev a);
set_pre_docstrings pre_pos a;
| After a, BlankLine ->
set_post_docstrings post_pos (List.rev a);
set_pre_extra_docstrings pre_pos (List.rev a)
| Before(a, f, b), (NoLine | NewLine) ->
set_post_docstrings post_pos (List.rev a);
set_post_extra_docstrings post_pos
(List.rev_append f (List.rev b));
set_floating_docstrings pre_pos (List.rev f);
set_pre_extra_docstrings pre_pos (List.rev a);
set_pre_docstrings pre_pos b
| Before(a, f, b), BlankLine ->
set_post_docstrings post_pos (List.rev a);
set_post_extra_docstrings post_pos
(List.rev_append f (List.rev b));
set_floating_docstrings pre_pos
(List.rev_append f (List.rev b));
set_pre_extra_docstrings pre_pos (List.rev a)
in
let rec loop lines docs lexbuf =
let rec loop lexbuf =
match token_with_comments lexbuf with
| COMMENT (s, loc) ->
add_comment (s, loc);
let lines' =
match lines with
| NoLine -> NoLine
| NewLine -> NoLine
| BlankLine -> BlankLine
in
loop lines' docs lexbuf
| EOL ->
let lines' =
match lines with
| NoLine -> NewLine
| NewLine -> BlankLine
| BlankLine -> BlankLine
in
loop lines' docs lexbuf
| DOCSTRING doc ->
Docstrings.register doc;
add_docstring_comment doc;
let docs' =
if Docstrings.docstring_body doc = "/*" then
match docs with
| Initial -> Before([], [doc], [])
| After a -> Before (a, [doc], [])
| Before(a, f, b) -> Before(a, doc :: b @ f, [])
else
match docs, lines with
| Initial, (NoLine | NewLine) -> After [doc]
| Initial, BlankLine -> Before([], [], [doc])
| After a, (NoLine | NewLine) -> After (doc :: a)
| After a, BlankLine -> Before (a, [], [doc])
| Before(a, f, b), (NoLine | NewLine) -> Before(a, f, doc :: b)
| Before(a, f, b), BlankLine -> Before(a, b @ f, [doc])
in
loop NoLine docs' lexbuf
| tok ->
attach lines docs (lexeme_start_p lexbuf);
tok
| COMMENT -> loop lexbuf
| EOL -> loop lexbuf
| tok -> tok
in
loop NoLine Initial lexbuf
loop lexbuf
let init () =
is_in_string := false;
comment_start_loc := [];
comment_list := [];
match !preprocessor with
| None -> ()
| Some (init, _preprocess) -> init ()

154
stat_lexer.ml Normal file
View File

@ -0,0 +1,154 @@
let string_of_token : Raw_parser.token -> _ = function
| AMPERAMPER -> "AMPERAMPER"
| AMPERSAND -> "AMPERSAND"
| AND -> "AND"
| ANDOP -> "ANDOP"
| AS -> "AS"
| ASSERT -> "ASSERT"
| BACKQUOTE -> "BACKQUOTE"
| BANG -> "BANG"
| BAR -> "BAR"
| BARBAR -> "BARBAR"
| BARRBRACKET -> "BARRBRACKET"
| BEGIN -> "BEGIN"
| CHAR -> "CHAR"
| CLASS -> "CLASS"
| COLON -> "COLON"
| COLONCOLON -> "COLONCOLON"
| COLONEQUAL -> "COLONEQUAL"
| COLONGREATER -> "COLONGREATER"
| COMMA -> "COMMA"
| COMMENT -> "COMMENT"
| CONSTRAINT -> "CONSTRAINT"
| DO -> "DO"
| DOCSTRING -> "DOCSTRING"
| DONE -> "DONE"
| DOT -> "DOT"
| DOTDOT -> "DOTDOT"
| DOTOP -> "DOTOP"
| DOWNTO -> "DOWNTO"
| ELSE -> "ELSE"
| END -> "END"
| EOF -> "EOF"
| EOL -> "EOL"
| EQUAL -> "EQUAL"
| EXCEPTION -> "EXCEPTION"
| EXTERNAL -> "EXTERNAL"
| FALSE -> "FALSE"
| FLOAT -> "FLOAT"
| FOR -> "FOR"
| FUN -> "FUN"
| FUNCTION -> "FUNCTION"
| FUNCTOR -> "FUNCTOR"
| GREATER -> "GREATER"
| GREATERRBRACE-> "GREATERRBRACE"
| GREATERRBRACKET -> "GREATERRBRACKET"
| HASH -> "HASH"
| HASHOP -> "HASHOP"
| IF -> "IF"
| IN -> "IN"
| INCLUDE -> "INCLUDE"
| INFIXOP0 -> "INFIXOP0"
| INFIXOP1 -> "INFIXOP1"
| INFIXOP2 -> "INFIXOP2"
| INFIXOP3 -> "INFIXOP3"
| INFIXOP4 -> "INFIXOP4"
| INHERIT -> "INHERIT"
| INITIALIZER -> "INITIALIZER"
| INT -> "INT"
| LABEL -> "LABEL"
| LAZY -> "LAZY"
| LBRACE -> "LBRACE"
| LBRACELESS -> "LBRACELESS"
| LBRACKET -> "LBRACKET"
| LBRACKETAT -> "LBRACKETAT"
| LBRACKETATAT -> "LBRACKETATAT"
| LBRACKETATATAT -> "LBRACKETATATAT"
| LBRACKETBAR -> "LBRACKETBAR"
| LBRACKETGREATER -> "LBRACKETGREATER"
| LBRACKETLESS -> "LBRACKETLESS"
| LBRACKETPERCENT -> "LBRACKETPERCENT"
| LBRACKETPERCENTPERCENT -> "LBRACKETPERCENTPERCENTERCENT"
| LESS -> "LESS"
| LESSMINUS -> "LESSMINUS"
| LET -> "LET"
| LETOP -> "LETOP"
| LIDENT -> "LIDENT"
| LPAREN -> "LPAREN"
| MATCH -> "MATCH"
| METHOD -> "METHOD"
| MINUS -> "MINUS"
| MINUSDOT -> "MINUSDOT"
| MINUSGREATER -> "MINUSGREATER"
| MODULE -> "MODULE"
| MUTABLE -> "MUTABLE"
| NEW -> "NEW"
| NONREC -> "NONREC"
| OBJECT -> "OBJECT"
| OF -> "OF"
| OPEN -> "OPEN"
| OPTLABEL -> "OPTLABEL"
| OR -> "OR"
| PERCENT -> "PERCENT"
| PLUS -> "PLUS"
| PLUSDOT -> "PLUSDOT"
| PLUSEQ -> "PLUSEQ"
| PREFIXOP -> "PREFIXOP"
| PRIVATE -> "PRIVATE"
| QUESTION -> "QUESTION"
| QUOTE -> "QUOTE"
| RBRACE -> "RBRACE"
| RBRACKET -> "RBRACKET"
| REC -> "REC"
| RPAREN -> "RPAREN"
| SEMI -> "SEMI"
| SEMISEMI -> "SEMISEMI"
| SIG -> "SIG"
| STAR -> "STAR"
| STRING -> "STRING"
| STRUCT -> "STRUCT"
| THEN -> "THEN"
| TILDE -> "TILDE"
| TO -> "TO"
| TRUE -> "TRUE"
| TRY -> "TRY"
| TYPE -> "TYPE"
| UIDENT -> "UIDENT"
| UNDERSCORE -> "UNDERSCORE"
| VAL -> "VAL"
| VIRTUAL -> "VIRTUAL"
| WHEN -> "WHEN"
| WHILE -> "WHILE"
| WITH -> "WITH"
let () =
for i = 1 to Array.length Sys.argv - 1 do
let name = Sys.argv.(i) in
let ic = open_in name in
try
let lexbuf = Lexing.from_channel ic in
let rec aux () =
match Raw_lexer.token lexbuf with
| Raw_parser.EOF -> ()
| tok ->
print_char ' ';
print_string (string_of_token tok);
aux ()
in
let is_intf = name.[String.length name - 1] = 'i' in
if is_intf then
print_string "interface:"
else
print_string "implementation:";
aux ();
print_newline ();
close_in ic
with exn ->
begin match exn with
| Raw_lexer.Error (err, loc) ->
Raw_lexer.print_error loc err;
| exn -> prerr_endline (Printexc.to_string exn)
end;
close_in ic
done

177
stat_parser.ml Normal file
View File

@ -0,0 +1,177 @@
open Raw_parser
let token_of_string = function
| "AMPERAMPER" -> AMPERAMPER
| "AMPERSAND" -> AMPERSAND
| "AND" -> AND
| "ANDOP" -> ANDOP
| "AS" -> AS
| "ASSERT" -> ASSERT
| "BACKQUOTE" -> BACKQUOTE
| "BANG" -> BANG
| "BAR" -> BAR
| "BARBAR" -> BARBAR
| "BARRBRACKET" -> BARRBRACKET
| "BEGIN" -> BEGIN
| "CHAR" -> CHAR
| "CLASS" -> CLASS
| "COLON" -> COLON
| "COLONCOLON" -> COLONCOLON
| "COLONEQUAL" -> COLONEQUAL
| "COLONGREATER" -> COLONGREATER
| "COMMA" -> COMMA
| "COMMENT" -> COMMENT
| "CONSTRAINT" -> CONSTRAINT
| "DO" -> DO
| "DOCSTRING" -> DOCSTRING
| "DONE" -> DONE
| "DOT" -> DOT
| "DOTDOT" -> DOTDOT
| "DOTOP" -> DOTOP
| "DOWNTO" -> DOWNTO
| "ELSE" -> ELSE
| "END" -> END
| "EOF" -> EOF
| "EOL" -> EOL
| "EQUAL" -> EQUAL
| "EXCEPTION" -> EXCEPTION
| "EXTERNAL" -> EXTERNAL
| "FALSE" -> FALSE
| "FLOAT" -> FLOAT
| "FOR" -> FOR
| "FUN" -> FUN
| "FUNCTION" -> FUNCTION
| "FUNCTOR" -> FUNCTOR
| "GREATER" -> GREATER
| "GREATERRBRACE" -> GREATERRBRACE
| "GREATERRBRACKET" -> GREATERRBRACKET
| "HASH" -> HASH
| "HASHOP" -> HASHOP
| "IF" -> IF
| "IN" -> IN
| "INCLUDE" -> INCLUDE
| "INFIXOP0" -> INFIXOP0
| "INFIXOP1" -> INFIXOP1
| "INFIXOP2" -> INFIXOP2
| "INFIXOP3" -> INFIXOP3
| "INFIXOP4" -> INFIXOP4
| "INHERIT" -> INHERIT
| "INITIALIZER" -> INITIALIZER
| "INT" -> INT
| "LABEL" -> LABEL
| "LAZY" -> LAZY
| "LBRACE" -> LBRACE
| "LBRACELESS" -> LBRACELESS
| "LBRACKET" -> LBRACKET
| "LBRACKETAT" -> LBRACKETAT
| "LBRACKETATAT"-> LBRACKETATAT
| "LBRACKETATATAT" -> LBRACKETATATAT
| "LBRACKETBAR" -> LBRACKETBAR
| "LBRACKETGREATER" -> LBRACKETGREATER
| "LBRACKETLESS" -> LBRACKETLESS
| "LBRACKETPERCENT" -> LBRACKETPERCENT
| "LBRACKETPERCENTPERCENT" -> LBRACKETPERCENTPERCENT
| "LESS" -> LESS
| "LESSMINUS" -> LESSMINUS
| "LET" -> LET
| "LETOP" -> LETOP
| "LIDENT" -> LIDENT
| "LPAREN" -> LPAREN
| "MATCH" -> MATCH
| "METHOD" -> METHOD
| "MINUS" -> MINUS
| "MINUSDOT" -> MINUSDOT
| "MINUSGREATER"-> MINUSGREATER
| "MODULE" -> MODULE
| "MUTABLE" -> MUTABLE
| "NEW" -> NEW
| "NONREC" -> NONREC
| "OBJECT" -> OBJECT
| "OF" -> OF
| "OPEN" -> OPEN
| "OPTLABEL" -> OPTLABEL
| "OR" -> OR
| "PERCENT" -> PERCENT
| "PLUS" -> PLUS
| "PLUSDOT" -> PLUSDOT
| "PLUSEQ" -> PLUSEQ
| "PREFIXOP" -> PREFIXOP
| "PRIVATE" -> PRIVATE
| "QUESTION" -> QUESTION
| "QUOTE" -> QUOTE
| "RBRACE" -> RBRACE
| "RBRACKET" -> RBRACKET
| "REC" -> REC
| "RPAREN" -> RPAREN
| "SEMI" -> SEMI
| "SEMISEMI" -> SEMISEMI
| "SIG" -> SIG
| "STAR" -> STAR
| "STRING" -> STRING
| "STRUCT" -> STRUCT
| "THEN" -> THEN
| "TILDE" -> TILDE
| "TO" -> TO
| "TRUE" -> TRUE
| "TRY" -> TRY
| "TYPE" -> TYPE
| "UIDENT" -> UIDENT
| "UNDERSCORE" -> UNDERSCORE
| "VAL" -> VAL
| "VIRTUAL" -> VIRTUAL
| "WHEN" -> WHEN
| "WHILE" -> WHILE
| "WITH" -> WITH
| token -> failwith ("Unknown token: " ^ token)
module Stat = Menhir_stat.Recorder(Raw_parser.MenhirInterpreter)
let word_iter str =
let pos = ref 0 in
fun () ->
let i = !pos in
match String.index_from str i ' ' with
| exception Not_found ->
let len = String.length str in
if i < len then
(pos := len; Some (String.sub str i (len - i)))
else
None
| j ->
pos := j + 1;
Some (String.sub str i (j - i))
let parse_line str =
let f = word_iter str in
match f () with
| Some "interface:" -> Some (`Intf, f)
| Some "implementation:" -> Some (`Impl, f)
| Some str ->
failwith ("Unexpected input " ^ str ^
", expecting \"interface:\" or \"implementation:\"")
| None -> None
let parse entry words =
let supplier () =
let token = match words () with
| Some tok -> token_of_string tok
| None -> EOF
in
(token, Lexing.dummy_pos, Lexing.dummy_pos)
in
MenhirInterpreter.loop supplier (entry Lexing.dummy_pos)
let () =
let rec aux () =
match input_line stdin with
| line ->
begin match parse_line line with
| Some (`Intf, words) -> parse Incremental.interface words
| Some (`Impl, words) -> parse Incremental.implementation words
| None -> ()
end;
aux ()
| exception End_of_file ->
()
in
aux ()