wip
This commit is contained in:
parent
e956fce0c6
commit
7e518ea261
|
@ -0,0 +1 @@
|
|||
_build
|
7
Makefile
7
Makefile
|
@ -1,2 +1,7 @@
|
|||
TARGETS=stat_lexer.exe stat_parser.exe
|
||||
|
||||
all:
|
||||
dune build menhir_stat.exe
|
||||
dune build $(TARGETS)
|
||||
|
||||
$(TARGETS):
|
||||
dune build $@
|
||||
|
|
25
dune
25
dune
|
@ -1,9 +1,26 @@
|
|||
(executable
|
||||
(name stat_parser)
|
||||
(modules stat_parser)
|
||||
(libraries menhirLib menhirSdk menhir_stat raw_parser))
|
||||
|
||||
(executable
|
||||
(name stat_lexer)
|
||||
(modules stat_lexer raw_lexer)
|
||||
(libraries raw_parser))
|
||||
|
||||
(library
|
||||
(name menhir_stat)
|
||||
(modules menhir_stat)
|
||||
(libraries menhirLib menhirSdk))
|
||||
|
||||
(executable
|
||||
(name ocamllexer)
|
||||
(modules ocamllexer)
|
||||
(libraries compilerlibs))
|
||||
(library
|
||||
(name raw_parser)
|
||||
(flags :standard -w -26-27)
|
||||
(modules raw_parser)
|
||||
(libraries menhirLib))
|
||||
|
||||
(menhir
|
||||
(modules raw_parser)
|
||||
(flags --table --inspection))
|
||||
|
||||
(ocamllex raw_lexer)
|
||||
|
|
|
@ -1 +1,2 @@
|
|||
(lang dune 2.8)
|
||||
(using menhir 2.1)
|
||||
|
|
|
@ -35,10 +35,6 @@ struct
|
|||
| (env', f') :: _ when M.equal env env' -> f', stack
|
||||
| _ :: tl -> pop_until tl env
|
||||
|
||||
let top_frame = function
|
||||
| (_, f) :: _ -> f
|
||||
| _ -> assert false
|
||||
|
||||
let reduce_to state stack prod env =
|
||||
match M.pop env with
|
||||
| Some env' ->
|
||||
|
|
327
raw_lexer.mll
327
raw_lexer.mll
|
@ -19,19 +19,30 @@
|
|||
open Lexing
|
||||
open Raw_parser
|
||||
|
||||
type location = position * position
|
||||
|
||||
let location_error ~loc fmt =
|
||||
let start, stop = loc in
|
||||
Format.eprintf "Error at %d:%d-%d:%d: "
|
||||
start.pos_lnum (start.pos_cnum - start.pos_bol)
|
||||
stop.pos_lnum (stop.pos_cnum - stop.pos_bol);
|
||||
Format.eprintf fmt
|
||||
|
||||
type error =
|
||||
| Illegal_character of char
|
||||
| Illegal_escape of string * string option
|
||||
| Reserved_sequence of string * string option
|
||||
| Unterminated_comment of Location.t
|
||||
| Unterminated_comment of location
|
||||
| Unterminated_string
|
||||
| Unterminated_string_in_comment of Location.t * Location.t
|
||||
| Unterminated_string_in_comment of location * location
|
||||
| Keyword_as_label of string
|
||||
| Invalid_literal of string
|
||||
| Invalid_directive of string * string option
|
||||
;;
|
||||
|
||||
exception Error of error * Location.t;;
|
||||
exception Error of error * location
|
||||
|
||||
let location_curr lexbuf = (lexbuf.lex_start_p, lexbuf.lex_curr_p)
|
||||
|
||||
(* The table of keywords *)
|
||||
|
||||
|
@ -89,13 +100,13 @@ let keyword_table =
|
|||
"while", WHILE;
|
||||
"with", WITH;
|
||||
|
||||
"lor", INFIXOP3("lor"); (* Should be INFIXOP2 *)
|
||||
"lxor", INFIXOP3("lxor"); (* Should be INFIXOP2 *)
|
||||
"mod", INFIXOP3("mod");
|
||||
"land", INFIXOP3("land");
|
||||
"lsl", INFIXOP4("lsl");
|
||||
"lsr", INFIXOP4("lsr");
|
||||
"asr", INFIXOP4("asr")
|
||||
"lor", INFIXOP3; (* Should be INFIXOP2 *)
|
||||
"lxor", INFIXOP3; (* Should be INFIXOP2 *)
|
||||
"mod", INFIXOP3;
|
||||
"land", INFIXOP3;
|
||||
"lsl", INFIXOP4;
|
||||
"lsr", INFIXOP4;
|
||||
"asr", INFIXOP4
|
||||
];
|
||||
table
|
||||
|
||||
|
@ -111,7 +122,7 @@ let store_string s = Buffer.add_string string_buffer s
|
|||
let store_lexeme lexbuf = store_string (Lexing.lexeme lexbuf)
|
||||
|
||||
(* To store the position of the beginning of a string and comment *)
|
||||
let string_start_loc = ref Location.none;;
|
||||
let string_start_loc = ref (dummy_pos, dummy_pos);;
|
||||
let comment_start_loc = ref [];;
|
||||
let in_comment () = !comment_start_loc <> [];;
|
||||
let is_in_string = ref false
|
||||
|
@ -126,16 +137,16 @@ let store_escaped_uchar lexbuf u =
|
|||
if in_comment () then store_lexeme lexbuf else store_string_utf_8_uchar u
|
||||
|
||||
let with_comment_buffer comment lexbuf =
|
||||
let start_loc = Location.curr lexbuf in
|
||||
let start_loc = location_curr lexbuf in
|
||||
comment_start_loc := [start_loc];
|
||||
reset_string_buffer ();
|
||||
let end_loc = comment lexbuf in
|
||||
let s = get_stored_string () in
|
||||
reset_string_buffer ();
|
||||
let loc = { start_loc with Location.loc_end = end_loc.Location.loc_end } in
|
||||
let loc = (fst start_loc, snd end_loc) in
|
||||
s, loc
|
||||
|
||||
let error lexbuf e = raise (Error(e, Location.curr lexbuf))
|
||||
let error lexbuf e = raise (Error(e, location_curr lexbuf))
|
||||
let error_loc loc e = raise (Error(e, loc))
|
||||
|
||||
(* to translate escape sequences *)
|
||||
|
@ -165,7 +176,7 @@ let char_for_backslash = function
|
|||
|
||||
let illegal_escape lexbuf reason =
|
||||
let error = Illegal_escape (Lexing.lexeme lexbuf, Some reason) in
|
||||
raise (Error (error, Location.curr lexbuf))
|
||||
raise (Error (error, location_curr lexbuf))
|
||||
|
||||
let char_for_decimal_code lexbuf i =
|
||||
let c = num_value lexbuf ~base:10 ~first:i ~last:(i+2) in
|
||||
|
@ -209,9 +220,6 @@ let uchar_for_uchar_escape lexbuf =
|
|||
|
||||
let is_keyword name = Hashtbl.mem keyword_table name
|
||||
|
||||
let check_label_name lexbuf name =
|
||||
if is_keyword name then error lexbuf (Keyword_as_label name)
|
||||
|
||||
(* Update the current location with file name and line number. *)
|
||||
|
||||
let update_loc lexbuf file line absolute chars =
|
||||
|
@ -233,59 +241,39 @@ let escaped_newlines = ref false
|
|||
|
||||
(* Warn about Latin-1 characters used in idents *)
|
||||
|
||||
let warn_latin1 lexbuf = ignore lexbuf
|
||||
(*Location.deprecated
|
||||
(Location.curr lexbuf)
|
||||
"ISO-Latin1 characters in identifiers"*)
|
||||
|
||||
let handle_docstrings = ref true
|
||||
let comment_list = ref []
|
||||
|
||||
let add_comment com =
|
||||
comment_list := com :: !comment_list
|
||||
|
||||
let add_docstring_comment ds =
|
||||
let com =
|
||||
("*" ^ Docstrings.docstring_body ds, Docstrings.docstring_loc ds)
|
||||
in
|
||||
add_comment com
|
||||
|
||||
let comments () = List.rev !comment_list
|
||||
|
||||
(* Error report *)
|
||||
|
||||
open Format
|
||||
|
||||
let prepare_error loc = function
|
||||
let print_error loc = function
|
||||
| Illegal_character c ->
|
||||
Location.errorf ~loc "Illegal character (%s)" (Char.escaped c)
|
||||
location_error ~loc "Illegal character (%s)" (Char.escaped c)
|
||||
| Illegal_escape (s, explanation) ->
|
||||
Location.errorf ~loc
|
||||
location_error ~loc
|
||||
"Illegal backslash escape in string or character (%s)%t" s
|
||||
(fun ppf -> match explanation with
|
||||
| None -> ()
|
||||
| Some expl -> fprintf ppf ": %s" expl)
|
||||
| Reserved_sequence (s, explanation) ->
|
||||
Location.errorf ~loc
|
||||
location_error ~loc
|
||||
"Reserved character sequence: %s%t" s
|
||||
(fun ppf -> match explanation with
|
||||
| None -> ()
|
||||
| Some expl -> fprintf ppf " %s" expl)
|
||||
| Unterminated_comment _ ->
|
||||
Location.errorf ~loc "Comment not terminated"
|
||||
location_error ~loc "Comment not terminated"
|
||||
| Unterminated_string ->
|
||||
Location.errorf ~loc "String literal not terminated"
|
||||
| Unterminated_string_in_comment (_, literal_loc) ->
|
||||
Location.errorf ~loc
|
||||
location_error ~loc "String literal not terminated"
|
||||
| Unterminated_string_in_comment (_, _) ->
|
||||
location_error ~loc
|
||||
"This comment contains an unterminated string literal"
|
||||
~sub:[Location.msg ~loc:literal_loc "String literal begins here"]
|
||||
| Keyword_as_label kwd ->
|
||||
Location.errorf ~loc
|
||||
location_error ~loc
|
||||
"`%s' is a keyword, it cannot be used as label name" kwd
|
||||
| Invalid_literal s ->
|
||||
Location.errorf ~loc "Invalid literal %s" s
|
||||
location_error ~loc "Invalid literal %s" s
|
||||
| Invalid_directive (dir, explanation) ->
|
||||
Location.errorf ~loc "Invalid lexer directive %S%t" dir
|
||||
location_error ~loc "Invalid lexer directive %S%t" dir
|
||||
(fun ppf -> match explanation with
|
||||
| None -> ()
|
||||
| Some expl -> fprintf ppf ": %s" expl)
|
||||
|
@ -348,102 +336,90 @@ rule token = parse
|
|||
| ".~"
|
||||
{ error lexbuf
|
||||
(Reserved_sequence (".~", Some "is reserved for use in MetaOCaml")) }
|
||||
| "~" (lowercase identchar * as name) ':'
|
||||
{ check_label_name lexbuf name;
|
||||
LABEL name }
|
||||
| "~" (lowercase_latin1 identchar_latin1 * as name) ':'
|
||||
{ warn_latin1 lexbuf;
|
||||
LABEL name }
|
||||
| "~" (lowercase identchar *) ':'
|
||||
{ LABEL }
|
||||
| "~" (lowercase_latin1 identchar_latin1 *) ':'
|
||||
{ LABEL }
|
||||
| "?"
|
||||
{ QUESTION }
|
||||
| "?" (lowercase identchar * as name) ':'
|
||||
{ check_label_name lexbuf name;
|
||||
OPTLABEL name }
|
||||
| "?" (lowercase_latin1 identchar_latin1 * as name) ':'
|
||||
{ warn_latin1 lexbuf;
|
||||
OPTLABEL name }
|
||||
| "?" (lowercase identchar *) ':'
|
||||
{ OPTLABEL }
|
||||
| "?" (lowercase_latin1 identchar_latin1 *) ':'
|
||||
{ OPTLABEL }
|
||||
| lowercase identchar * as name
|
||||
{ try Hashtbl.find keyword_table name
|
||||
with Not_found -> LIDENT name }
|
||||
| lowercase_latin1 identchar_latin1 * as name
|
||||
{ warn_latin1 lexbuf; LIDENT name }
|
||||
| uppercase identchar * as name
|
||||
{ UIDENT name } (* No capitalized keywords *)
|
||||
| uppercase_latin1 identchar_latin1 * as name
|
||||
{ warn_latin1 lexbuf; UIDENT name }
|
||||
| int_literal as lit { INT (lit, None) }
|
||||
| (int_literal as lit) (literal_modifier as modif)
|
||||
{ INT (lit, Some modif) }
|
||||
| float_literal | hex_float_literal as lit
|
||||
{ FLOAT (lit, None) }
|
||||
| (float_literal | hex_float_literal as lit) (literal_modifier as modif)
|
||||
{ FLOAT (lit, Some modif) }
|
||||
with Not_found -> LIDENT }
|
||||
| lowercase_latin1 identchar_latin1 *
|
||||
{ LIDENT }
|
||||
| uppercase identchar *
|
||||
{ UIDENT } (* No capitalized keywords *)
|
||||
| uppercase_latin1 identchar_latin1 *
|
||||
{ UIDENT }
|
||||
| int_literal { INT }
|
||||
| int_literal literal_modifier { INT }
|
||||
| float_literal | hex_float_literal
|
||||
{ FLOAT }
|
||||
| (float_literal | hex_float_literal) literal_modifier
|
||||
{ FLOAT }
|
||||
| (float_literal | hex_float_literal | int_literal) identchar+ as invalid
|
||||
{ error lexbuf (Invalid_literal invalid) }
|
||||
| "\""
|
||||
{ reset_string_buffer();
|
||||
is_in_string := true;
|
||||
let string_start = lexbuf.lex_start_p in
|
||||
string_start_loc := Location.curr lexbuf;
|
||||
string_start_loc := location_curr lexbuf;
|
||||
string lexbuf;
|
||||
is_in_string := false;
|
||||
lexbuf.lex_start_p <- string_start;
|
||||
STRING (get_stored_string(), None) }
|
||||
STRING }
|
||||
| "{" (lowercase* as delim) "|"
|
||||
{ reset_string_buffer();
|
||||
is_in_string := true;
|
||||
let string_start = lexbuf.lex_start_p in
|
||||
string_start_loc := Location.curr lexbuf;
|
||||
string_start_loc := location_curr lexbuf;
|
||||
quoted_string delim lexbuf;
|
||||
is_in_string := false;
|
||||
lexbuf.lex_start_p <- string_start;
|
||||
STRING (get_stored_string(), Some delim) }
|
||||
STRING }
|
||||
| "\'" newline "\'"
|
||||
{ update_loc lexbuf None 1 false 1;
|
||||
(* newline is ('\013'* '\010') *)
|
||||
CHAR '\n' }
|
||||
| "\'" ([^ '\\' '\'' '\010' '\013'] as c) "\'"
|
||||
{ CHAR c }
|
||||
| "\'\\" (['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] as c) "\'"
|
||||
{ CHAR (char_for_backslash c) }
|
||||
CHAR }
|
||||
| "\'" [^ '\\' '\'' '\010' '\013'] "\'"
|
||||
{ CHAR }
|
||||
| "\'\\" ['\\' '\'' '\"' 'n' 't' 'b' 'r' ' '] "\'"
|
||||
{ CHAR }
|
||||
| "\'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "\'"
|
||||
{ CHAR(char_for_decimal_code lexbuf 2) }
|
||||
{ CHAR }
|
||||
| "\'\\" 'o' ['0'-'7'] ['0'-'7'] ['0'-'7'] "\'"
|
||||
{ CHAR(char_for_octal_code lexbuf 3) }
|
||||
{ CHAR }
|
||||
| "\'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "\'"
|
||||
{ CHAR(char_for_hexadecimal_code lexbuf 3) }
|
||||
{ CHAR }
|
||||
| "\'" ("\\" _ as esc)
|
||||
{ error lexbuf (Illegal_escape (esc, None)) }
|
||||
| "(*"
|
||||
{ let s, loc = with_comment_buffer comment lexbuf in
|
||||
COMMENT (s, loc) }
|
||||
{ let _ = with_comment_buffer comment lexbuf in
|
||||
COMMENT }
|
||||
| "(**"
|
||||
{ let s, loc = with_comment_buffer comment lexbuf in
|
||||
if !handle_docstrings then
|
||||
DOCSTRING (Docstrings.docstring s loc)
|
||||
else
|
||||
COMMENT ("*" ^ s, loc)
|
||||
{ let _ = with_comment_buffer comment lexbuf in
|
||||
COMMENT
|
||||
}
|
||||
| "(**" (('*'+) as stars)
|
||||
{ let s, loc =
|
||||
{ let _ =
|
||||
with_comment_buffer
|
||||
(fun lexbuf ->
|
||||
store_string ("*" ^ stars);
|
||||
comment lexbuf)
|
||||
lexbuf
|
||||
in
|
||||
COMMENT (s, loc) }
|
||||
COMMENT }
|
||||
| "(*)"
|
||||
{ (*if !print_warnings then
|
||||
Location.prerr_warning (Location.curr lexbuf) Warnings.Comment_start;*)
|
||||
let s, loc = with_comment_buffer comment lexbuf in
|
||||
COMMENT (s, loc) }
|
||||
| "(*" (('*'*) as stars) "*)"
|
||||
{ if !handle_docstrings && stars="" then
|
||||
(* (**) is an empty docstring *)
|
||||
DOCSTRING(Docstrings.docstring "" (Location.curr lexbuf))
|
||||
else
|
||||
COMMENT (stars, Location.curr lexbuf) }
|
||||
let _ = with_comment_buffer comment lexbuf in
|
||||
COMMENT }
|
||||
| "(*" ('*'*) "*)"
|
||||
{ COMMENT }
|
||||
| "*)"
|
||||
{ (*let loc = Location.curr lexbuf in
|
||||
Location.prerr_warning loc Warnings.Comment_not_end;*)
|
||||
|
@ -469,7 +445,7 @@ rule token = parse
|
|||
| "->" { MINUSGREATER }
|
||||
| "." { DOT }
|
||||
| ".." { DOTDOT }
|
||||
| "." (dotsymbolchar symbolchar* as op) { DOTOP op }
|
||||
| "." (dotsymbolchar symbolchar*) { DOTOP }
|
||||
| ":" { COLON }
|
||||
| "::" { COLONCOLON }
|
||||
| ":=" { COLONEQUAL }
|
||||
|
@ -499,34 +475,34 @@ rule token = parse
|
|||
| "[%" { LBRACKETPERCENT }
|
||||
| "[%%" { LBRACKETPERCENTPERCENT }
|
||||
| "!" { BANG }
|
||||
| "!=" { INFIXOP0 "!=" }
|
||||
| "!=" { INFIXOP0 }
|
||||
| "+" { PLUS }
|
||||
| "+." { PLUSDOT }
|
||||
| "+=" { PLUSEQ }
|
||||
| "-" { MINUS }
|
||||
| "-." { MINUSDOT }
|
||||
|
||||
| "!" symbolchar + as op
|
||||
{ PREFIXOP op }
|
||||
| ['~' '?'] symbolchar + as op
|
||||
{ PREFIXOP op }
|
||||
| ['=' '<' '>' '|' '&' '$'] symbolchar * as op
|
||||
{ INFIXOP0 op }
|
||||
| ['@' '^'] symbolchar * as op
|
||||
{ INFIXOP1 op }
|
||||
| ['+' '-'] symbolchar * as op
|
||||
{ INFIXOP2 op }
|
||||
| "**" symbolchar * as op
|
||||
{ INFIXOP4 op }
|
||||
| "!" symbolchar +
|
||||
{ PREFIXOP }
|
||||
| ['~' '?'] symbolchar +
|
||||
{ PREFIXOP }
|
||||
| ['=' '<' '>' '|' '&' '$'] symbolchar *
|
||||
{ INFIXOP0 }
|
||||
| ['@' '^'] symbolchar *
|
||||
{ INFIXOP1 }
|
||||
| ['+' '-'] symbolchar *
|
||||
{ INFIXOP2 }
|
||||
| "**" symbolchar *
|
||||
{ INFIXOP4 }
|
||||
| '%' { PERCENT }
|
||||
| ['*' '/' '%'] symbolchar * as op
|
||||
{ INFIXOP3 op }
|
||||
| '#' (symbolchar | '#') + as op
|
||||
{ HASHOP op }
|
||||
| "let" kwdopchar dotsymbolchar * as op
|
||||
{ LETOP op }
|
||||
| "and" kwdopchar dotsymbolchar * as op
|
||||
{ ANDOP op }
|
||||
| ['*' '/' '%'] symbolchar *
|
||||
{ INFIXOP3 }
|
||||
| '#' (symbolchar | '#') +
|
||||
{ HASHOP }
|
||||
| "let" kwdopchar dotsymbolchar *
|
||||
{ LETOP }
|
||||
| "and" kwdopchar dotsymbolchar *
|
||||
{ ANDOP }
|
||||
| eof { EOF }
|
||||
| (_ as illegal_char)
|
||||
{ error lexbuf (Illegal_character illegal_char) }
|
||||
|
@ -550,21 +526,21 @@ and directive = parse
|
|||
}
|
||||
and comment = parse
|
||||
"(*"
|
||||
{ comment_start_loc := (Location.curr lexbuf) :: !comment_start_loc;
|
||||
{ comment_start_loc := (location_curr lexbuf) :: !comment_start_loc;
|
||||
store_lexeme lexbuf;
|
||||
comment lexbuf
|
||||
}
|
||||
| "*)"
|
||||
{ match !comment_start_loc with
|
||||
| [] -> assert false
|
||||
| [_] -> comment_start_loc := []; Location.curr lexbuf
|
||||
| [_] -> comment_start_loc := []; location_curr lexbuf
|
||||
| _ :: l -> comment_start_loc := l;
|
||||
store_lexeme lexbuf;
|
||||
comment lexbuf
|
||||
}
|
||||
| "\""
|
||||
{
|
||||
string_start_loc := Location.curr lexbuf;
|
||||
string_start_loc := location_curr lexbuf;
|
||||
store_string_char '\"';
|
||||
is_in_string := true;
|
||||
begin try string lexbuf
|
||||
|
@ -581,7 +557,7 @@ and comment = parse
|
|||
comment lexbuf }
|
||||
| "{" (lowercase* as delim) "|"
|
||||
{
|
||||
string_start_loc := Location.curr lexbuf;
|
||||
string_start_loc := location_curr lexbuf;
|
||||
store_lexeme lexbuf;
|
||||
is_in_string := true;
|
||||
begin try quoted_string delim lexbuf
|
||||
|
@ -714,99 +690,18 @@ and skip_hash_bang = parse
|
|||
| None -> token lexbuf
|
||||
| Some (_init, preprocess) -> preprocess token lexbuf
|
||||
|
||||
type newline_state =
|
||||
| NoLine (* There have been no blank lines yet. *)
|
||||
| NewLine
|
||||
(* There have been no blank lines, and the previous
|
||||
token was a newline. *)
|
||||
| BlankLine (* There have been blank lines. *)
|
||||
|
||||
type doc_state =
|
||||
| Initial (* There have been no docstrings yet *)
|
||||
| After of docstring list
|
||||
(* There have been docstrings, none of which were
|
||||
preceded by a blank line *)
|
||||
| Before of docstring list * docstring list * docstring list
|
||||
(* There have been docstrings, some of which were
|
||||
preceded by a blank line *)
|
||||
|
||||
and docstring = Docstrings.docstring
|
||||
|
||||
let token lexbuf =
|
||||
let post_pos = lexeme_end_p lexbuf in
|
||||
let attach lines docs pre_pos =
|
||||
let open Docstrings in
|
||||
match docs, lines with
|
||||
| Initial, _ -> ()
|
||||
| After a, (NoLine | NewLine) ->
|
||||
set_post_docstrings post_pos (List.rev a);
|
||||
set_pre_docstrings pre_pos a;
|
||||
| After a, BlankLine ->
|
||||
set_post_docstrings post_pos (List.rev a);
|
||||
set_pre_extra_docstrings pre_pos (List.rev a)
|
||||
| Before(a, f, b), (NoLine | NewLine) ->
|
||||
set_post_docstrings post_pos (List.rev a);
|
||||
set_post_extra_docstrings post_pos
|
||||
(List.rev_append f (List.rev b));
|
||||
set_floating_docstrings pre_pos (List.rev f);
|
||||
set_pre_extra_docstrings pre_pos (List.rev a);
|
||||
set_pre_docstrings pre_pos b
|
||||
| Before(a, f, b), BlankLine ->
|
||||
set_post_docstrings post_pos (List.rev a);
|
||||
set_post_extra_docstrings post_pos
|
||||
(List.rev_append f (List.rev b));
|
||||
set_floating_docstrings pre_pos
|
||||
(List.rev_append f (List.rev b));
|
||||
set_pre_extra_docstrings pre_pos (List.rev a)
|
||||
in
|
||||
let rec loop lines docs lexbuf =
|
||||
let rec loop lexbuf =
|
||||
match token_with_comments lexbuf with
|
||||
| COMMENT (s, loc) ->
|
||||
add_comment (s, loc);
|
||||
let lines' =
|
||||
match lines with
|
||||
| NoLine -> NoLine
|
||||
| NewLine -> NoLine
|
||||
| BlankLine -> BlankLine
|
||||
in
|
||||
loop lines' docs lexbuf
|
||||
| EOL ->
|
||||
let lines' =
|
||||
match lines with
|
||||
| NoLine -> NewLine
|
||||
| NewLine -> BlankLine
|
||||
| BlankLine -> BlankLine
|
||||
in
|
||||
loop lines' docs lexbuf
|
||||
| DOCSTRING doc ->
|
||||
Docstrings.register doc;
|
||||
add_docstring_comment doc;
|
||||
let docs' =
|
||||
if Docstrings.docstring_body doc = "/*" then
|
||||
match docs with
|
||||
| Initial -> Before([], [doc], [])
|
||||
| After a -> Before (a, [doc], [])
|
||||
| Before(a, f, b) -> Before(a, doc :: b @ f, [])
|
||||
else
|
||||
match docs, lines with
|
||||
| Initial, (NoLine | NewLine) -> After [doc]
|
||||
| Initial, BlankLine -> Before([], [], [doc])
|
||||
| After a, (NoLine | NewLine) -> After (doc :: a)
|
||||
| After a, BlankLine -> Before (a, [], [doc])
|
||||
| Before(a, f, b), (NoLine | NewLine) -> Before(a, f, doc :: b)
|
||||
| Before(a, f, b), BlankLine -> Before(a, b @ f, [doc])
|
||||
in
|
||||
loop NoLine docs' lexbuf
|
||||
| tok ->
|
||||
attach lines docs (lexeme_start_p lexbuf);
|
||||
tok
|
||||
| COMMENT -> loop lexbuf
|
||||
| EOL -> loop lexbuf
|
||||
| tok -> tok
|
||||
in
|
||||
loop NoLine Initial lexbuf
|
||||
loop lexbuf
|
||||
|
||||
let init () =
|
||||
is_in_string := false;
|
||||
comment_start_loc := [];
|
||||
comment_list := [];
|
||||
match !preprocessor with
|
||||
| None -> ()
|
||||
| Some (init, _preprocess) -> init ()
|
||||
|
|
|
@ -0,0 +1,154 @@
|
|||
let string_of_token : Raw_parser.token -> _ = function
|
||||
| AMPERAMPER -> "AMPERAMPER"
|
||||
| AMPERSAND -> "AMPERSAND"
|
||||
| AND -> "AND"
|
||||
| ANDOP -> "ANDOP"
|
||||
| AS -> "AS"
|
||||
| ASSERT -> "ASSERT"
|
||||
| BACKQUOTE -> "BACKQUOTE"
|
||||
| BANG -> "BANG"
|
||||
| BAR -> "BAR"
|
||||
| BARBAR -> "BARBAR"
|
||||
| BARRBRACKET -> "BARRBRACKET"
|
||||
| BEGIN -> "BEGIN"
|
||||
| CHAR -> "CHAR"
|
||||
| CLASS -> "CLASS"
|
||||
| COLON -> "COLON"
|
||||
| COLONCOLON -> "COLONCOLON"
|
||||
| COLONEQUAL -> "COLONEQUAL"
|
||||
| COLONGREATER -> "COLONGREATER"
|
||||
| COMMA -> "COMMA"
|
||||
| COMMENT -> "COMMENT"
|
||||
| CONSTRAINT -> "CONSTRAINT"
|
||||
| DO -> "DO"
|
||||
| DOCSTRING -> "DOCSTRING"
|
||||
| DONE -> "DONE"
|
||||
| DOT -> "DOT"
|
||||
| DOTDOT -> "DOTDOT"
|
||||
| DOTOP -> "DOTOP"
|
||||
| DOWNTO -> "DOWNTO"
|
||||
| ELSE -> "ELSE"
|
||||
| END -> "END"
|
||||
| EOF -> "EOF"
|
||||
| EOL -> "EOL"
|
||||
| EQUAL -> "EQUAL"
|
||||
| EXCEPTION -> "EXCEPTION"
|
||||
| EXTERNAL -> "EXTERNAL"
|
||||
| FALSE -> "FALSE"
|
||||
| FLOAT -> "FLOAT"
|
||||
| FOR -> "FOR"
|
||||
| FUN -> "FUN"
|
||||
| FUNCTION -> "FUNCTION"
|
||||
| FUNCTOR -> "FUNCTOR"
|
||||
| GREATER -> "GREATER"
|
||||
| GREATERRBRACE-> "GREATERRBRACE"
|
||||
| GREATERRBRACKET -> "GREATERRBRACKET"
|
||||
| HASH -> "HASH"
|
||||
| HASHOP -> "HASHOP"
|
||||
| IF -> "IF"
|
||||
| IN -> "IN"
|
||||
| INCLUDE -> "INCLUDE"
|
||||
| INFIXOP0 -> "INFIXOP0"
|
||||
| INFIXOP1 -> "INFIXOP1"
|
||||
| INFIXOP2 -> "INFIXOP2"
|
||||
| INFIXOP3 -> "INFIXOP3"
|
||||
| INFIXOP4 -> "INFIXOP4"
|
||||
| INHERIT -> "INHERIT"
|
||||
| INITIALIZER -> "INITIALIZER"
|
||||
| INT -> "INT"
|
||||
| LABEL -> "LABEL"
|
||||
| LAZY -> "LAZY"
|
||||
| LBRACE -> "LBRACE"
|
||||
| LBRACELESS -> "LBRACELESS"
|
||||
| LBRACKET -> "LBRACKET"
|
||||
| LBRACKETAT -> "LBRACKETAT"
|
||||
| LBRACKETATAT -> "LBRACKETATAT"
|
||||
| LBRACKETATATAT -> "LBRACKETATATAT"
|
||||
| LBRACKETBAR -> "LBRACKETBAR"
|
||||
| LBRACKETGREATER -> "LBRACKETGREATER"
|
||||
| LBRACKETLESS -> "LBRACKETLESS"
|
||||
| LBRACKETPERCENT -> "LBRACKETPERCENT"
|
||||
| LBRACKETPERCENTPERCENT -> "LBRACKETPERCENTPERCENTERCENT"
|
||||
| LESS -> "LESS"
|
||||
| LESSMINUS -> "LESSMINUS"
|
||||
| LET -> "LET"
|
||||
| LETOP -> "LETOP"
|
||||
| LIDENT -> "LIDENT"
|
||||
| LPAREN -> "LPAREN"
|
||||
| MATCH -> "MATCH"
|
||||
| METHOD -> "METHOD"
|
||||
| MINUS -> "MINUS"
|
||||
| MINUSDOT -> "MINUSDOT"
|
||||
| MINUSGREATER -> "MINUSGREATER"
|
||||
| MODULE -> "MODULE"
|
||||
| MUTABLE -> "MUTABLE"
|
||||
| NEW -> "NEW"
|
||||
| NONREC -> "NONREC"
|
||||
| OBJECT -> "OBJECT"
|
||||
| OF -> "OF"
|
||||
| OPEN -> "OPEN"
|
||||
| OPTLABEL -> "OPTLABEL"
|
||||
| OR -> "OR"
|
||||
| PERCENT -> "PERCENT"
|
||||
| PLUS -> "PLUS"
|
||||
| PLUSDOT -> "PLUSDOT"
|
||||
| PLUSEQ -> "PLUSEQ"
|
||||
| PREFIXOP -> "PREFIXOP"
|
||||
| PRIVATE -> "PRIVATE"
|
||||
| QUESTION -> "QUESTION"
|
||||
| QUOTE -> "QUOTE"
|
||||
| RBRACE -> "RBRACE"
|
||||
| RBRACKET -> "RBRACKET"
|
||||
| REC -> "REC"
|
||||
| RPAREN -> "RPAREN"
|
||||
| SEMI -> "SEMI"
|
||||
| SEMISEMI -> "SEMISEMI"
|
||||
| SIG -> "SIG"
|
||||
| STAR -> "STAR"
|
||||
| STRING -> "STRING"
|
||||
| STRUCT -> "STRUCT"
|
||||
| THEN -> "THEN"
|
||||
| TILDE -> "TILDE"
|
||||
| TO -> "TO"
|
||||
| TRUE -> "TRUE"
|
||||
| TRY -> "TRY"
|
||||
| TYPE -> "TYPE"
|
||||
| UIDENT -> "UIDENT"
|
||||
| UNDERSCORE -> "UNDERSCORE"
|
||||
| VAL -> "VAL"
|
||||
| VIRTUAL -> "VIRTUAL"
|
||||
| WHEN -> "WHEN"
|
||||
| WHILE -> "WHILE"
|
||||
| WITH -> "WITH"
|
||||
|
||||
let () =
|
||||
for i = 1 to Array.length Sys.argv - 1 do
|
||||
let name = Sys.argv.(i) in
|
||||
let ic = open_in name in
|
||||
try
|
||||
let lexbuf = Lexing.from_channel ic in
|
||||
let rec aux () =
|
||||
match Raw_lexer.token lexbuf with
|
||||
| Raw_parser.EOF -> ()
|
||||
| tok ->
|
||||
print_char ' ';
|
||||
print_string (string_of_token tok);
|
||||
aux ()
|
||||
in
|
||||
let is_intf = name.[String.length name - 1] = 'i' in
|
||||
if is_intf then
|
||||
print_string "interface:"
|
||||
else
|
||||
print_string "implementation:";
|
||||
aux ();
|
||||
print_newline ();
|
||||
close_in ic
|
||||
with exn ->
|
||||
begin match exn with
|
||||
| Raw_lexer.Error (err, loc) ->
|
||||
Raw_lexer.print_error loc err;
|
||||
| exn -> prerr_endline (Printexc.to_string exn)
|
||||
end;
|
||||
close_in ic
|
||||
done
|
||||
|
|
@ -0,0 +1,177 @@
|
|||
open Raw_parser
|
||||
|
||||
let token_of_string = function
|
||||
| "AMPERAMPER" -> AMPERAMPER
|
||||
| "AMPERSAND" -> AMPERSAND
|
||||
| "AND" -> AND
|
||||
| "ANDOP" -> ANDOP
|
||||
| "AS" -> AS
|
||||
| "ASSERT" -> ASSERT
|
||||
| "BACKQUOTE" -> BACKQUOTE
|
||||
| "BANG" -> BANG
|
||||
| "BAR" -> BAR
|
||||
| "BARBAR" -> BARBAR
|
||||
| "BARRBRACKET" -> BARRBRACKET
|
||||
| "BEGIN" -> BEGIN
|
||||
| "CHAR" -> CHAR
|
||||
| "CLASS" -> CLASS
|
||||
| "COLON" -> COLON
|
||||
| "COLONCOLON" -> COLONCOLON
|
||||
| "COLONEQUAL" -> COLONEQUAL
|
||||
| "COLONGREATER" -> COLONGREATER
|
||||
| "COMMA" -> COMMA
|
||||
| "COMMENT" -> COMMENT
|
||||
| "CONSTRAINT" -> CONSTRAINT
|
||||
| "DO" -> DO
|
||||
| "DOCSTRING" -> DOCSTRING
|
||||
| "DONE" -> DONE
|
||||
| "DOT" -> DOT
|
||||
| "DOTDOT" -> DOTDOT
|
||||
| "DOTOP" -> DOTOP
|
||||
| "DOWNTO" -> DOWNTO
|
||||
| "ELSE" -> ELSE
|
||||
| "END" -> END
|
||||
| "EOF" -> EOF
|
||||
| "EOL" -> EOL
|
||||
| "EQUAL" -> EQUAL
|
||||
| "EXCEPTION" -> EXCEPTION
|
||||
| "EXTERNAL" -> EXTERNAL
|
||||
| "FALSE" -> FALSE
|
||||
| "FLOAT" -> FLOAT
|
||||
| "FOR" -> FOR
|
||||
| "FUN" -> FUN
|
||||
| "FUNCTION" -> FUNCTION
|
||||
| "FUNCTOR" -> FUNCTOR
|
||||
| "GREATER" -> GREATER
|
||||
| "GREATERRBRACE" -> GREATERRBRACE
|
||||
| "GREATERRBRACKET" -> GREATERRBRACKET
|
||||
| "HASH" -> HASH
|
||||
| "HASHOP" -> HASHOP
|
||||
| "IF" -> IF
|
||||
| "IN" -> IN
|
||||
| "INCLUDE" -> INCLUDE
|
||||
| "INFIXOP0" -> INFIXOP0
|
||||
| "INFIXOP1" -> INFIXOP1
|
||||
| "INFIXOP2" -> INFIXOP2
|
||||
| "INFIXOP3" -> INFIXOP3
|
||||
| "INFIXOP4" -> INFIXOP4
|
||||
| "INHERIT" -> INHERIT
|
||||
| "INITIALIZER" -> INITIALIZER
|
||||
| "INT" -> INT
|
||||
| "LABEL" -> LABEL
|
||||
| "LAZY" -> LAZY
|
||||
| "LBRACE" -> LBRACE
|
||||
| "LBRACELESS" -> LBRACELESS
|
||||
| "LBRACKET" -> LBRACKET
|
||||
| "LBRACKETAT" -> LBRACKETAT
|
||||
| "LBRACKETATAT"-> LBRACKETATAT
|
||||
| "LBRACKETATATAT" -> LBRACKETATATAT
|
||||
| "LBRACKETBAR" -> LBRACKETBAR
|
||||
| "LBRACKETGREATER" -> LBRACKETGREATER
|
||||
| "LBRACKETLESS" -> LBRACKETLESS
|
||||
| "LBRACKETPERCENT" -> LBRACKETPERCENT
|
||||
| "LBRACKETPERCENTPERCENT" -> LBRACKETPERCENTPERCENT
|
||||
| "LESS" -> LESS
|
||||
| "LESSMINUS" -> LESSMINUS
|
||||
| "LET" -> LET
|
||||
| "LETOP" -> LETOP
|
||||
| "LIDENT" -> LIDENT
|
||||
| "LPAREN" -> LPAREN
|
||||
| "MATCH" -> MATCH
|
||||
| "METHOD" -> METHOD
|
||||
| "MINUS" -> MINUS
|
||||
| "MINUSDOT" -> MINUSDOT
|
||||
| "MINUSGREATER"-> MINUSGREATER
|
||||
| "MODULE" -> MODULE
|
||||
| "MUTABLE" -> MUTABLE
|
||||
| "NEW" -> NEW
|
||||
| "NONREC" -> NONREC
|
||||
| "OBJECT" -> OBJECT
|
||||
| "OF" -> OF
|
||||
| "OPEN" -> OPEN
|
||||
| "OPTLABEL" -> OPTLABEL
|
||||
| "OR" -> OR
|
||||
| "PERCENT" -> PERCENT
|
||||
| "PLUS" -> PLUS
|
||||
| "PLUSDOT" -> PLUSDOT
|
||||
| "PLUSEQ" -> PLUSEQ
|
||||
| "PREFIXOP" -> PREFIXOP
|
||||
| "PRIVATE" -> PRIVATE
|
||||
| "QUESTION" -> QUESTION
|
||||
| "QUOTE" -> QUOTE
|
||||
| "RBRACE" -> RBRACE
|
||||
| "RBRACKET" -> RBRACKET
|
||||
| "REC" -> REC
|
||||
| "RPAREN" -> RPAREN
|
||||
| "SEMI" -> SEMI
|
||||
| "SEMISEMI" -> SEMISEMI
|
||||
| "SIG" -> SIG
|
||||
| "STAR" -> STAR
|
||||
| "STRING" -> STRING
|
||||
| "STRUCT" -> STRUCT
|
||||
| "THEN" -> THEN
|
||||
| "TILDE" -> TILDE
|
||||
| "TO" -> TO
|
||||
| "TRUE" -> TRUE
|
||||
| "TRY" -> TRY
|
||||
| "TYPE" -> TYPE
|
||||
| "UIDENT" -> UIDENT
|
||||
| "UNDERSCORE" -> UNDERSCORE
|
||||
| "VAL" -> VAL
|
||||
| "VIRTUAL" -> VIRTUAL
|
||||
| "WHEN" -> WHEN
|
||||
| "WHILE" -> WHILE
|
||||
| "WITH" -> WITH
|
||||
| token -> failwith ("Unknown token: " ^ token)
|
||||
|
||||
module Stat = Menhir_stat.Recorder(Raw_parser.MenhirInterpreter)
|
||||
|
||||
let word_iter str =
|
||||
let pos = ref 0 in
|
||||
fun () ->
|
||||
let i = !pos in
|
||||
match String.index_from str i ' ' with
|
||||
| exception Not_found ->
|
||||
let len = String.length str in
|
||||
if i < len then
|
||||
(pos := len; Some (String.sub str i (len - i)))
|
||||
else
|
||||
None
|
||||
| j ->
|
||||
pos := j + 1;
|
||||
Some (String.sub str i (j - i))
|
||||
|
||||
let parse_line str =
|
||||
let f = word_iter str in
|
||||
match f () with
|
||||
| Some "interface:" -> Some (`Intf, f)
|
||||
| Some "implementation:" -> Some (`Impl, f)
|
||||
| Some str ->
|
||||
failwith ("Unexpected input " ^ str ^
|
||||
", expecting \"interface:\" or \"implementation:\"")
|
||||
| None -> None
|
||||
|
||||
let parse entry words =
|
||||
let supplier () =
|
||||
let token = match words () with
|
||||
| Some tok -> token_of_string tok
|
||||
| None -> EOF
|
||||
in
|
||||
(token, Lexing.dummy_pos, Lexing.dummy_pos)
|
||||
in
|
||||
MenhirInterpreter.loop supplier (entry Lexing.dummy_pos)
|
||||
|
||||
let () =
|
||||
let rec aux () =
|
||||
match input_line stdin with
|
||||
| line ->
|
||||
begin match parse_line line with
|
||||
| Some (`Intf, words) -> parse Incremental.interface words
|
||||
| Some (`Impl, words) -> parse Incremental.implementation words
|
||||
| None -> ()
|
||||
end;
|
||||
aux ()
|
||||
| exception End_of_file ->
|
||||
()
|
||||
in
|
||||
aux ()
|
Loading…
Reference in New Issue