Quoted extensions in comments, ocamllex and ocamlyacc (#9166)

* Support quoted extensions in comments
* Support quoted extensions in ocamllex
* Support quoted extensions in ocamlyacc
* Fix copying of comments in ocamlyacc
This commit is contained in:
Pieter Goetschalckx 2020-02-03 09:55:28 +00:00 committed by GitHub
parent c7b1fe8513
commit 99224a96b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 58 additions and 17 deletions

View File

@ -5,9 +5,9 @@ Working version
### Language features
- #8820: quoted extensions: {%foo|...|} is lighter syntax for
- #8820, #9166: quoted extensions: {%foo|...|} is lighter syntax for
[%foo {||}], and {%foo bar|...|bar} for [%foo {bar|...|bar}].
(Gabriel Radanne, Leo White and Gabriel Scherer,
(Gabriel Radanne, Leo White, Gabriel Scherer and Pieter Goetschalckx,
request by Bikal Lem)
- #6673, #1132: Relax the handling of explicit polymorphic types

View File

@ -127,6 +127,11 @@ let identbody =
let backslash_escapes =
['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
let lowercase = ['a'-'z' '_']
let ident = identstart identbody*
let extattrident = ident ('.' ident)*
let blank = [' ' '\009' '\012']
rule main = parse
[' ' '\013' '\009' '\012' ] +
{ main lexbuf }
@ -144,7 +149,7 @@ rule main = parse
handle_lexical_error comment lexbuf;
main lexbuf }
| '_' { Tunderscore }
| identstart identbody *
| ident
{ match Lexing.lexeme lexbuf with
"rule" -> Trule
| "parse" -> Tparse
@ -270,7 +275,7 @@ and quoted_string delim = parse
quoted_string delim lexbuf }
| eof
{ raise (Lexical_error ("unterminated string", "", 0, 0)) }
| '|' (['a'-'z' '_'] * as delim') '}'
| '|' (lowercase* as delim') '}'
{ if delim <> delim' then
quoted_string delim lexbuf }
| _
@ -293,7 +298,7 @@ and comment = parse
string lexbuf;
reset_string_buffer();
comment lexbuf }
| '{' (['a'-'z' '_'] * as delim) '|'
| '{' ('%' '%'? extattrident blank*)? (lowercase* as delim) "|"
{ quoted_string delim lexbuf;
comment lexbuf }
| "'"
@ -304,7 +309,7 @@ and comment = parse
| '\010'
{ incr_loc lexbuf 0;
comment lexbuf }
| identstart identbody *
| ident
{ comment lexbuf }
| _
{ comment lexbuf }
@ -321,7 +326,7 @@ and action = parse
handle_lexical_error string lexbuf;
reset_string_buffer();
action lexbuf }
| '{' (['a'-'z' '_'] * as delim) '|'
| '{' ('%' '%'? extattrident blank*)? (lowercase* as delim) "|"
{ quoted_string delim lexbuf;
action lexbuf }
| "'"
@ -336,7 +341,7 @@ and action = parse
| '\010'
{ incr_loc lexbuf 0;
action lexbuf }
| identstart identbody *
| ident
{ action lexbuf }
| _
{ action lexbuf }

View File

@ -620,7 +620,7 @@ and comment = parse
is_in_string := false;
store_string_char '\"';
comment lexbuf }
| "{" (lowercase* as delim) "|"
| "{" ('%' '%'? extattrident blank*)? (lowercase* as delim) "|"
{
string_start_loc := Location.curr lexbuf;
store_lexeme lexbuf;
@ -639,7 +639,6 @@ and comment = parse
store_string delim;
store_string_char '}';
comment lexbuf }
| "\'\'"
{ store_lexeme lexbuf; comment lexbuf }
| "\'" newline "\'"
@ -670,7 +669,7 @@ and comment = parse
store_lexeme lexbuf;
comment lexbuf
}
| (lowercase | uppercase) identchar *
| ident
{ store_lexeme lexbuf; comment lexbuf }
| _
{ store_lexeme lexbuf; comment lexbuf }

View File

@ -30,3 +30,13 @@ let {%M.foo bar| <hello>{|x|} |bar}
{x}
</hello>
|}
(* Double quotes inside quoted strings inside comments *)
(* {|"|}, and *)
(* [%foo {|"|}], and *)
(* {%foo|"|} should be valid inside comments *)
(* Comment delimiters inside quoted strings inside comments: *)
(* {|*)|}, and *)
(* [%foo {bar|*)|bar}], and *)
(* {%foo bar|*)|bar} should be valid inside comments *)

View File

@ -14,3 +14,4 @@ rule token = parse
| 'c' { f1 "\u{1F42B}" }
| 'd' { f1 {|}|} }
| 'e' { (* " *) } (* " *) }
| 'f' { (* {%foo bar| *) } (* |bar} *) }

View File

@ -8,6 +8,11 @@ open Gram_aux
let () =
let f' = ignore in
f' '"'
(* test {|*)|}, {%foo|*)|} and {%%f.oo bar|*)|bar} *)
(* test {%foo {%| *)
let () = ignore {foo||foo}
%}
%token <string> Tident

View File

@ -261,9 +261,30 @@ void process_apostrophe_body(FILE *f)
static void process_open_curly_bracket(FILE *f) {
if (In_bitmap(caml_ident_start, *cptr) || *cptr == '|')
char *idcptr = cptr;
if (*idcptr == '%') {
if (*++idcptr == '%') idcptr++;
if (In_bitmap(caml_ident_start, *idcptr)) {
idcptr++;
while (In_bitmap(caml_ident_body, *idcptr)) idcptr++;
while (*idcptr == '.') {
idcptr++;
if (In_bitmap(caml_ident_start, *idcptr)) {
idcptr++;
while (In_bitmap(caml_ident_body, *idcptr)) idcptr++;
}
}
while (*idcptr == ' ' || *idcptr == 9 || *idcptr == 12) idcptr++;
} else {
return;
}
}
if (In_bitmap(caml_ident_start, *idcptr) || *idcptr == '|')
{
char *newcptr = cptr;
char *newcptr = idcptr;
size_t size = 0;
char *buf;
while(In_bitmap(caml_ident_body, *newcptr)) { newcptr++; }
@ -273,13 +294,13 @@ static void process_open_curly_bracket(FILE *f) {
char *s_line;
char *s_cptr;
size = newcptr - cptr;
size = newcptr - idcptr;
buf = MALLOC(size + 2);
if (!buf) no_space();
memcpy(buf, cptr, size);
memcpy(buf, idcptr, size);
buf[size] = '}';
buf[size + 1] = '\0';
fwrite(cptr, 1, size + 1, f);
fwrite(cptr, 1, newcptr - cptr + 1, f);
cptr = newcptr + 1;
s_lineno = lineno;
s_line = dup_line();
@ -369,7 +390,7 @@ static void process_comment(FILE *const f) {
continue;
default:
if (In_bitmap(caml_ident_start, c)) {
while (In_bitmap(caml_ident_body, *cptr)) cptr++;
while (In_bitmap(caml_ident_body, *cptr)) putc(*cptr++, f);
}
continue;
}