@@ -6,24 +6,106 @@ typedef struct {
66 int double_quoted;
77 int backticked;
88 int escaped;
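9+ int raw_parse_state; // 1: saw R/r; 2: saw the quote (opening: counting dashes; closing: matching dashes then quote); 3: inside the body opened by ( [ or {
10+ int raw_dashes; // number of dashes in the raw-string delimiter
11+ int raw_dashes_running; // working dash counter: counts up while reading the opener, down while matching the terminator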
12+ char raw_string_token;
13+ int raw_string;
914} fsm_state;
1015*/
1116
1217
// Reset the state machine pointed to by s so that every field is zero.
// NOTE(review): a compound literal zero-fills members that have no explicit
// initializer, so the number of zeros listed does not have to track the
// number of struct fields.
1318void fsm_initialize (fsm_state * s ) {
14- * s = (fsm_state ){0 , 0 , 0 , 0 };
19+ * s = (fsm_state ){0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 };
1520}
1621
1722
// Advance the quoting state machine by one input character c.
//
// Order of processing in the added lines:
//   1. raw_string == 0 and raw_parse_state is 1 or 2: raw-string opener.
//   2. raw_string != 0: raw-string body and terminator; always returns.
//   3. Pending escape, then closing backtick, single quote, double quote;
//      each of these returns as soon as it matches.
//   4. Opening tokens: R or r arms raw_parse_state, a backslash sets escaped,
//      otherwise an opening quote character sets the matching flag.
//
// NOTE(review): step 4 arms raw_parse_state on any R or r, even while a quote
// flag is already set. On the next call a quote character is then taken as a
// raw-string opener, and any other character clears both quote flags.
// Confirm this is intended for an R or r inside an ordinary quoted string.
1823void fsm_feed (fsm_state * state , const char c ) {
24+ if (state -> raw_string == 0 ) {
// Phase 1: the previous character was R or r.
25+ if (state -> raw_parse_state == 1 ) {
26+ if (c == '\'' ) {
27+ state -> raw_parse_state = 2 ;
28+ state -> single_quoted = 1 ;
29+ state -> raw_dashes_running = 0 ;
30+ return ;
31+ } else if (c == '"' ) {
32+ state -> raw_parse_state = 2 ;
33+ state -> double_quoted = 1 ;
34+ state -> raw_dashes_running = 0 ;
35+ return ;
36+ } else {
// No quote followed: leave phase 1, clear both quote flags, and fall
// through so that c still gets the ordinary handling further down.
37+ state -> raw_parse_state = 0 ;
38+ state -> double_quoted = 0 ;
39+ state -> single_quoted = 0 ;
40+ }
// Phase 2 (opening): after the quote, count dashes until a bracket arrives.
41+ } else if (state -> raw_parse_state == 2 ) {
42+ if (c == '(' || c == '[' || c == '{' ) {
43+ state -> raw_parse_state = 3 ;
// Keep the dash count; the terminator check reloads it from raw_dashes.
44+ state -> raw_dashes = state -> raw_dashes_running ;
45+ state -> raw_string_token = c ;
46+ state -> raw_string = 1 ;
47+ } else if (c == '-' ) {
48+ state -> raw_dashes_running ++ ;
49+ } else {
// Any other character abandons the opener and clears both quote flags.
// NOTE(review): c is consumed by the return below without ordinary handling.
50+ state -> raw_parse_state = 0 ;
51+ state -> double_quoted = 0 ;
52+ state -> single_quoted = 0 ;
53+ }
54+ return ;
55+ }
56+ } else {
// Phase 3: inside the raw-string body. Only the bracket that pairs with
// raw_string_token moves on to the terminator check.
57+ if (state -> raw_parse_state == 3 ) {
58+ if (state -> raw_string_token == '(' && c == ')' ) {
59+ state -> raw_parse_state = 2 ;
60+ } else if (state -> raw_string_token == '[' && c == ']' ) {
61+ state -> raw_parse_state = 2 ;
62+ } else if (state -> raw_string_token == '{' && c == '}' ) {
63+ state -> raw_parse_state = 2 ;
64+ } else {
65+ // in raw string
66+ }
// Phase 2 (closing): consume raw_dashes_running dashes, then require the
// quote character whose flag is set; that ends the raw string.
67+ } else if (state -> raw_parse_state == 2 ) {
68+ if (state -> raw_dashes_running == 0 ) {
69+ if (state -> single_quoted == 1 && c == '\'' ) {
70+ state -> single_quoted = 0 ;
71+ state -> raw_string = 0 ;
72+ state -> raw_parse_state = 0 ;
73+ } else if (state -> double_quoted == 1 && c == '"' ) {
74+ state -> double_quoted = 0 ;
75+ state -> raw_string = 0 ;
76+ state -> raw_parse_state = 0 ;
77+ } else {
// Mismatch: reload the dash counter and return to the body.
// NOTE(review): c is not re-examined here, so a closing bracket that
// arrives as the mismatching character does not restart the check.
78+ state -> raw_dashes_running = state -> raw_dashes ;
79+ state -> raw_parse_state = 3 ;
80+ }
81+ } else if (state -> raw_dashes_running > 0 && c == '-' ) {
82+ state -> raw_dashes_running -- ;
83+ } else {
// Mismatch while dashes were still expected: same reset as above.
84+ state -> raw_dashes_running = state -> raw_dashes ;
85+ state -> raw_parse_state = 3 ;
86+ }
87+ }
// While raw_string is set, none of the ordinary handling below is reached.
88+ return ;
89+ }
// The character after a backslash only clears the escaped flag.
1990 if (state -> escaped == 1 ) {
2091 state -> escaped = 0 ;
21- } else if (state -> backticked == 1 && c == '`' ) {
92+ return ;
93+ }
94+ if (state -> backticked == 1 && c == '`' ) {
2295 state -> backticked = 0 ;
23- } else if (state -> single_quoted == 1 && c == '\'' ) {
96+ return ;
97+ }
98+ if (state -> single_quoted == 1 && c == '\'' ) {
2499 state -> single_quoted = 0 ;
25- } else if (state -> double_quoted == 1 && c == '\"' ) {
100+ return ;
101+ }
102+ if (state -> double_quoted == 1 && c == '\"' ) {
26103 state -> double_quoted = 0 ;
104+ return ;
105+ }
106+
// Arm phase 1 of the raw-string opener; this test runs before the check of
// the quote flags further down in the same chain.
107+ if (c == 'R' || c == 'r' ) {
108+ state -> raw_parse_state = 1 ;
27109 } else if (c == '\\' ) {
28110 state -> escaped = 1 ;
29111 } else if (state -> single_quoted || state -> double_quoted || state -> backticked ) {
// NOTE(review): the hunk header below skips lines of this function, so the
// body of the branch above and the condition guarding the next visible
// assignment are not shown here.
@@ -32,7 +114,7 @@ void fsm_feed(fsm_state* state, const char c) {
32114 state -> backticked = 1 ;
33115 } else if (c == '\'' ) {
34116 state -> single_quoted = 1 ;
35- } else if (c == '\ "' ) {
117+ } else if (c == '"' ) {
36118 state -> double_quoted = 1 ;
37119 }
38120}
0 commit comments