Skip to content

Commit 47a93db

Browse files
authored
Merge pull request #276 from REditorSupport/raw_string_search
2 parents 9287868 + 5d72968 commit 47a93db

File tree

3 files changed

+108
-5
lines changed

3 files changed

+108
-5
lines changed

src/fsm.c

Lines changed: 87 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,24 +6,106 @@ typedef struct {
66
int double_quoted;
77
int backticked;
88
int escaped;
9+
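int raw_parse_state; // 0: idle; 1: saw R/r; 2: saw the opening quote (or, once inside a raw string, its closing bracket); 3: saw the opening ( [ or { and is inside the raw string body
10+
int raw_dashes; // number of dashes between the opening quote and the opening bracket; the closing delimiter must repeat them
11+
int raw_dashes_running; // scratch counter: counts dashes up while reading the opening delimiter, then counts the remaining dashes down while matching the closing one
12+
char raw_string_token; // opening bracket of the current raw string: '(', '[' or '{'
13+
int raw_string; // 1 while inside a raw string, 0 otherwise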
914
} fsm_state;
1015
*/
1116

1217

1318
// Reset the state machine: every field of *s is set to zero.
void fsm_initialize(fsm_state* s) {
14-
*s = (fsm_state){0, 0, 0, 0};
19+
*s = (fsm_state){0, 0, 0, 0, 0, 0, 0, 0, 0};
1520
}
1621

1722

1823
void fsm_feed(fsm_state* state, const char c) {
24+
if (state->raw_string == 0) {
25+
if (state->raw_parse_state == 1) {
26+
if (c == '\'') {
27+
state->raw_parse_state = 2;
28+
state->single_quoted = 1;
29+
state->raw_dashes_running = 0;
30+
return;
31+
} else if (c == '"') {
32+
state->raw_parse_state = 2;
33+
state->double_quoted = 1;
34+
state->raw_dashes_running = 0;
35+
return;
36+
} else {
37+
state->raw_parse_state = 0;
38+
state->double_quoted = 0;
39+
state->single_quoted = 0;
40+
}
41+
} else if (state->raw_parse_state == 2) {
42+
if (c == '(' || c == '[' || c == '{') {
43+
state->raw_parse_state = 3;
44+
state->raw_dashes = state->raw_dashes_running;
45+
state->raw_string_token = c;
46+
state->raw_string = 1;
47+
} else if (c == '-') {
48+
state->raw_dashes_running++;
49+
} else {
50+
state->raw_parse_state = 0;
51+
state->double_quoted = 0;
52+
state->single_quoted = 0;
53+
}
54+
return;
55+
}
56+
} else {
57+
if (state->raw_parse_state == 3) {
58+
if (state->raw_string_token == '(' && c == ')') {
59+
state->raw_parse_state = 2;
60+
} else if (state->raw_string_token == '[' && c == ']') {
61+
state->raw_parse_state = 2;
62+
} else if (state->raw_string_token == '{' && c == '}') {
63+
state->raw_parse_state = 2;
64+
} else {
65+
// any other character is ordinary raw-string content; stay in state 3
66+
}
67+
} else if (state->raw_parse_state == 2) {
68+
if (state->raw_dashes_running == 0) {
69+
if (state->single_quoted == 1 && c == '\'') {
70+
state->single_quoted = 0;
71+
state->raw_string = 0;
72+
state->raw_parse_state = 0;
73+
} else if (state->double_quoted == 1 && c == '"') {
74+
state->double_quoted = 0;
75+
state->raw_string = 0;
76+
state->raw_parse_state = 0;
77+
} else {
78+
state->raw_dashes_running = state->raw_dashes;
79+
state->raw_parse_state = 3;
80+
}
81+
} else if (state->raw_dashes_running > 0 && c == '-') {
82+
state->raw_dashes_running--;
83+
} else {
84+
state->raw_dashes_running = state->raw_dashes;
85+
state->raw_parse_state = 3;
86+
}
87+
}
88+
return;
89+
}
1990
if (state->escaped == 1) {
2091
state->escaped = 0;
21-
} else if (state->backticked == 1 && c == '`') {
92+
return;
93+
}
94+
if (state->backticked == 1 && c == '`') {
2295
state->backticked = 0;
23-
} else if (state->single_quoted == 1 && c == '\'') {
96+
return;
97+
}
98+
if (state->single_quoted == 1 && c == '\'') {
2499
state->single_quoted = 0;
25-
} else if (state->double_quoted == 1 && c == '\"') {
100+
return;
101+
}
102+
if (state->double_quoted == 1 && c == '\"') {
26103
state->double_quoted = 0;
104+
return;
105+
}
106+
107+
if (c == 'R' || c == 'r') {
108+
state->raw_parse_state = 1;
27109
} else if (c == '\\') {
28110
state->escaped = 1;
29111
} else if (state->single_quoted || state->double_quoted || state->backticked) {
@@ -32,7 +114,7 @@ void fsm_feed(fsm_state* state, const char c) {
32114
state->backticked = 1;
33115
} else if (c == '\'') {
34116
state->single_quoted = 1;
35-
} else if (c == '\"') {
117+
} else if (c == '"') {
36118
state->double_quoted = 1;
37119
}
38120
}

src/fsm.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,11 @@ typedef struct {
66
int double_quoted;
77
int backticked;
88
int escaped;
9+
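int raw_parse_state; // 0: idle; 1: saw R/r; 2: saw the opening quote (or, once inside a raw string, its closing bracket); 3: saw the opening ( [ or { and is inside the raw string body
10+
int raw_dashes; // number of dashes between the opening quote and the opening bracket; the closing delimiter must repeat them
11+
int raw_dashes_running; // scratch counter: counts dashes up while reading the opening delimiter, then counts the remaining dashes down while matching the closing one
12+
char raw_string_token; // opening bracket of the current raw string: '(', '[' or '{'
13+
int raw_string; // 1 while inside a raw string, 0 otherwise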
914
} fsm_state;
1015

1116

tests/testthat/test-search.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,16 @@ test_that("enclosed_by_quotes works as expected", {
55
expect_false(enclosed("hello_world", 4))
66
expect_true(enclosed("'hello_world", 4))
77
expect_true(enclosed("\"hello_world", 4))
8+
expect_true(enclosed("R\"(hello_world", 4))
9+
expect_true(enclosed("r\"(hello_world", 4))
10+
expect_true(enclosed("R\"-(hello_world", 5))
11+
expect_true(enclosed("r\"-(hello_world", 5))
812
expect_true(enclosed("\"\\\"hello_world", 4))
913
expect_false(enclosed("\"a\"hello_world", 4))
14+
expect_false(enclosed("R\"(a)\"hello_world", 7))
15+
expect_false(enclosed("R\"-(a)-\"hello_world", 9))
16+
expect_true(enclosed("R\"-(a)--\"hello_world", 9))
17+
expect_true(enclosed("R\"-(a)\"hello_world", 9))
1018
expect_true(enclosed("'\\'hello_world", 4))
1119
expect_false(enclosed("'a'hello_world", 4))
1220
})
@@ -32,6 +40,14 @@ test_that("find_unbalanced_bracket works as expected", {
3240
expect_equal(bsearch("foo(\"xyz(bar", 0, 10)[[1]], c(0, 3))
3341
expect_equal(bsearch("foo('xyz', bar", 0, 10)[[1]], c(0, 3))
3442
expect_equal(bsearch("foo(\"xyz\", bar", 0, 10)[[1]], c(0, 3))
43+
expect_equal(bsearch("foo(R\"(x\"yz)\", bar", 0, 15)[[1]], c(0, 3))
44+
expect_equal(bsearch("foo(r\"(x\"yz)\", bar", 0, 15)[[1]], c(0, 3))
45+
expect_equal(bsearch("foo(R\"(x\"yz)\", bar", 0, 15)[[1]], c(0, 3))
46+
expect_equal(bsearch("foo(r\"(x\"yz)\", bar", 0, 15)[[1]], c(0, 3))
47+
expect_equal(bsearch("foo(\"(x\"yz)\", bar", 0, 15)[[1]], c(-1, -1))
48+
expect_equal(bsearch("foo(\"(x\"yz)\", bar", 0, 15)[[1]], c(-1, -1))
49+
expect_equal(bsearch("foo(\"(x\"yz)\", bar", 0, 15)[[1]], c(-1, -1))
50+
expect_equal(bsearch("foo(\"(x\"yz)\", bar", 0, 15)[[1]], c(-1, -1))
3551
expect_equal(bsearch("𐐀𐐀𐐀(𐐀𐐀𐐀", 0, 5)[[1]], c(0, 3))
3652

3753
# multiline

0 commit comments

Comments
 (0)