diff --git a/ext/rbs_extension/lexer.h b/ext/rbs_extension/lexer.h
index 912bd6720..583224767 100644
--- a/ext/rbs_extension/lexer.h
+++ b/ext/rbs_extension/lexer.h
@@ -119,6 +119,8 @@ typedef struct {
  * */
 typedef struct {
   VALUE string;
+  int start_pos;                  /* The character position that defines the start of the input */
+  int end_pos;                    /* The character position that defines the end of the input */
   position current;               /* The current position */
   position start;                 /* The start position of the current token */
   bool first_token_of_line;       /* This flag is used for tLINECOMMENT */
diff --git a/ext/rbs_extension/lexstate.c b/ext/rbs_extension/lexstate.c
index 34a87ea0e..c47e266f4 100644
--- a/ext/rbs_extension/lexstate.c
+++ b/ext/rbs_extension/lexstate.c
@@ -100,9 +100,15 @@ int token_bytes(token tok) {
 }
 
 unsigned int peek(lexstate *state) {
-  unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
-  state->last_char = c;
-  return c;
+  /* Report NUL once the current position reaches end_pos, regardless of the string length. */
+  if (state->current.char_pos == state->end_pos) {
+    state->last_char = '\0';
+    return 0;
+  } else {
+    unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string));
+    state->last_char = c;
+    return c;
+  }
 }
 
 token next_token(lexstate *state, enum TokenType type) {
@@ -137,6 +143,8 @@ void skip(lexstate *state) {
 
 void skipn(lexstate *state, size_t size) {
   for (size_t i = 0; i < size; i ++) {
+    /* peek() records the character at the current position in last_char before it is skipped. */
+    peek(state);
     skip(state);
   }
 }
diff --git a/ext/rbs_extension/parser.c b/ext/rbs_extension/parser.c
index 35213162b..232db62d0 100644
--- a/ext/rbs_extension/parser.c
+++ b/ext/rbs_extension/parser.c
@@ -2502,12 +2502,21 @@ VALUE parse_signature(parserstate *state) {
 }
 
 static VALUE
-rbsparser_parse_type(VALUE self, VALUE buffer, VALUE line, VALUE column, VALUE variables)
+rbsparser_parse_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE requires_eof)
 {
-  parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), variables);
+  parserstate *parser = alloc_parser(buffer, FIX2INT(start_pos), FIX2INT(end_pos), variables);
+
+  if (parser->next_token.type == pEOF) {
+    /* The first token is already pEOF: release the parser before returning nil. */
+    free_parser(parser);
+    return Qnil;
+  }
 
   VALUE type = parse_type(parser);
-  parser_advance_assert(parser, pEOF);
+
+  if (RTEST(requires_eof)) {
+    parser_advance_assert(parser, pEOF);
+  }
 
   free_parser(parser);
 
@@ -2515,19 +2524,31 @@ rbsparser_parse_type(VALUE self, VALUE buffer, VALUE line, VALUE column, VALUE v
 }
 
 static VALUE
-rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE line, VALUE column, VALUE variables)
+rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE requires_eof)
 {
-  parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), variables);
+  parserstate *parser = alloc_parser(buffer, FIX2INT(start_pos), FIX2INT(end_pos), variables);
+
+  if (parser->next_token.type == pEOF) {
+    /* The first token is already pEOF: release the parser before returning nil. */
+    free_parser(parser);
+    return Qnil;
+  }
+
   VALUE method_type = parse_method_type(parser);
-  free(parser);
+
+  if (RTEST(requires_eof)) {
+    parser_advance_assert(parser, pEOF);
+  }
+
+  free_parser(parser);
 
   return method_type;
 }
 
 static VALUE
-rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE line, VALUE column)
+rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE end_pos)
 {
-  parserstate *parser = alloc_parser(buffer, FIX2INT(line), FIX2INT(column), Qnil);
+  parserstate *parser = alloc_parser(buffer, 0, FIX2INT(end_pos), Qnil);
   VALUE signature = parse_signature(parser);
   free_parser(parser);
 
@@ -2536,7 +2557,7 @@ rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE line, VALUE column)
 
 void rbs__init_parser(void) {
   RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject);
-  rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 4);
-  rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 4);
-  rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 3);
+  rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5);
+  rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5);
+  rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 2);
 }
diff --git a/ext/rbs_extension/parserstate.c b/ext/rbs_extension/parserstate.c
index fc97579f2..adec56b3f 100644
--- a/ext/rbs_extension/parserstate.c
+++ b/ext/rbs_extension/parserstate.c
@@ -272,13 +272,16 @@ VALUE comment_to_ruby(comment *com, VALUE buffer) {
   );
 }
 
-parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables) {
+parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables) {
   VALUE string = rb_funcall(buffer, rb_intern("content"), 0);
 
   lexstate *lexer = calloc(1, sizeof(lexstate));
   lexer->string = string;
-  lexer->current.line = line;
-  lexer->current.column = column;
+  lexer->current.line = 1;
+  lexer->start_pos = start_pos;
+  lexer->end_pos = end_pos;
+  /* Skip the first start_pos characters, starting from line 1 of the buffer. */
+  skipn(lexer, start_pos);
   lexer->start = lexer->current;
   lexer->first_token_of_line = lexer->current.column == 0;
 
diff --git a/ext/rbs_extension/parserstate.h b/ext/rbs_extension/parserstate.h
index 0678c4c97..e42bf7cd8 100644
--- a/ext/rbs_extension/parserstate.h
+++ b/ext/rbs_extension/parserstate.h
@@ -101,7 +101,7 @@ bool parser_typevar_member(parserstate *state, ID id);
  * alloc_parser(buffer, 3, 5, Qnil) // New parserstate without variables
  * ```
  * */
-parserstate *alloc_parser(VALUE buffer, int line, int column, VALUE variables);
+parserstate *alloc_parser(VALUE buffer, int start_pos, int end_pos, VALUE variables);
 void free_parser(parserstate *parser);
 /**
  * Advance one token.
diff --git a/lib/rbs/parser_aux.rb b/lib/rbs/parser_aux.rb
index 1b9e41324..731c3fbf3 100644
--- a/lib/rbs/parser_aux.rb
+++ b/lib/rbs/parser_aux.rb
@@ -2,16 +2,19 @@
 
 module RBS
   class Parser
-    def self.parse_type(source, line: 1, column: 0, variables: [])
-      _parse_type(buffer(source), line, column, variables)
+    def self.parse_type(source, line: nil, column: nil, range: nil, variables: [])
+      buf = buffer(source)
+      _parse_type(buf, range&.begin || 0, range&.end || buf.last_position, variables, range.nil?)
     end
 
-    def self.parse_method_type(source, line: 1, column: 0, variables: [])
-      _parse_method_type(buffer(source), line, column, variables)
+    def self.parse_method_type(source, line: nil, column: nil, range: nil, variables: [])
+      buf = buffer(source)
+      _parse_method_type(buf, range&.begin || 0, range&.end || buf.last_position, variables, range.nil?)
     end
 
-    def self.parse_signature(source, line: 1, column: 0)
-      _parse_signature(buffer(source), line, column)
+    def self.parse_signature(source, line: nil, column: nil)
+      buf = buffer(source)
+      _parse_signature(buf, buf.last_position)
     end
 
     def self.buffer(source)
diff --git a/sig/parser.rbs b/sig/parser.rbs
index 91d12f5a5..cd64735f5 100644
--- a/sig/parser.rbs
+++ b/sig/parser.rbs
@@ -1,10 +1,43 @@
 module RBS
   class Parser
-    def self.parse_method_type: (Buffer | String, ?line: Integer, ?column: Integer, ?variables: Array[Symbol]) -> MethodType
-
-    def self.parse_type: (Buffer | String, ?line: Integer, ?column: Integer, ?variables: Array[Symbol]) -> Types::t
-
-    def self.parse_signature: (Buffer | String, ?line: Integer, ?column: Integer) -> Array[AST::Declarations::t]
+    # Parse a method type and return it
+    #
+    # When `range` keyword is specified, parsing starts at the beginning of the range and the end of the range is treated as the end of the input.
+    # If no token is left in the input, it returns `nil`.
+    #
+    # ```ruby
+    # RBS::Parser.parse_method_type("() -> void", range: 0...)                            # => `() -> void`
+    # RBS::Parser.parse_method_type("() -> void () -> String", range: 11...)              # => `() -> String`
+    # RBS::Parser.parse_method_type("() -> void () -> String", range: 23...)              # => nil
+    # ```
+    #
+    # `line` and `column` are deprecated and ignored.
+    #
+    def self.parse_method_type: (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol]) -> MethodType?
+                              | (Buffer | String, ?line: top, ?column: top, ?variables: Array[Symbol]) -> MethodType
+
+    # Parse a type and return it
+    #
+    # When `range` keyword is specified, parsing starts at the beginning of the range and the end of the range is treated as the end of the input.
+    # If no token is left in the input, it returns `nil`.
+    #
+    # ```ruby
+    # RBS::Parser.parse_type("String", range: 0...)                  # => `String`
+    # RBS::Parser.parse_type("String Integer", range: 7...)          # => `Integer`
+    # RBS::Parser.parse_type("String Integer", range: 14...)         # => nil
+    # ```
+    #
+    # `line` and `column` are deprecated and ignored.
+    #
+    def self.parse_type: (Buffer | String, range: Range[Integer?], ?variables: Array[Symbol]) -> Types::t?
+                       | (Buffer | String, ?line: top, ?column: top, ?variables: Array[Symbol]) -> Types::t
+
+    # Parse whole RBS file and return an array of declarations
+    #
+    # `line` and `column` are deprecated and ignored.
+    #
+    def self.parse_signature: (Buffer | String) -> Array[AST::Declarations::t]
+                            | (Buffer | String, ?line: top, ?column: top) -> Array[AST::Declarations::t]
 
     KEYWORDS: Hash[String, bot]
 
@@ -12,14 +45,11 @@ module RBS
 
     def self.buffer: (String | Buffer source) -> Buffer
 
-    %a{no-defn}
-    def self._parse_type: (Buffer, Integer line, Integer column, Array[Symbol] variables) -> Types::t
+    def self._parse_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, boolish eof) -> Types::t?
 
-    %a{no-defn}
-    def self._parse_method_type: (Buffer, Integer line, Integer column, Array[Symbol] variables) -> MethodType
+    def self._parse_method_type: (Buffer, Integer start_pos, Integer end_pos, Array[Symbol] variables, boolish eof) -> MethodType?
 
-    %a{no-defn}
-    def self._parse_signature: (Buffer, Integer line, Integer column) -> Array[AST::Declarations::t]
+    def self._parse_signature: (Buffer, Integer end_pos) -> Array[AST::Declarations::t]
 
     class LocatedValue
     end
diff --git a/test/rbs/parser_test.rb b/test/rbs/parser_test.rb
index 0ec73b269..62254d32b 100644
--- a/test/rbs/parser_test.rb
+++ b/test/rbs/parser_test.rb
@@ -647,4 +647,48 @@ def test_neline_inconsitency
 
     RBS::Parser.parse_signature(code)
   end
+
+  def test_buffer_location
+    code = buffer("type1 type2 type3")
+
+    RBS::Parser.parse_type(code, range: 0...).tap do |type|
+      assert_equal "type1", type.to_s
+      assert_equal 0...5, type.location.range
+    end
+
+    RBS::Parser.parse_type(code, range: 5...).tap do |type|
+      assert_equal "type2", type.to_s
+      assert_equal 6...11, type.location.range
+      assert_equal 1, type.location.start_line
+      assert_equal 6, type.location.start_column
+      assert_equal 1, type.location.end_line
+      assert_equal 11, type.location.end_column
+    end
+
+    RBS::Parser.parse_type(code, range: 6...8).tap do |type|
+      assert_equal "ty", type.to_s
+      assert_equal 6...8, type.location.range
+      assert_equal 1, type.location.start_line
+      assert_equal 6, type.location.start_column
+      assert_equal 1, type.location.end_line
+      assert_equal 8, type.location.end_column
+    end
+  end
+
+  def test_parse_eof_nil
+    code = buffer("type1   ")
+
+    RBS::Parser.parse_type(code, range: 0...).tap do |type|
+      assert_equal "type1", type.to_s
+      assert_equal 0...5, type.location.range
+    end
+
+    RBS::Parser.parse_type(code, range: 5...).tap do |type|
+      assert_nil type
+    end
+
+    RBS::Parser.parse_type(code, range: 5...8).tap do |type|
+      assert_nil type
+    end
+  end
 end