Skip to content

Commit 8a1d8f7

Browse files
committed
Initial Attempt at Supporting Overriding Core Delimiter Processors
1 parent 22d6637 commit 8a1d8f7

9 files changed

Lines changed: 204 additions & 9 deletions

File tree

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ all of them via methods on `Parser.Builder`
258258
- Parsing of inline content can be extended/overridden with `customInlineContentParserFactory`
259259
- Parsing of [delimiters](https://spec.commonmark.org/0.31.2/#emphasis-and-strong-emphasis) in inline content can be
260260
extended with `customDelimiterProcessor`
261+
- Core built-in delimiter parsing can be replaced with `overrideDelimiterProcessor`
262+
to support alternate emphasis syntax such as single `_` meaning emphasis and
263+
single `*` meaning strong emphasis.
261264
- Processing of links can be customized with `linkProcessor` and `linkMarker`
262265

263266
#### Thread-safety

commonmark/src/main/java/org/commonmark/internal/DocumentParser.java

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ public class DocumentParser implements ParserState {
7373
private final InlineParserFactory inlineParserFactory;
7474
private final List<InlineContentParserFactory> inlineContentParserFactories;
7575
private final List<DelimiterProcessor> delimiterProcessors;
76+
private final List<DelimiterProcessor> overrideDelimiterProcessors;
7677
private final List<LinkProcessor> linkProcessors;
7778
private final Set<Character> linkMarkers;
7879
private final IncludeSourceSpans includeSourceSpans;
@@ -85,12 +86,13 @@ public class DocumentParser implements ParserState {
8586

8687
public DocumentParser(List<BlockParserFactory> blockParserFactories, InlineParserFactory inlineParserFactory,
8788
List<InlineContentParserFactory> inlineContentParserFactories, List<DelimiterProcessor> delimiterProcessors,
88-
List<LinkProcessor> linkProcessors, Set<Character> linkMarkers,
89-
IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) {
89+
List<DelimiterProcessor> overrideDelimiterProcessors, List<LinkProcessor> linkProcessors,
90+
Set<Character> linkMarkers, IncludeSourceSpans includeSourceSpans, int maxOpenBlockParsers) {
9091
this.blockParserFactories = blockParserFactories;
9192
this.inlineParserFactory = inlineParserFactory;
9293
this.inlineContentParserFactories = inlineContentParserFactories;
9394
this.delimiterProcessors = delimiterProcessors;
95+
this.overrideDelimiterProcessors = overrideDelimiterProcessors;
9496
this.linkProcessors = linkProcessors;
9597
this.linkMarkers = linkMarkers;
9698
this.includeSourceSpans = includeSourceSpans;
@@ -481,7 +483,8 @@ private BlockStartImpl findBlockStart(BlockParser blockParser) {
481483
* Walk through a block & children recursively, parsing string content into inline content where appropriate.
482484
*/
483485
private void processInlines() {
484-
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, definitions);
486+
var context = new InlineParserContextImpl(inlineContentParserFactories, delimiterProcessors,
487+
overrideDelimiterProcessors, linkProcessors, linkMarkers, definitions);
485488
var inlineParser = inlineParserFactory.create(context);
486489

487490
for (var blockParser : allBlockParsers) {

commonmark/src/main/java/org/commonmark/internal/InlineParserContextImpl.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,17 +13,20 @@ public class InlineParserContextImpl implements InlineParserContext {
1313

1414
private final List<InlineContentParserFactory> inlineContentParserFactories;
1515
private final List<DelimiterProcessor> delimiterProcessors;
16+
private final List<DelimiterProcessor> overrideDelimiterProcessors;
1617
private final List<LinkProcessor> linkProcessors;
1718
private final Set<Character> linkMarkers;
1819
private final Definitions definitions;
1920

2021
public InlineParserContextImpl(List<InlineContentParserFactory> inlineContentParserFactories,
2122
List<DelimiterProcessor> delimiterProcessors,
23+
List<DelimiterProcessor> overrideDelimiterProcessors,
2224
List<LinkProcessor> linkProcessors,
2325
Set<Character> linkMarkers,
2426
Definitions definitions) {
2527
this.inlineContentParserFactories = inlineContentParserFactories;
2628
this.delimiterProcessors = delimiterProcessors;
29+
this.overrideDelimiterProcessors = overrideDelimiterProcessors;
2730
this.linkProcessors = linkProcessors;
2831
this.linkMarkers = linkMarkers;
2932
this.definitions = definitions;
@@ -39,6 +42,11 @@ public List<DelimiterProcessor> getCustomDelimiterProcessors() {
3942
return delimiterProcessors;
4043
}
4144

45+
/** Returns the override delimiter processors that this context stored from its constructor argument. */
@Override
46+
public List<DelimiterProcessor> getOverrideDelimiterProcessors() {
47+
return overrideDelimiterProcessors;
48+
}
49+
4250
@Override
4351
public List<LinkProcessor> getCustomLinkProcessors() {
4452
return linkProcessors;

commonmark/src/main/java/org/commonmark/internal/InlineParserImpl.java

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public class InlineParserImpl implements InlineParser, InlineParserState {
4242
public InlineParserImpl(InlineParserContext context) {
4343
this.context = context;
4444
this.inlineContentParserFactories = calculateInlineContentParserFactories(context.getCustomInlineContentParserFactories());
45-
this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors());
45+
this.delimiterProcessors = calculateDelimiterProcessors(context.getCustomDelimiterProcessors(), context.getOverrideDelimiterProcessors());
4646
this.linkProcessors = calculateLinkProcessors(context.getCustomLinkProcessors());
4747
this.linkMarkers = calculateLinkMarkers(context.getCustomLinkMarkers());
4848
this.specialCharacters = calculateSpecialCharacters(linkMarkers, this.delimiterProcessors.keySet(), this.inlineContentParserFactories);
@@ -66,13 +66,51 @@ private List<LinkProcessor> calculateLinkProcessors(List<LinkProcessor> linkProc
6666
return list;
6767
}
6868

69-
private static Map<Character, DelimiterProcessor> calculateDelimiterProcessors(List<DelimiterProcessor> delimiterProcessors) {
69+
private static Map<Character, DelimiterProcessor> calculateDelimiterProcessors(List<DelimiterProcessor> delimiterProcessors,
70+
List<DelimiterProcessor> overrideDelimiterProcessors) {
7071
var map = new HashMap<Character, DelimiterProcessor>();
7172
addDelimiterProcessors(List.of(new AsteriskDelimiterProcessor(), new UnderscoreDelimiterProcessor()), map);
73+
addOverrideDelimiterProcessors(overrideDelimiterProcessors, map);
7274
addDelimiterProcessors(delimiterProcessors, map);
7375
return map;
7476
}
7577

78+
private static void addOverrideDelimiterProcessors(Iterable<DelimiterProcessor> delimiterProcessors,
79+
Map<Character, DelimiterProcessor> map) {
80+
for (DelimiterProcessor delimiterProcessor : delimiterProcessors) {
81+
char opening = delimiterProcessor.getOpeningCharacter();
82+
char closing = delimiterProcessor.getClosingCharacter();
83+
if (opening == closing) {
84+
replaceOrAddDelimiterProcessorForChar(opening, delimiterProcessor, map);
85+
} else {
86+
replaceOrAddDelimiterProcessorForChar(opening, delimiterProcessor, map);
87+
replaceOrAddDelimiterProcessorForChar(closing, delimiterProcessor, map);
88+
}
89+
}
90+
}
91+
92+
private static void replaceOrAddDelimiterProcessorForChar(char delimiterChar,
93+
DelimiterProcessor delimiterProcessor,
94+
Map<Character, DelimiterProcessor> delimiterProcessors) {
95+
DelimiterProcessor existing = delimiterProcessors.get(delimiterChar);
96+
if (existing == null) {
97+
delimiterProcessors.put(delimiterChar, delimiterProcessor);
98+
return;
99+
}
100+
if (existing instanceof StaggeredDelimiterProcessor) {
101+
((StaggeredDelimiterProcessor) existing).replace(delimiterProcessor);
102+
return;
103+
}
104+
if (existing.getMinLength() == delimiterProcessor.getMinLength()) {
105+
delimiterProcessors.put(delimiterChar, delimiterProcessor);
106+
return;
107+
}
108+
StaggeredDelimiterProcessor staggered = new StaggeredDelimiterProcessor(delimiterChar);
109+
staggered.add(existing);
110+
staggered.add(delimiterProcessor);
111+
delimiterProcessors.put(delimiterChar, staggered);
112+
}
113+
76114
private static void addDelimiterProcessors(Iterable<DelimiterProcessor> delimiterProcessors, Map<Character, DelimiterProcessor> map) {
77115
for (DelimiterProcessor delimiterProcessor : delimiterProcessors) {
78116
char opening = delimiterProcessor.getOpeningCharacter();

commonmark/src/main/java/org/commonmark/internal/StaggeredDelimiterProcessor.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,19 @@ void add(DelimiterProcessor dp) {
6060
}
6161
}
6262

63+
void replace(DelimiterProcessor dp) {
64+
final int len = dp.getMinLength();
65+
ListIterator<DelimiterProcessor> it = processors.listIterator();
66+
while (it.hasNext()) {
67+
DelimiterProcessor p = it.next();
68+
if (p.getMinLength() == len) {
69+
it.set(dp);
70+
return;
71+
}
72+
}
73+
add(dp);
74+
}
75+
6376
private DelimiterProcessor findProcessor(int len) {
6477
for (DelimiterProcessor p : processors) {
6578
if (p.getMinLength() <= len) {

commonmark/src/main/java/org/commonmark/parser/InlineParserContext.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,12 @@ public interface InlineParserContext {
2525
*/
2626
List<DelimiterProcessor> getCustomDelimiterProcessors();
2727

28+
/**
29+
* @return delimiter processors that have been configured with
30+
* {@link Parser.Builder#overrideDelimiterProcessor(DelimiterProcessor)}
31+
*/
32+
List<DelimiterProcessor> getOverrideDelimiterProcessors();
33+
2834
/**
2935
* @return custom link processors that have been configured with {@link Parser.Builder#linkProcessor}.
3036
*/

commonmark/src/main/java/org/commonmark/parser/Parser.java

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import java.io.Reader;
1818
import java.util.*;
1919

20-
2120
/**
2221
* Parses input text to a tree of nodes.
2322
* <p>
@@ -32,6 +31,7 @@ public class Parser {
3231
private final List<BlockParserFactory> blockParserFactories;
3332
private final List<InlineContentParserFactory> inlineContentParserFactories;
3433
private final List<DelimiterProcessor> delimiterProcessors;
34+
private final List<DelimiterProcessor> overrideDelimiterProcessors;
3535
private final List<LinkProcessor> linkProcessors;
3636
private final Set<Character> linkMarkers;
3737
private final InlineParserFactory inlineParserFactory;
@@ -45,6 +45,7 @@ private Parser(Builder builder) {
4545
this.postProcessors = builder.postProcessors;
4646
this.inlineContentParserFactories = builder.inlineContentParserFactories;
4747
this.delimiterProcessors = builder.delimiterProcessors;
48+
this.overrideDelimiterProcessors = builder.overrideDelimiterProcessors;
4849
this.linkProcessors = builder.linkProcessors;
4950
this.linkMarkers = builder.linkMarkers;
5051
this.includeSourceSpans = builder.includeSourceSpans;
@@ -53,7 +54,8 @@ private Parser(Builder builder) {
5354
// Try to construct an inline parser. Invalid configuration might result in an exception, which we want to
5455
// detect as soon as possible.
5556
var context = new InlineParserContextImpl(
56-
inlineContentParserFactories, delimiterProcessors, linkProcessors, linkMarkers, new Definitions());
57+
inlineContentParserFactories, delimiterProcessors, overrideDelimiterProcessors,
58+
linkProcessors, linkMarkers, new Definitions());
5759
this.inlineParserFactory.create(context);
5860
}
5961

@@ -108,7 +110,7 @@ public Node parseReader(Reader input) throws IOException {
108110

109111
private DocumentParser createDocumentParser() {
110112
return new DocumentParser(blockParserFactories, inlineParserFactory, inlineContentParserFactories,
111-
delimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
113+
delimiterProcessors, overrideDelimiterProcessors, linkProcessors, linkMarkers, includeSourceSpans, maxOpenBlockParsers);
112114
}
113115

114116
private Node postProcess(Node document) {
@@ -125,6 +127,7 @@ public static class Builder {
125127
private final List<BlockParserFactory> blockParserFactories = new ArrayList<>();
126128
private final List<InlineContentParserFactory> inlineContentParserFactories = new ArrayList<>();
127129
private final List<DelimiterProcessor> delimiterProcessors = new ArrayList<>();
130+
private final List<DelimiterProcessor> overrideDelimiterProcessors = new ArrayList<>();
128131
private final List<LinkProcessor> linkProcessors = new ArrayList<>();
129132
private final List<PostProcessor> postProcessors = new ArrayList<>();
130133
private final Set<Character> linkMarkers = new HashSet<>();
@@ -273,6 +276,23 @@ public Builder customDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
273276
return this;
274277
}
275278

279+
/**
280+
* Add a delimiter processor that replaces the built-in processor for the same delimiter character.
281+
* <p>
282+
* This can be used to override core syntax such as emphasis and strong emphasis parsing.
283+
* The built-in processor for the same delimiter character and minimum length is replaced.
284+
* If a processor with the same delimiter character but a different minimum length exists, it is combined
285+
* using the standard staggered delimiter processor behavior.
286+
*
287+
* @param delimiterProcessor a delimiter processor implementation
288+
* @return {@code this}
289+
*/
290+
public Builder overrideDelimiterProcessor(DelimiterProcessor delimiterProcessor) {
291+
Objects.requireNonNull(delimiterProcessor, "delimiterProcessor must not be null");
292+
overrideDelimiterProcessors.add(delimiterProcessor);
293+
return this;
294+
}
295+
276296
/**
277297
* Add a custom link/image processor for inline parsing.
278298
* <p>

commonmark/src/test/java/org/commonmark/test/InlineParserContextTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,11 @@ public List<DelimiterProcessor> getCustomDelimiterProcessors() {
5353
return inlineParserContext.getCustomDelimiterProcessors();
5454
}
5555

56+
// Pass-through: returns whatever the wrapped inlineParserContext reports.
@Override
57+
public List<DelimiterProcessor> getOverrideDelimiterProcessors() {
58+
return inlineParserContext.getOverrideDelimiterProcessors();
59+
}
60+
5661
@Override
5762
public List<LinkProcessor> getCustomLinkProcessors() {
5863
return inlineParserContext.getCustomLinkProcessors();

commonmark/src/test/java/org/commonmark/test/ParserTest.java

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import org.commonmark.node.*;
44
import org.commonmark.parser.*;
5+
import org.commonmark.parser.delimiter.DelimiterProcessor;
6+
import org.commonmark.parser.delimiter.DelimiterRun;
57
import org.commonmark.renderer.html.HtmlRenderer;
68
import org.commonmark.renderer.markdown.MarkdownRenderer;
79
import org.commonmark.testutil.TestResources;
@@ -13,7 +15,6 @@
1315
import java.nio.charset.StandardCharsets;
1416
import java.util.ArrayList;
1517
import java.util.HashSet;
16-
import java.util.List;
1718
import java.util.Set;
1819
import java.util.concurrent.Executors;
1920
import java.util.concurrent.Future;
@@ -111,6 +112,104 @@ public InlineParser create(InlineParserContext inlineParserContext) {
111112
assertThat(parser.parse(input).getFirstChild().getFirstChild()).isInstanceOf(ThematicBreak.class);
112113
}
113114

115+
@Test
116+
public void overrideDelimiterProcessorReplacesBuiltInProcessor() {
117+
Parser parser = Parser.builder().overrideDelimiterProcessor(new DelimiterProcessor() {
118+
@Override
119+
public char getOpeningCharacter() {
120+
return '*';
121+
}
122+
123+
@Override
124+
public char getClosingCharacter() {
125+
return '*';
126+
}
127+
128+
@Override
129+
public int getMinLength() {
130+
return 1;
131+
}
132+
133+
@Override
134+
public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
135+
return 0;
136+
}
137+
}).build();
138+
139+
HtmlRenderer renderer = HtmlRenderer.builder().build();
140+
assertThat(renderer.render(parser.parse("*hello*"))).isEqualTo("<p>*hello*</p>\n");
141+
}
142+
143+
@Test
144+
public void overrideDelimiterProcessorCanSwitchAsteriskAndUnderscoreEmphasisSemantics() {
145+
Parser parser = Parser.builder()
146+
.overrideDelimiterProcessor(new DelimiterProcessor() {
147+
@Override
148+
public char getOpeningCharacter() {
149+
return '*';
150+
}
151+
152+
@Override
153+
public char getClosingCharacter() {
154+
return '*';
155+
}
156+
157+
@Override
158+
public int getMinLength() {
159+
return 1;
160+
}
161+
162+
@Override
163+
public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
164+
Text opener = openingRun.getOpener();
165+
Node closer = closingRun.getCloser();
166+
StrongEmphasis emphasis = new StrongEmphasis("*");
167+
for (Node node = opener.getNext(); node != closer; ) {
168+
Node next = node.getNext();
169+
emphasis.appendChild(node);
170+
node = next;
171+
}
172+
opener.insertAfter(emphasis);
173+
return 1;
174+
}
175+
})
176+
.overrideDelimiterProcessor(new DelimiterProcessor() {
177+
@Override
178+
public char getOpeningCharacter() {
179+
return '_';
180+
}
181+
182+
@Override
183+
public char getClosingCharacter() {
184+
return '_';
185+
}
186+
187+
@Override
188+
public int getMinLength() {
189+
return 1;
190+
}
191+
192+
@Override
193+
public int process(DelimiterRun openingRun, DelimiterRun closingRun) {
194+
Text opener = openingRun.getOpener();
195+
Node closer = closingRun.getCloser();
196+
Emphasis emphasis = new Emphasis("_");
197+
for (Node node = opener.getNext(); node != closer; ) {
198+
Node next = node.getNext();
199+
emphasis.appendChild(node);
200+
node = next;
201+
}
202+
opener.insertAfter(emphasis);
203+
return 1;
204+
}
205+
})
206+
.build();
207+
208+
HtmlRenderer renderer = HtmlRenderer.builder().build();
209+
assertThat(renderer.render(parser.parse("*bold*"))).isEqualTo("<p><strong>bold</strong></p>\n");
210+
assertThat(renderer.render(parser.parse("_italic_"))).isEqualTo("<p><em>italic</em></p>\n");
211+
}
212+
114213
@Test
115214
public void threading() throws Exception {
116215
var parser = Parser.builder().build();

0 commit comments

Comments
 (0)