From 34fde113b525e4691ed5d81ccd01fb3d68538ebb Mon Sep 17 00:00:00 2001 From: Repo Assist Date: Sat, 7 Mar 2026 08:45:36 +0000 Subject: [PATCH 1/2] Precompile markdown parser regexes for improved performance The Punctuation and HtmlEntity active patterns in MarkdownInlineParser.fs were constructing regex pattern strings and calling Regex.Match on each invocation. The Punctuation pattern is called for every character during inline parsing, making the repeated regex construction a hot path. Changes: - Extract punctuationRegex and htmlEntityRegex as module-level compiled Regex values (RegexOptions.Compiled) in MarkdownInlineParser.fs - Limit the input string passed to each regex to its theoretical maximum match length (2 chars for punctuation, 34 chars for entities), avoiding conversion of the entire remaining input list on every call - Extract blockquoteRegex as a module-level compiled Regex in MarkdownBlockParser.fs, removing the per-call string concatenation Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../MarkdownBlockParser.fs | 11 ++++-- .../MarkdownInlineParser.fs | 39 +++++++++++-------- 2 files changed, 30 insertions(+), 20 deletions(-) diff --git a/src/FSharp.Formatting.Markdown/MarkdownBlockParser.fs b/src/FSharp.Formatting.Markdown/MarkdownBlockParser.fs index 90d30dcd2..ca2a267a9 100644 --- a/src/FSharp.Formatting.Markdown/MarkdownBlockParser.fs +++ b/src/FSharp.Formatting.Markdown/MarkdownBlockParser.fs @@ -594,14 +594,17 @@ let (|EmacsTableBlock|_|) (lines) = | _ -> None /// Recognizes a start of a blockquote -let (|BlockquoteStart|_|) (line: string, n: MarkdownRange) = - let regex = +let private blockquoteRegex = + Regex( "^ {0,3}" // Up to three leading spaces + ">" // Blockquote character + "\s?" 
// Maybe one whitespace character - + "(.*)" // Capture everything else + + "(.*)", // Capture everything else + RegexOptions.Compiled + ) - let match' = Regex.Match(line, regex) +let (|BlockquoteStart|_|) (line: string, n: MarkdownRange) = + let match' = blockquoteRegex.Match(line) if match'.Success then let group = match'.Groups.Item(1) diff --git a/src/FSharp.Formatting.Markdown/MarkdownInlineParser.fs b/src/FSharp.Formatting.Markdown/MarkdownInlineParser.fs index 959bc753a..54939549e 100644 --- a/src/FSharp.Formatting.Markdown/MarkdownInlineParser.fs +++ b/src/FSharp.Formatting.Markdown/MarkdownInlineParser.fs @@ -17,6 +17,21 @@ open FSharp.Formatting.Common // Parsing of Markdown - inline formatting (spans, characters, emphasis, links) // -------------------------------------------------------------------------------------- +// Precompiled regex for the full set of CommonMark Unicode punctuation characters. +// Compiled once at module initialisation to avoid per-call regex construction. +// Surrogate-pair cases require at most 2 chars; all others require 1 char, so callers +// need only supply a 2-char prefix of the remaining input. Note that `^` binds only to the first alternative of the pattern, so the 2-char prefix is also what keeps the surrogate-pair alternatives anchored at the head of the input; do not pass a longer string.
+let private punctuationRegex = + Regex( + """^[!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uDDC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]""", + RegexOptions.Compiled + ) + +// Precompiled regex for CommonMark HTML entity and numeric character references. +// Max match length is 34 chars (&, up to 32 name chars, ;), so callers need only +// supply a 34-char prefix of the remaining input. 
+let private htmlEntityRegex = Regex("^&(?:#x[a-f0-9]{1,8}|#[0-9]{1,8}|[a-z][a-z0-9]{1,31});", RegexOptions.Compiled) + /// Splits a link formatted as `http://link "title"` into a link part /// and an optional title part (may be wrapped using quote or double-quotes) let getLinkAndTitle (StringPosition.TrimBoth(input, _n)) = @@ -54,11 +69,10 @@ let (|Punctuation|_|) input = match input with | EscapedChar _ -> None | _ -> - // from https://github.com/commonmark/commonmark.js/blob/master/lib/inlines.js#L38 - let re = - """^[!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~\xA1\xA7\xAB\xB6\xB7\xBB\xBF\u037E\u0387\u055A-\u055F\u0589\u058A\u05BE\u05C0\u05C3\u05C6\u05F3\u05F4\u0609\u060A\u060C\u060D\u061B\u061E\u061F\u066A-\u066D\u06D4\u0700-\u070D\u07F7-\u07F9\u0830-\u083E\u085E\u0964\u0965\u0970\u0AF0\u0DF4\u0E4F\u0E5A\u0E5B\u0F04-\u0F12\u0F14\u0F3A-\u0F3D\u0F85\u0FD0-\u0FD4\u0FD9\u0FDA\u104A-\u104F\u10FB\u1360-\u1368\u1400\u166D\u166E\u169B\u169C\u16EB-\u16ED\u1735\u1736\u17D4-\u17D6\u17D8-\u17DA\u1800-\u180A\u1944\u1945\u1A1E\u1A1F\u1AA0-\u1AA6\u1AA8-\u1AAD\u1B5A-\u1B60\u1BFC-\u1BFF\u1C3B-\u1C3F\u1C7E\u1C7F\u1CC0-\u1CC7\u1CD3\u2010-\u2027\u2030-\u2043\u2045-\u2051\u2053-\u205E\u207D\u207E\u208D\u208E\u2308-\u230B\u2329\u232A\u2768-\u2775\u27C5\u27C6\u27E6-\u27EF\u2983-\u2998\u29D8-\u29DB\u29FC\u29FD\u2CF9-\u2CFC\u2CFE\u2CFF\u2D70\u2E00-\u2E2E\u2E30-\u2E42\u3001-\u3003\u3008-\u3011\u3014-\u301F\u3030\u303D\u30A0\u30FB\uA4FE\uA4FF\uA60D-\uA60F\uA673\uA67E\uA6F2-\uA6F7\uA874-\uA877\uA8CE\uA8CF\uA8F8-\uA8FA\uA8FC\uA92E\uA92F\uA95F\uA9C1-\uA9CD\uA9DE\uA9DF\uAA5C-\uAA5F\uAADE\uAADF\uAAF0\uAAF1\uABEB\uFD3E\uFD3F\uFE10-\uFE19\uFE30-\uFE52\uFE54-\uFE61\uFE63\uFE68\uFE6A\uFE6B\uFF01-\uFF03\uFF05-\uFF0A\uFF0C-\uFF0F\uFF1A\uFF1B\uFF1F\uFF20\uFF3B-\uFF3D\uFF3F\uFF5B\uFF5D\uFF5F-\uFF65]|\uD800[\uDD00-\uDD02\uDF9F\uDFD0]|\uD801\uDD6F|\uD802[\uDC57\uDD1F\uDD3F\uDE50-\uDE58\uDE7F\uDEF0-\uDEF6\uDF39-\uDF3F\uDF99-\uDF9C]|\uD804[\uDC47-\uDC4D\uDCBB\uDCBC\uDCBE-\uDCC1\uDD40-\uDD43\uDD74\uDD75\uDDC5-\uD
DC9\uDDCD\uDDDB\uDDDD-\uDDDF\uDE38-\uDE3D\uDEA9]|\uD805[\uDCC6\uDDC1-\uDDD7\uDE41-\uDE43\uDF3C-\uDF3E]|\uD809[\uDC70-\uDC74]|\uD81A[\uDE6E\uDE6F\uDEF5\uDF37-\uDF3B\uDF44]|\uD82F\uDC9F|\uD836[\uDE87-\uDE8B]""" - - let match' = Regex.Match(Array.ofList input |> String, re) + // Surrogate-pair punctuation cases need at most 2 chars; all other cases need 1. + // Passing only a short prefix avoids converting the entire remaining input list. + let prefix = input |> List.truncate 2 |> Array.ofList |> String + let match' = punctuationRegex.Match(prefix) if match'.Success then let entity = match'.Value @@ -289,17 +303,10 @@ let (|Emphasised|_|) = let (|HtmlEntity|_|) input = match input with | '&' :: _ -> - // regex from reference implementation: https://github.com/commonmark/commonmark.js/blob/da1db1e/lib/common.js#L10 - let re = - "^&" // beginning expect '&' - + "(?:" // start non-capturing group - + "#x[a-f0-9]{1,8}" // hex - + "|#[0-9]{1,8}" // or decimal - + "|[a-z][a-z0-9]{1,31}" // or name - + ")" // end non-capturing group - + ";" // expect ';' - - let match' = Regex.Match(Array.ofList input |> String, re) + // Max entity length: & + 32 name chars + ; = 34. Passing only a short prefix + // avoids allocating a string for the entire remaining input. + let prefix = input |> List.truncate 34 |> Array.ofList |> String + let match' = htmlEntityRegex.Match(prefix) if match'.Success then let entity = match'.Value From 6c69e981bd37d1af002608424a5328227bbe2c01 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sat, 7 Mar 2026 08:49:16 +0000 Subject: [PATCH 2/2] ci: trigger checks