parallelize lute lint by wmccrthy · Pull Request #1088 · luau-lang/lute

wmccrthy · 2026-05-09T22:47:39Z

Distribute walking input paths and linting files across workers. Number of workers capped by the minimum of available system threads and # of lintee files.

side note: it'd be reallyyyy sweet if we had documentation on the vm lib and how to use it, as it took me a while to figure out what its constraints were and how to work around them. If I'm missing something, pls lmk 😄

Testing

All tests passing.

Also manually sanity checked on some local repos with the script in the collapsible below. Verified that every linted file had the same # of violations when linted with old / new build. Seeing pretty big improvements.
Comparing 5/08 nightly release (default) against release build from this branch (parallel)

Repo	Old	New	Diff	Diff %	Old Violations	New Violations
lua-apps	46.12s	7.75s	38.38s	83.2%	22943	22943
foundation	2.47s	0.56s	1.90s	77.2%	855	855
lute	0.65s	0.26s	0.39s	60.0%	23	23

Test script

local process = require("@std/process")
local path = require("@std/path")
local json = require("@std/json")
local types = require("./lute/cli/commands/lint/types")

-- Lookup from the numeric `severity` code of a JSON diagnostic to the severity
-- name stored on a LintViolation. Codes outside 1..4 have no entry.
-- NOTE(review): the numbering looks like LSP's DiagnosticSeverity — confirm.
local SEVERITY_MAP: { [number]: types.severity } = {
	"error", -- 1
	"warning", -- 2
	"info", -- 3
	"hint", -- 4
}

-- Converts a `{ start = { line, character }, ["end"] = { line, character } }`
-- range into the flat begin/end line/column record used for violations.
-- Start line, start character and end line are each shifted by +1; the end
-- character is copied unchanged.
-- NOTE(review): presumably the input is zero-based with an exclusive end, which
-- would make the un-shifted endColumn a one-based inclusive column — confirm.
local function convertRange(range: any): { beginLine: number, beginColumn: number, endLine: number, endColumn: number }
	local first, last = range.start, range["end"]

	return {
		beginLine = first.line + 1,
		beginColumn = first.character + 1,
		endLine = last.line + 1,
		endColumn = last.character,
	}
end

-- Builds a LintViolation from one decoded JSON diagnostic.
--   item:       decoded diagnostic; reads `code`, `message`, `severity`,
--               `range`, `codeDescription` and the optional `suggestedfix`.
--   sourcepath: path string of the file the diagnostic belongs to; stored in
--               parsed form via path.parse.
-- An unknown or missing severity code falls back to "warning".
local function convertDiagnostic(item: any, sourcepath: string): types.LintViolation
	local severityName = (SEVERITY_MAP[item.severity] or "warning") :: types.severity

	local violation: types.LintViolation = {
		lintname = item.code,
		message = item.message,
		severity = severityName,
		location = convertRange(item.range) :: any,
		sourcepath = path.parse(sourcepath),
		target = item.codeDescription,
	}

	local suggested = item.suggestedfix
	if not suggested then
		return violation
	end

	-- The fix location is optional: only convert it when a range was supplied.
	local fixRange = suggested.range
	violation.suggestedfix = {
		fix = suggested.fix,
		location = if fixRange then convertRange(fixRange) :: any else nil,
	}

	return violation
end

-- Parses the stdout of `lute lint -j` into a map from file URI to the list of
-- violations reported for that file.
--   jsonString: raw stdout; anything before the first "{" is skipped.
-- Returns an empty map when no "{" is present, or when the decoded value has no
-- `items` field. Errors raised by json.deserialize are not caught.
-- Entries that share a `uri` are merged rather than overwritten, so per-file
-- counts stay accurate. An entry without `items` contributes zero violations.
local function parseLintJsonOutput(jsonString: string): { [string]: { types.LintViolation } }
	-- Plain (non-pattern) search: we want the literal brace that opens the JSON.
	local jsonStart = string.find(jsonString, "{", 1, true)
	if not jsonStart then
		return {}
	end

	local parsed = json.deserialize(jsonString:sub(jsonStart)) :: any
	if not parsed or not parsed.items then
		return {}
	end

	local violationsByFile: { [string]: { types.LintViolation } } = {}

	for _, fileEntry in parsed.items do
		local filePath = fileEntry.uri :: string

		-- Reuse the list from an earlier entry for the same file, if any.
		local violations: { types.LintViolation } = violationsByFile[filePath] or {}
		violationsByFile[filePath] = violations

		for _, item in fileEntry.items or {} do
			table.insert(violations, convertDiagnostic(item, filePath))
		end
	end

	return violationsByFile
end

-- Returns the total number of violations across every file in `results`
-- (the sum of the lengths of all per-file lists).
local function countViolations(results: { [string]: { types.LintViolation } }): number
	local total = 0
	for _, fileViolations in results do
		total = total + #fileViolations
	end
	return total
end

-- Renders a duration as a string with two decimal places and an "s" suffix.
local function formatSeconds(value: number): string
	return ("%.2fs"):format(value)
end

-- Renders a percentage as a string with one decimal place and a "%" suffix.
local function formatPercent(value: number): string
	return ("%.1f%%"):format(value)
end

-- One row of the final report: timings and violation totals for a single repo.
type Result = {
	repo: string,
	old: number,
	new: number,
	diffSeconds: number,
	diffPercent: number,
	oldViolations: number,
	newViolations: number,
	violationDiff: number,
}

-- Alternative binary (debug build), currently disabled:
-- local build = path.join(".", "build", "xcode", "debug", "lute", "cli", "lute")

-- Locally built release binary that is compared against the `lute` found on PATH.
local releaseBuild = path.join(".", "build", "xcode", "release", "lute", "cli", "lute")

-- Repositories to lint, keyed by the display name used in the report.
local matrix: { [string]: string } = {
	lute = "./",
	["lua-apps"] = "../roblox/lua-apps",
	foundation = "../roblox/foundation",
}

-- Rows accumulated by the benchmark loop, one per entry in `matrix`.
local results: { Result } = {}

-- Runs `command` via process.run and returns its result together with the
-- elapsed time measured with os.clock().
local function runTimed(command: { string })
	local startedAt = os.clock()
	local outcome = process.run(command)
	return outcome, os.clock() - startedAt
end

-- Asserts that every file in `subject` also appears in `reference` with the
-- same number of violations. `referenceLabel` names the reference run in the
-- "missing results" message.
local function assertCoveredBy(
	subject: { [string]: { types.LintViolation } },
	reference: { [string]: { types.LintViolation } },
	referenceLabel: string
)
	for file, violations in subject do
		local counterpart = reference[file]
		assert(counterpart ~= nil, `Missing {referenceLabel} lint results for {file}`)
		assert(#violations == #counterpart, `Violation count mismatch for {file}`)
	end
end

-- For each repo: lint with the `lute` on PATH, then with the release build,
-- check both runs agree file-by-file, and record the timings.
-- NOTE(review): the baseline always runs first, so the second run may benefit
-- from a warm filesystem cache — consider a warm-up run or alternating order.
for repo, repoPath in matrix do
	local normalLute, normalTime = runTimed({ "lute", "lint", "-j", repoPath })
	local parallelLint, parallelTime = runTimed({ path.format(releaseBuild), "lint", "-j", repoPath })

	local specialResults = parseLintJsonOutput(parallelLint.stdout)
	local normalResults = parseLintJsonOutput(normalLute.stdout)

	-- Check in both directions so a file missing from either run is caught.
	assertCoveredBy(specialResults, normalResults, "normal")
	assertCoveredBy(normalResults, specialResults, "parallel")

	local oldCount = countViolations(normalResults)
	local newCount = countViolations(specialResults)

	local savedSeconds = normalTime - parallelTime
	local savedPercent = 0
	if normalTime > 0 then
		savedPercent = (savedSeconds / normalTime) * 100
	end

	local row: Result = {
		repo = repo,
		old = normalTime,
		new = parallelTime,
		diffSeconds = savedSeconds,
		diffPercent = savedPercent,
		oldViolations = oldCount,
		newViolations = newCount,
		violationDiff = newCount - oldCount,
	}
	table.insert(results, row)
end

-- Emit the collected results as a markdown table: header, alignment row, then
-- one row per repo.
print("| Repo | Old | New | Diff | Diff % | Old Violations | New Violations | Violation Diff |")
print("| --- | ---: | ---: | ---: | ---: | ---: | ---: | ---: |")

for _, row in results do
	local cells = {
		row.repo,
		formatSeconds(row.old),
		formatSeconds(row.new),
		formatSeconds(row.diffSeconds),
		formatPercent(row.diffPercent),
		tostring(row.oldViolations),
		tostring(row.newViolations),
		tostring(row.violationDiff),
	}
	print("| " .. table.concat(cells, " | ") .. " |")
end

skberkeley · 2026-05-11T20:53:23Z

This is a pretty large change and difficult to review at the moment. It seems like this is currently a refactor combined with the functional changes to parallelize linting. If that's the case, could I ask that you split out the refactor part into a separate PR to make it easier to review?

wmccrthy · 2026-05-11T22:15:07Z

This is a pretty large change and difficult to review at the moment. It seems like this is currently a refactor combined with the functional changes to parallelize linting. If that's the case, could I ask that you split out the refactor part into a separate PR to make it easier to review?

Sure! Some of the refactor is tightly coupled to the constraints of using vm, so even with an additional PR I will have to move a couple things around from the refactor PR -> parallel PR, but it should be more digestible regardless

wmccrthy · 2026-05-11T23:00:18Z

This is a pretty large change and difficult to review at the moment. It seems like this is currently a refactor combined with the functional changes to parallelize linting. If that's the case, could I ask that you split out the refactor part into a separate PR to make it easier to review?

@skberkeley refactor up here, i'll rebase and clean up this branch once that is merged

Separating refactor from parallelization change, as per [this request](#1088 (comment)). This PR moves business logic from `lute lint` into a separate `lintCore` module, providing a clear pathway to re-use logic without duplication when we add parallelization to `lute lint`. Changes are entirely non-functional.

skberkeley

this change feels quite risky to me; can you gate it behind a command line flag?

skberkeley · 2026-05-15T17:36:54Z

+					rulePath = rulePath,
+					noDefaults = noDefaults,
+					configPath = passedConfigVal,
+					verbose = VERBOSE,


why are we passing these things around in a table?

also why does every worker need to do initialize?

We can't pass functions as args to the separate VM instance, so the best workaround I know of is re-initializing based on the location of relevant files (rules and config). If there's a better alternative, I'm all ears

Passing the args as a table is just a stylistic choice

i see, it sounds like we should tease apart the parts of initialize which load the rules and the parts that load the config, and do the config loading before spawning the workers then

creating and reading from tables is more costly, and this isn't an option table, so we should pass the arguments around unwrapped

Re passing as tables:
Sounds good, will change.

Re only initializing rules:
I think we still need to initialize both, since rule configs have arbitrary typing; it's totally possible, for instance, that a custom rule has an option that expects a function.

It looks like there's an issue up regarding support for passing all primitive types through the VM barrier, so we likely won't need initialize in the long-run. I can add a comment calling this out

skberkeley · 2026-05-15T17:50:54Z

+					while curr ~= nil do
+						if hasIgnores then
+							local inputFilePathString = pathLib.format(curr)
+
+							if
+								parseIgnores.isIgnored(
+									ignoreData,
+									inputFilePathString,
+									fs.type(inputFilePathString) == "dir"
+								)
+							then
+								curr = walker()
+								continue
+							end
+						end
+
+						local success, res = pcall(function()
+							return worker.lintFile({
+								file = pathLib.format(curr :: pathLib.Path),
+								autofix = job.autoFixEnabled,
+								verbose = VERBOSE,
+							})
+						end)
+						if success then
+							allViolations[curr] = res :: { types.LintViolation }
+						else
+							print(`(worker) Error linting file '{curr}': {res}`)
+						end
+
+						curr = walker()
+					end
+					curJob = lintNext()


this feels like a strange way to balance tasks across workers; if we give one worker a folder with many files and subfolders, we'll just end up waiting for that one. can you add a comment explaining that we don't have thread safe data structures so this is kind of the best we can do?

Yea that's fair; I also explored keeping the inputFilePaths walking sequential, queueing jobs as files were walked, and having workers pull from the queue, but found it was empirically slower. The scenario you call out definitely makes sense in theory but also seems impossible given that getSourceFiles returns exclusively files and no directories. I kept the fs walk logic because that is how lintPaths worked previously, but maybe it's worth dropping 🤷

I'd recommend scanning files and then dispatching them in groups afterwards. see what lute test/ the format PR does.

we should probably abstract this worker pool soon so we can share it between test, lint, transform, format, etc. it's all very similar

Hi! I'll put up my draft for this sometime this week. When that happens I can refactor this code during my PR :)

wmccrthy · 2026-05-15T18:41:51Z

this change feels quite risky to me; can you gate it behind a command line flag?

Yes, my only concern is duplicating test logic as I like the safety of having every test case exercise the parallel codepath

…age + update lint docs

wmccrthy · 2026-05-15T20:29:35Z

-				local inputFilePathString = pathLib.format(curr)
+	if not PARALLEL then
+		for _, inputPath in inputFiles do
+			local walker = fs.walk(inputPath, { recursive = true })


I removed fs walking in the parallel path since we identified it's redundant given how getSourceFiles works. Kept it in non-parallel to keep changes as minimal as possible, but maybe worth removing too? Will defer to @skberkeley

Yeah probably worth refactoring in a separate PR?

skberkeley

Looks good, thanks for taking my feedback into consideration! I'm going to ask @~Vighnesh-V to take a look too since he understands the VM/task library the best

skberkeley · 2026-05-18T21:25:02Z

-				local inputFilePathString = pathLib.format(curr)
+	if not PARALLEL then
+		for _, inputPath in inputFiles do
+			local walker = fs.walk(inputPath, { recursive = true })


Yeah probably worth refactoring in a separate PR?

skberkeley · 2026-05-18T21:25:19Z

+
+				curr = walker()
+			end
 		end


if we early return here we don't need to wrap the parallel part in an else block

I'm going to be a bit annoying and ask that we extract the code for 'running' lints into its own file. Probably something that exposes runSequential and runParallel as part of its export surface.

skberkeley · 2026-05-18T21:29:03Z

Oh, also maybe worth updating our CI to run lute lint in parallel?

Nicell · 2026-05-19T00:24:01Z

+### `-p, --parallel`
+
+Lint files in parallel instead of sequentially.


why isn't this just the default? why wouldn't you want this? tests already run them in parallel by default

I agree with Nick here. This should be parallel by default. Can you add the inverse (-s, --sequentially) instead, so users can opt in to determinism?

Nicell · 2026-05-19T00:26:33Z

this change feels quite risky to me; can you gate it behind a command line flag?

I think a flag means that no one will use it. lute should be fast by default, if we're not confident enough to have it on by default, why are we confident enough to add it behind a CLI flag that people are much less likely to use?

Vighnesh-V · 2026-05-19T16:53:28Z

+### `-p, --parallel`
+
+Lint files in parallel instead of sequentially.


I agree with Nick here. This should be parallel by default. Can you add the inverse (-s, --sequentially) instead, so users can opt in to determinism?

Vighnesh-V · 2026-05-19T17:51:47Z

+
+				curr = walker()
+			end
 		end


I'm going to be a bit annoying and ask that we extract the code for 'running' lints into its own file. Probably something that exposes runSequential and runParallel as part of its export surface.

Vighnesh-V · 2026-05-19T18:13:55Z

+					if
+						parseIgnores.isIgnored(ignoreData, inputFilePathString, fs.type(inputFilePathString) == "dir")


is this actually what stylua does if the condition is too long?

seems that way

Vighnesh-V · 2026-05-19T18:14:37Z

+			task.spawn(function()
+				xpcall(function()
+					local worker = vm.create("@self/worker")
+					-- Initialize will be UNNECESSARY once we can pass functions to separate VM


what does this mean?

oh, serializing functions across vm bodies. Could I suggest instead that we add support for adding arguments to vm.create calls, and the module can initialize state based on that?

e.g. vm.create(.., rulesPath, noDefaults, passedConfigVal, VERBOSE)?

That makes sense. Probably out of the scope of this PR though? Happy to handle in a separate PR and update this accordingly

Vighnesh-V · 2026-05-19T18:31:09Z

 	VERBOSE = args:has("verbose")
+	PARALLEL = args:has("parallel")


Capital case is usually used for constants. Can we stick these options in a table and then read them in here please?

As in have a module-scoped table like { verbose: boolean, parallel: boolean, }? If so, why is that better than just changing the casing?

Vighnesh-V · 2026-05-19T18:32:09Z

+			passedConfigVal,
+			autofixEnabled,
+			ignoreData
+		) -- LUAUFIX: type refinement on line 305 should mean that cast on inputFiles isn't needed


Isn't this file only 282 lines? Where does this point to now?

Good question, from a quick look at commit history it looks like this comment's been bouncing around for a while. Maybe we just remove it

Vighnesh-V · 2026-05-19T18:32:54Z

 		fs.removeDirectory(modulePath, { recursive = true })
 	end)

+	suite:case("luteLintParallelMultipleInputFiles", function(assert)


Can these be expressed as a snapshot test?

If we're going to default to parallel, I'll swap these cases for --sequential. We can use snapshots for those

On second thought, it doesn't really make sense to have dedicated snapshot tests (or explicit tests) for sequential linting. For the sake of safety I'd prefer if all test cases exercised both sequential and parallel code paths. This may be a bit redundant, but casts the widest net for catching regressions

…ential arg instead of parallel

…r to always test both parallel and sequential

wmccrthy and others added 5 commits May 9, 2026 15:34

draft parallelizing lute lint

007425d

use options tbl input pattern for lintFile + fix termination condition

9c6367c

nit type correction

528462d

better approach: parallelize at a higher level

6bef1c6

Merge branch 'primary' into parallelize-lint

a06d2b7

wmccrthy changed the title ~~parallelizing lute lint~~ parallelize lute lint May 9, 2026

wmccrthy marked this pull request as ready for review May 9, 2026 23:45

auto-assign Bot assigned wmccrthy May 9, 2026

wmccrthy commented May 10, 2026

View reviewed changes

Comment thread lute/cli/commands/lint/init.luau Outdated

nit simplification

e91e82c

wmccrthy mentioned this pull request May 11, 2026

Separate linting business logic into lintCore #1092

Merged

wmccrthy added 3 commits May 14, 2026 10:49

Merge branch 'primary' into parallelize-lint

d8fac40

simplify lintFile

3c0998d

rename worker file

860d6db

skberkeley requested changes May 15, 2026

View reviewed changes

some nits + remove redundant fs walking in parallel lints

21cdaf0

remove comment

b208035

skberkeley reviewed May 15, 2026

View reviewed changes

Comment thread lute/cli/commands/lint/init.luau Outdated

Comment thread lute/cli/commands/lint/init.luau Outdated

refactor to gate parallelization behind CLI arg + explicit test cover…

fa03115

…age + update lint docs

wmccrthy commented May 15, 2026

View reviewed changes

Comment thread tests/cli/lint.test.luau Outdated

wmccrthy and others added 2 commits May 15, 2026 13:30

Update tests/cli/lint.test.luau

91b2577

remove redundant args in test case

3918448

wmccrthy requested a review from skberkeley May 15, 2026 20:31

comment indicating initialize will eventually be redundant

c330e8d

skberkeley approved these changes May 18, 2026

View reviewed changes

Nicell reviewed May 19, 2026

View reviewed changes

Vighnesh-V requested changes May 19, 2026

View reviewed changes

wmccrthy added 5 commits May 19, 2026 16:38

abstract majority of lint running logic into it's own file + add sequ…

afc095a

…ential arg instead of parallel

remove explicit testing of different modes and refactor lintTestHelpe…

2139b08

…r to always test both parallel and sequential

remove mysterious comment

ff50306

fix

79b429b

fix

7353a3d

		### `-p, --parallel`

		Lint files in parallel instead of sequentially.

		if
		parseIgnores.isIgnored(ignoreData, inputFilePathString, fs.type(inputFilePathString) == "dir")

		VERBOSE = args:has("verbose")
		PARALLEL = args:has("parallel")

Conversation

wmccrthy commented May 9, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Testing

Uh oh!

Uh oh!

skberkeley commented May 11, 2026

Uh oh!

wmccrthy commented May 11, 2026

Uh oh!

wmccrthy commented May 11, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

skberkeley left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

wmccrthy May 15, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

wmccrthy May 15, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

wmccrthy commented May 15, 2026

Uh oh!

Uh oh!

Uh oh!

wmccrthy May 15, 2026 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

skberkeley left a comment

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

skberkeley commented May 18, 2026

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Nicell commented May 19, 2026

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

wmccrthy commented May 9, 2026 •

edited

Loading

wmccrthy commented May 11, 2026 •

edited

Loading

wmccrthy May 15, 2026 •

edited

Loading

wmccrthy May 15, 2026 •

edited

Loading

wmccrthy May 15, 2026 •

edited

Loading

wmccrthy May 19, 2026 •

edited

Loading