Skip to content

Commit 5414de2

Browse files
committed
perf: optimize dependency resolver with map-based deduplication
Replace strset-based deduplication with a map-based approach to avoid the overhead of an external dependency. Use struct keys instead of string concatenation for relationship tracking, eliminating temporary string allocations. Changes: (1) add a pairKey struct to track relationship pairs without string concatenation; (2) replace strset.New() with map[string]struct{} in deduplicate(); (3) use the pairKey struct in Resolve() to avoid string key construction; (4) remove the dependency on scylladb/go-set/strset. Benchmarks: BenchmarkDeduplicate_VeryLarge (5000 strings): 85566 ns/op, 237232 B/op, 27 allocs/op; BenchmarkResolve_CraftedRelationships_VeryLarge (1000 pkgs): 857822 ns/op, 1.03 MB, 12063 allocs/op. Related: PR anchore#4585 (closed)
1 parent 94c8088 commit 5414de2

2 files changed

Lines changed: 130 additions & 11 deletions

File tree

syft/pkg/cataloger/internal/dependency/resolver.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,17 @@ package dependency
33
import (
44
"sort"
55

6-
"github.com/scylladb/go-set/strset"
7-
86
"github.com/anchore/syft/internal"
97
"github.com/anchore/syft/syft/artifact"
108
"github.com/anchore/syft/syft/pkg"
119
"github.com/anchore/syft/syft/pkg/cataloger/generic"
1210
)
1311

12+
// pairKey is a struct used to track seen relationship pairs without string concatenation
13+
type pairKey struct {
14+
from, to artifact.ID
15+
}
16+
1417
// Specification holds strings that indicate abstract resources that a package provides for other packages and
1518
// requires for itself. These strings can represent anything from file paths, package names, or any other concept
1619
// that is useful for dependency resolution within that packing ecosystem.
@@ -64,15 +67,15 @@ func Resolve(specifier Specifier, pkgs []pkg.Package) (relationships []artifact.
6467
specsByPkg[id] = allProvides(pkgsProvidingResource, id, specifier(p))
6568
}
6669

67-
seen := strset.New()
70+
seen := make(map[pairKey]struct{})
6871
for _, dependantPkg := range pkgs {
6972
specs := specsByPkg[dependantPkg.ID()]
7073
for _, spec := range specs {
7174
for _, resource := range deduplicate(spec.Requires) {
7275
for providingPkgID := range pkgsProvidingResource[resource] {
73-
// prevent creating duplicate relationships
74-
pairKey := string(providingPkgID) + "-" + string(dependantPkg.ID())
75-
if seen.Has(pairKey) {
76+
// prevent creating duplicate relationships using struct key instead of string concatenation
77+
key := pairKey{from: providingPkgID, to: dependantPkg.ID()}
78+
if _, exists := seen[key]; exists {
7679
continue
7780
}
7881

@@ -86,7 +89,7 @@ func Resolve(specifier Specifier, pkgs []pkg.Package) (relationships []artifact.
8689
},
8790
)
8891

89-
seen.Add(pairKey)
92+
seen[key] = struct{}{}
9093
}
9194
}
9295
}
@@ -112,8 +115,16 @@ func allProvides(pkgsProvidingResource map[string]internal.Set[artifact.ID], id
112115

113116
// deduplicate returns the distinct strings found in ss, sorted in ascending order.
//
// The result is always a freshly allocated, non-nil slice and ss itself is never modified. Sorting makes the
// output deterministic across invocations, regardless of the order or repetition of the input.
func deduplicate(ss []string) []string {
	// every input element may be distinct, so len(ss) is the upper bound for both the set and the result.
	// note: the result must be sized from the input; sizing it from the (still empty) set yields a zero
	// capacity and forces append to regrow the slice repeatedly.
	seen := make(map[string]struct{}, len(ss))
	result := make([]string, 0, len(ss))

	for _, s := range ss {
		if _, ok := seen[s]; ok {
			continue
		}
		seen[s] = struct{}{}
		result = append(result, s)
	}

	sort.Strings(result)
	return result
}
Lines changed: 108 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
package dependency
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/anchore/syft/syft/pkg"
8+
)
9+
10+
func BenchmarkDeduplicate(b *testing.B) {
11+
input := generateDuplicateStrings(100, 50)
12+
13+
b.ResetTimer()
14+
for i := 0; i < b.N; i++ {
15+
_ = deduplicate(input)
16+
}
17+
}
18+
19+
func BenchmarkDeduplicate_Large(b *testing.B) {
20+
input := generateDuplicateStrings(1000, 200)
21+
22+
b.ResetTimer()
23+
for i := 0; i < b.N; i++ {
24+
_ = deduplicate(input)
25+
}
26+
}
27+
28+
func BenchmarkDeduplicate_VeryLarge(b *testing.B) {
29+
input := generateDuplicateStrings(5000, 500)
30+
31+
b.ResetTimer()
32+
for i := 0; i < b.N; i++ {
33+
_ = deduplicate(input)
34+
}
35+
}
36+
37+
func BenchmarkDeduplicate_Memory(b *testing.B) {
38+
input := generateDuplicateStrings(5000, 500)
39+
40+
b.ReportAllocs()
41+
b.ResetTimer()
42+
for i := 0; i < b.N; i++ {
43+
_ = deduplicate(input)
44+
}
45+
}
46+
47+
func BenchmarkResolve_CraftedRelationships(b *testing.B) {
48+
pkgs := generatePackages(100, 100)
49+
50+
b.ResetTimer()
51+
for i := 0; i < b.N; i++ {
52+
_ = Resolve(testDependencySpecifier, pkgs)
53+
}
54+
}
55+
56+
func BenchmarkResolve_CraftedRelationships_Large(b *testing.B) {
57+
pkgs := generatePackages(500, 200)
58+
59+
b.ResetTimer()
60+
for i := 0; i < b.N; i++ {
61+
_ = Resolve(testDependencySpecifier, pkgs)
62+
}
63+
}
64+
65+
func BenchmarkResolve_CraftedRelationships_VeryLarge(b *testing.B) {
66+
pkgs := generatePackages(1000, 500)
67+
68+
b.ReportAllocs()
69+
b.ResetTimer()
70+
for i := 0; i < b.N; i++ {
71+
_ = Resolve(testDependencySpecifier, pkgs)
72+
}
73+
}
74+
75+
// generateDuplicateStrings builds a slice of total strings of the form "resource-<k>", where k is the element
// index modulo unique. The values therefore repeat in a cycle of length unique.
func generateDuplicateStrings(total, unique int) []string {
	out := make([]string, total)
	for idx := range out {
		out[idx] = fmt.Sprintf("resource-%d", idx%unique)
	}
	return out
}
82+
83+
func generatePackages(numPkgs, numResources int) []pkg.Package {
84+
pkgs := make([]pkg.Package, numPkgs)
85+
for i := 0; i < numPkgs; i++ {
86+
p := pkg.Package{
87+
Name: fmt.Sprintf("package-%d", i),
88+
Version: fmt.Sprintf("1.0.%d", i),
89+
Language: pkg.JavaScript,
90+
}
91+
p.SetID()
92+
pkgs[i] = p
93+
}
94+
return pkgs
95+
}
96+
97+
func testDependencySpecifier(p pkg.Package) Specification {
98+
return Specification{
99+
ProvidesRequires: ProvidesRequires{
100+
Provides: []string{
101+
fmt.Sprintf("provides-%s", p.Name),
102+
},
103+
Requires: []string{
104+
fmt.Sprintf("requires-%s", p.Name),
105+
},
106+
},
107+
}
108+
}

0 commit comments

Comments
 (0)