Skip to content
Merged
1 change: 1 addition & 0 deletions benchmarks/sequence_set-new.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ prelude: |
include ProfilingHelper

N_RAND = 100
ENV["BENCHMARK_RANDOM_SEED"]&.then {|seed| srand Integer(seed) }

def rand_nums(n, min: 1, max: (n * 1.25).to_i) = Array.new(n) { rand(1..max) }
def rand_entries(...) = SeqSet[rand_nums(...)].elements
Expand Down
3 changes: 2 additions & 1 deletion benchmarks/sequence_set-ops.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ prelude: |
INPUT_COUNT = Integer ENV.fetch("BENCHMARK_INPUT_COUNT", 1000)
MAX_INPUT = Integer ENV.fetch("BENCHMARK_MAX_INPUT", 1400)
WARMUP_RUNS = Integer ENV.fetch("BENCHMARK_WARMUP_RUNS", 200)
ENV["BENCHMARK_RANDOM_SEED"]&.then {|seed| srand Integer(seed) }
SETS = Array.new(1000) {
Net::IMAP::SequenceSet[Array.new(INPUT_COUNT) { rand(1..MAX_INPUT) }]
Expand All @@ -17,7 +18,7 @@ prelude: |
end
# warmup (esp. for JIT)
200.times do
WARMUP_RUNS.times do
lhs, rhs = sets
lhs | rhs
lhs & rhs
Expand Down
8 changes: 5 additions & 3 deletions benchmarks/sequence_set-slice.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@ prelude: |
require "./test/lib/profiling_helper"
include ProfilingHelper

def init(label = nil, n:, d: 1.5)
ENV["BENCHMARK_RANDOM_SEED"]&.then {|seed| srand Integer(seed) }

def init(label = nil, n:, d: 1.2)
n = n.to_int
samples = (1e5.to_i / n).ceil.clamp(1..100)
domain = 1..(n*d).floor
Expand All @@ -15,10 +17,10 @@ prelude: |
set
}
$idxs = Array.new([10_000, 2 * n].min) { rand(-n..n - 1) }
$lens = Array.new([10_000, n].min) { rand(1..n) }
$lens = Array.new([10_000, n].min) { [rand(1..n), 100].min }
$ranges = Array.new([10_000, n * n].min) {
start = idx
stop = start.negative? ? rand(start..-1) : rand(start...n)
stop = [start.negative? ? rand(start..-1) : rand(start...n), start + 100].min
start..stop
}
maybe_profile("seqset-slice-#{label}-#{n}") if label
Expand Down
149 changes: 92 additions & 57 deletions lib/net/imap/sequence_set.rb
Original file line number Diff line number Diff line change
Expand Up @@ -555,12 +555,17 @@ def full; FULL end
# combined with set operations (#|, #&, #^, #-, etc) to make new sets.
#
# See SequenceSet@Creating+sequence+sets.
def initialize(input = nil) input ? replace(input) : clear end
def initialize(input = nil)
@set_data = new_set_data
@string = nil
replace(input) unless input.nil?
end

# Removes all elements and returns self.
def clear
modifying! # redundant check (normalizes the error message for JRuby)
@set_data, @string = [], nil
set_data.clear
@string = nil
self
end

Expand Down Expand Up @@ -626,7 +631,7 @@ def string=(input)
entries = each_parsed_entry(str).to_a
clear
if normalized_entries?(entries)
minmaxes.replace entries.map!(&:minmax)
replace_minmaxes entries.map!(&:minmax)
else
add_minmaxes entries.map!(&:minmax)
@string = -str
Expand All @@ -647,7 +652,7 @@ def to_s; string || "" end
# Freezes and returns the set. A frozen SequenceSet is Ractor-safe.
def freeze
return self if frozen?
set_data.each(&:freeze).freeze
freeze_set_data
super
end

Expand Down Expand Up @@ -744,7 +749,7 @@ def include?(element)
alias member? include?

# Returns +true+ when the set contains <tt>*</tt>.
def include_star?; minmaxes.last&.last == STAR_INT end
def include_star?; max_num == STAR_INT end

# Returns +true+ if the set and a given object have any common elements,
# +false+ otherwise.
Expand Down Expand Up @@ -784,7 +789,7 @@ def disjoint?(other)
def max(count = nil, star: :*)
if count
slice(-[count, size].min..) || remain_frozen_empty
elsif (val = minmaxes.last&.last)
elsif (val = max_num)
val == STAR_INT ? star : val
end
end
Expand All @@ -804,7 +809,7 @@ def max(count = nil, star: :*)
def min(count = nil, star: :*)
if count
slice(0...count) || remain_frozen_empty
elsif (val = minmaxes.first&.first)
elsif (val = min_num)
val != STAR_INT ? val : star
end
end
Expand Down Expand Up @@ -1003,7 +1008,7 @@ def append(entry)
modifying! # short-circuit before import_minmax
minmax = import_minmax entry
adj = minmax.first - 1
if @string.nil? && (runs.empty? || minmaxes.last.last <= adj)
if @string.nil? && (runs.empty? || max_num <= adj)
# append to elements or coalesce with last element
add_minmax minmax
return self
Expand Down Expand Up @@ -1333,7 +1338,7 @@ def to_set; Set.new(numbers) end
# set.cardinality #=> 4294967296
#
# Related: #count, #count_with_duplicates
def cardinality = minmaxes.sum(@set_data.count) { _2 - _1 }
def cardinality = minmaxes.sum(runs.count) { _2 - _1 }

# Returns the count of distinct #numbers in the set.
#
Expand Down Expand Up @@ -1642,7 +1647,7 @@ def complement!
flat = minmaxes.flat_map { [_1 - 1, _2 + 1] }
if flat.first < 1 then flat.shift else flat.unshift 1 end
if STAR_INT < flat.last then flat.pop else flat.push STAR_INT end
@set_data = flat.each_slice(2).to_a
replace_minmaxes flat.each_slice(2).to_a
normalize!
end

Expand Down Expand Up @@ -1763,7 +1768,7 @@ def normalize!
#
# Related: #normalize!, #normalize, #string, #to_s, #normalized?
def normalized_string
-runs.map { export_run _1 }.join(",") unless runs.empty?
export_runs(runs) unless runs.empty?
end

# Returns an inspection string for the SequenceSet.
Expand Down Expand Up @@ -1841,7 +1846,7 @@ def encode_with(coder) # :nodoc:

# For YAML deserialization
def init_with(coder) # :nodoc:
@set_data = []
@set_data = new_set_data
self.string = coder['string']
end

Expand All @@ -1853,8 +1858,6 @@ def init_with(coder) # :nodoc:
alias runs set_data
alias minmaxes runs

def dup_set_data; set_data.map { _1.dup } end # :nodoc:

private

def remain_frozen(set) frozen? ? set.freeze : set end
Expand Down Expand Up @@ -2023,14 +2026,9 @@ def intersect_minmax?((min, max))
alias include_run? include_minmax?
alias intersect_run? intersect_minmax?

def bsearch_minmax_with_index(num)
idx = minmaxes.bsearch_index { _2 >= num } and [minmaxes[idx], idx]
end

def bsearch_range(num)
first, last = minmaxes.bsearch { _2 >= num }
first..last if first
end
def bsearch_index(num) = minmaxes.bsearch_index { _2 >= num }
def bsearch_minmax(num) = minmaxes.bsearch { _2 >= num }
def bsearch_range(num) = (min, max = bsearch_minmax(num)) && (min..max)

######################################################################{{{2
# Number indexing methods
Expand Down Expand Up @@ -2095,6 +2093,35 @@ def slice_range(range)
end
end

######################################################################{{{2
# Core set data create/freeze/dup primitives

def new_set_data = []
def freeze_set_data = set_data.each(&:freeze).freeze
def dup_set_data = set_data.map { _1.dup }
protected :dup_set_data

######################################################################{{{2
# Core set data query/enumeration primitives

def min_num = minmaxes.first&.first
def max_num = minmaxes.last&.last

def min_at(idx) = minmaxes[idx][0]
def max_at(idx) = minmaxes[idx][1]

######################################################################{{{2
# Core set data modification primitives

def set_min_at(idx, min) = minmaxes[idx][0] = min
def set_max_at(idx, max) = minmaxes[idx][1] = max
def replace_minmaxes(other) = minmaxes.replace(other)
def append_minmax(min, max) = minmaxes << [min, max]
def insert_minmax(idx, min, max) = minmaxes.insert idx, [min, max]
def delete_run_at(idx) = runs.delete_at(idx)
def slice_runs!(...) = runs.slice!(...)
def truncate_runs!(idx) = runs.slice!(idx..)

######################################################################{{{2
# Update methods

Expand Down Expand Up @@ -2130,26 +2157,30 @@ def subtract_minmaxes(minmaxes)
# ---------??===lower==|--|==|--|=====upper===|-- join to upper
def add_minmax(minmax)
modifying!
min, max = minmax
lower, lower_idx = bsearch_minmax_with_index(min - 1)
if lower.nil? then minmaxes << [min, max]
elsif (max + 1) < lower.first then minmaxes.insert(lower_idx, [min, max])
else add_coalesced_minmax(lower, lower_idx, min, max)
min, max = minmax
lower_idx = bsearch_index(min - 1)
lmin, lmax = min_at(lower_idx), max_at(lower_idx) if lower_idx
if lmin.nil? then append_minmax min, max
elsif (max + 1) < lmin then insert_minmax lower_idx, min, max
else add_coalesced_minmax(lower_idx, lmin, lmax, min, max)
end
end

def add_coalesced_minmax(lower, lower_idx, min, max)
return if lower.first <= min && max <= lower.last
lower[0] = [min, lower.first].min
lower[1] = [max, lower.last].max
lower_idx += 1
return if lower_idx == minmaxes.count
tmax_adj = lower.last + 1
upper, upper_idx = bsearch_minmax_with_index(tmax_adj)
if upper
tmax_adj < upper.first ? (upper_idx -= 1) : (lower[1] = upper.last)
def add_coalesced_minmax(lower_idx, lmin, lmax, min, max)
return if lmin <= min && max <= lmax
set_min_at lower_idx, (lmin = min) if min < lmin
set_max_at lower_idx, (lmax = max) if lmax < max
next_idx = lower_idx + 1
return if next_idx == runs.count
tmax_adj = lmax + 1
if (upper_idx = bsearch_index(tmax_adj))
if tmax_adj < min_at(upper_idx)
upper_idx -= 1
else
set_max_at lower_idx, max_at(upper_idx)
end
end
minmaxes.slice!(lower_idx..upper_idx)
slice_runs! next_idx..upper_idx
end

# |====subtracted run=======|
Expand All @@ -2167,34 +2198,38 @@ def add_coalesced_minmax(lower, lower_idx, min, max)
# -------??=====lower====|--|====|--|=====upper====|-- 8. delete and trim
def subtract_minmax(minmax)
modifying!
min, max = minmax
lower, idx = bsearch_minmax_with_index(min)
if lower.nil? then nil # case 1.
elsif max < lower.first then nil # case 2.
elsif max < lower.last then trim_or_split_minmax lower, idx, min, max
else trim_or_delete_minmax lower, idx, min, max
min, max = minmax
idx = bsearch_index(min)
lmin, lmax = min_at(idx), max_at(idx) if idx
if lmin.nil? then nil # case 1.
elsif max < lmin then nil # case 2.
elsif max < lmax then trim_or_split_minmax idx, lmin, min, max
else trim_or_delete_minmax idx, lmin, lmax, min, max
end
end

def trim_or_split_minmax(lower, idx, tmin, tmax)
if lower.first < tmin # split
minmaxes.insert(idx, [lower.first, tmin - 1])
def trim_or_split_minmax(idx, lmin, tmin, tmax)
set_min_at idx, tmax + 1
if lmin < tmin # split
insert_minmax idx, lmin, tmin - 1
end
lower[0] = tmax + 1
end

def trim_or_delete_minmax(lower, lower_idx, tmin, tmax)
if lower.first < tmin # trim lower
lower[1] = tmin - 1
def trim_or_delete_minmax(lower_idx, lmin, lmax, tmin, tmax)
if lmin < tmin # trim lower
lmax = set_max_at lower_idx, tmin - 1
lower_idx += 1
end
if tmax == lower.last # case 5
upper_idx = lower_idx
elsif (upper, upper_idx = bsearch_minmax_with_index(tmax + 1))
upper_idx -= 1 # cases 7 and 8
upper[0] = tmax + 1 if upper.first <= tmax # case 8 (else case 7)
if tmax == lmax # case 5
delete_run_at lower_idx
elsif (upper_idx = bsearch_index(tmax + 1))
if min_at(upper_idx) <= tmax # case 8
set_min_at upper_idx, tmax + 1
end
slice_runs! lower_idx..upper_idx - 1 # cases 7 and 8
else # case 6
truncate_runs! lower_idx
end
minmaxes.slice!(lower_idx..upper_idx)
end

alias add_runs add_minmaxes
Expand Down