Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
31d19b9
use method in apply instead of duplicating code
May 30, 2025
ae3bdc9
classes to handle file pairs
Jun 3, 2025
a31c922
install coverage to ensure latest version
Jun 3, 2025
ea3d585
use forge channel
Jun 3, 2025
93397f6
earlier version
Jun 3, 2025
2982503
no coverage for python 3.11
Jun 3, 2025
959c431
run nosetests verbose
Jun 3, 2025
46b84ae
check dir
Jun 3, 2025
7e7d6c2
go up one level
Jun 3, 2025
c19da1e
add init to get detected by test
Jun 3, 2025
85b7e22
add documentation
Jun 3, 2025
5e42a2c
intentionally break test
Jun 3, 2025
dedeae9
add argument to nose
Jun 3, 2025
4732351
add exe arg for test discovery
Jun 3, 2025
6225da7
restore test value
Jun 3, 2025
08ac3fe
remove verbose output
Jun 3, 2025
88be977
keep directory
Jun 3, 2025
b8638fc
test cache
Jun 5, 2025
b55b56b
fix cache indexing
Jun 5, 2025
60fedd1
allow setting chunksize
Jun 5, 2025
549e349
keep flist out of dict, cast keys to string
Jun 5, 2025
e5d2c79
mock method to speed up testing
Jun 5, 2025
1c77388
reduce iterations to speed up testing
Jun 5, 2025
d9aabc9
restrict coverage
Jun 5, 2025
0558ad0
omit unused imports
Jun 5, 2025
ac282b1
omit obsolete comment
Jun 5, 2025
cb3bb93
remove unused imports
Jun 5, 2025
46307fa
restore import of constant
Jun 5, 2025
99c6948
speed up test with less iterations
Jun 5, 2025
86fd5a7
cache key tests
Jun 6, 2025
ae27c0c
specify directory
Jun 6, 2025
2c2042c
add init
Jun 6, 2025
cc48c81
remove dir
Jun 6, 2025
4aa527a
no need for extra unit test run
Jun 6, 2025
9eac18e
model rdm with file pairs
Jun 23, 2025
2246c92
method to load from dictionary
Jun 26, 2025
35b33a9
handle empty results
Aug 7, 2025
a217375
keep track of cache hits
Aug 7, 2025
43d7ff0
format
Aug 28, 2025
93dac09
add logging to mrdm cache
Sep 29, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions .github/workflows/nosetests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,17 +46,30 @@ jobs:
- name: show python path
run: |
python -c "import sys; print('\n'.join(sys.path))"
- name: Unit Testing

- name: Unit Testing with coverage using nosetests
if: matrix.python-version != '3.11'
run: |
source activate test-env
python --version
conda install -q nose coveralls
nosetests --with-coverage
source activate test-env
python --version
conda install -q nose coveralls
nosetests --exe --with-coverage --cover-package rsa
#~ python -m unittest

# Step to run nosetests without coverage for Python 3.11
- name: Unit Testing without coverage using nosetest
if: matrix.python-version == '3.11'
run: |
source activate test-env
python --version
conda install -q nose coveralls
nosetests --exe

- name: Coveralls
uses: AndreMiras/coveralls-python-action@develop
with:
parallel: true
flag-name: Unit Testing
flag-name: Unit Testing with coverage

finish:
needs: build
Expand Down
Empty file added rsa/cache/__init__.py
Empty file.
15 changes: 15 additions & 0 deletions rsa/cache/cache_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@

class CacheKey:
    """Builds and parses order-independent cache keys for pairs of strings.

    Two strings always map to the same key regardless of argument order,
    so (x, y) and (y, x) address the same cache entry.
    """

    def __init__(self, separator='__-__'):
        """
        Args:
            separator (str): Delimiter placed between the two key elements.
        """
        self.separator = separator

    def join(self, x, y):
        """Return the canonical key for the pair (x, y).

        The two elements are ordered lexicographically before joining, which
        makes the result independent of argument order.
        """
        lo, hi = (x, y) if x <= y else (y, x)
        return f'{lo}{self.separator}{hi}'

    def split(self, key):
        """Inverse of join(): recover the two (sorted) elements from a key.

        Raises:
            ValueError: If the key does not contain exactly one separator.
        """
        left, right = key.split(self.separator)
        return left, right
74 changes: 64 additions & 10 deletions rsa/cache/rdm_cache.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,88 @@
import yaml


def _get_key(x, y):
pair = sorted([x, y])
key = ''.join(pair)
return key
from rsa.cache.cache_key import CacheKey


class RDMCache:

def __init__(self):
    # Maps string keys (built by CacheKey from a pair of paths) to cached values.
    self.cache_dict = {}
    # Handles joining/splitting of order-independent pair keys.
    self.key_handler = CacheKey()

def load_from_dict(self, data):
    """Populate the cache from a deserialized dictionary.

    Two layouts are supported:

    * legacy (no ``version`` key): ``data`` is taken verbatim as the
      cache mapping;
    * version ``250605``: ``data`` holds ``cache`` (keys joining *indices*
      into ``flist``), ``flist`` (the file list) and optionally
      ``separator``. Index-based keys are rebuilt into full-path keys.

    Args:
        data (dict): Parsed cache content (e.g. from YAML).

    Raises:
        KeyError: If a version-250605 payload lacks 'cache' or 'flist'.
    """
    version = data.get('version', None)
    if version is None:
        # Legacy files store the mapping directly.
        self.cache_dict = data

    elif version == 250605:
        if 'cache' in data and 'flist' in data:
            flist = data['flist']
            # Honour the separator the file was written with.
            self.key_handler.separator = data.get('separator', self.key_handler.separator)
            for k, v in data['cache'].items():
                xi, yi = self.key_handler.split(k)
                # Map index-based keys back to full-path keys.
                k_new = self.key_handler.join(flist[int(xi)], flist[int(yi)])
                self.cache_dict[k_new] = v
        else:
            raise KeyError("Cache file does not contain 'cache' or 'flist' keys.")
    # NOTE(review): payloads with an unrecognised version are silently
    # ignored — consider raising to surface format mismatches.

def load_from_file(self, fp_cache):
with open(fp_cache, 'r') as h:
self.cache_dict = yaml.safe_load(h)
data = yaml.safe_load(h)

self.load_from_dict(data)

def save_to_file(self, fp_dst):
with open(fp_dst, 'w') as h:
h.write(yaml.dump(self.cache_dict))

keys = list(self.cache_dict.keys())
flist = set()
for k in keys:
x, y = self.key_handler.split(k)
flist.add(x)
flist.add(y)
flist = sorted(list(flist))

cache_dict_mapped = {}
for k, v in self.cache_dict.items():
x, y = self.key_handler.split(k)
xi = flist.index(x)
yi = flist.index(y)
k_new = self.key_handler.join(str(xi), str(yi))
cache_dict_mapped[k_new] = v

h.write(yaml.dump({
'version': 250605, # Add version information
'cache': cache_dict_mapped,
'flist': list(flist),
'separator': self.key_handler.separator
}))

# def is_in(self, x, y):
#
# key = self._get_key(x, y)
# return key in self.cache_dict

def add(self, x, y, value):
key = _get_key(x, y)
key = self.key_handler.join(x, y)
self.cache_dict[key] = value

def get(self, x, y, default_value):
key = _get_key(x, y)
"""
Retrieves a cached value for a pair of elements.

Args:
x (str): First element.
y (str): Second element.
default_value: Value to return if the pair is not in the cache.

Returns:
Cached value or the default value.
"""
key = self.key_handler.join(x, y)
return self.cache_dict.get(key, default_value)
31 changes: 31 additions & 0 deletions rsa/cache/test_cache_key.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Python
import unittest
from rsa.cache.cache_key import CacheKey

class TestCacheKey(unittest.TestCase):
    """Unit tests for CacheKey.join / CacheKey.split."""

    def setUp(self):
        # Fresh handler with the default-style separator for every test.
        self.cache_key = CacheKey(separator="__-__")

    def test_join_creates_key(self):
        """join() concatenates the two elements with the separator."""
        key = self.cache_key.join("file1", "file2")
        self.assertEqual(key, "file1__-__file2", "The key should be correctly joined with the separator.")

    def test_join_is_order_independent(self):
        """join() yields the same key for (x, y) and (y, x)."""
        key1 = self.cache_key.join("file1", "file2")
        key2 = self.cache_key.join("file2", "file1")
        self.assertEqual(key1, key2, "The key should be the same regardless of the order of x and y.")

    def test_split_key(self):
        """split() recovers both elements from a joined key."""
        key = "file1__-__file2"
        x, y = self.cache_key.split(key)
        self.assertEqual(x, "file1", "The first element should be correctly split from the key.")
        self.assertEqual(y, "file2", "The second element should be correctly split from the key.")

    def test_split_and_join_consistency(self):
        """split() followed by join() reproduces the original key."""
        key = self.cache_key.join("file1", "file2")
        x, y = self.cache_key.split(key)
        self.assertEqual(key, self.cache_key.join(x, y), "Splitting and rejoining the key should result in the same key.")

if __name__ == "__main__":
unittest.main()
90 changes: 90 additions & 0 deletions rsa/cache/test_rdm_cache.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Python
import unittest
import tempfile
import os
import yaml
from rsa.cache.rdm_cache import RDMCache


class TestRDMCacheAddAndGet(unittest.TestCase):
    """Unit tests for RDMCache.add / RDMCache.get."""

    def setUp(self):
        # Fresh, empty cache for every test.
        self.cache = RDMCache()

    def test_add_and_get(self):
        """A value added for a pair can be retrieved for that pair."""
        # Add a pair to the cache
        self.cache.add("file1.txt", "file2.txt", 42)

        # Retrieve the value for the added pair
        value = self.cache.get("file1.txt", "file2.txt", None)
        self.assertEqual(value, 42, "The value retrieved from the cache should match the added value.")

    def test_get_default_value(self):
        """Missing pairs return the caller-supplied default, whatever it is."""
        # Attempt to retrieve a value for a pair not in the cache
        default_value = self.cache.get("file3.txt", "file4.txt", -1)
        self.assertEqual(default_value, -1, "The default value should be returned for a missing pair.")
        default_value = self.cache.get("file3.txt", "file4.txt", -2)
        self.assertEqual(default_value, -2, "The default value should be returned for a missing pair.")

    def test_overwrite_existing_value(self):
        """Re-adding the same pair overwrites the previously stored value."""
        # Add a pair to the cache
        self.cache.add("file1.txt", "file2.txt", 42)

        # Overwrite the value for the same pair
        self.cache.add("file1.txt", "file2.txt", 84)

        # Retrieve the updated value
        updated_value = self.cache.get("file1.txt", "file2.txt", None)
        self.assertEqual(updated_value, 84, "The value should be updated when the pair is added again.")


class TestRDMCacheSerializationVersion250605(unittest.TestCase):
    """Loading a version-250605 file maps index-based keys back to full paths."""

    def setUp(self):
        self.cache = RDMCache()

    def test_load_version_250605(self):
        """Index keys in 'cache' are resolved through 'flist' on load."""
        # Prepare cache data for version 250605
        data = {
            "version": 250605,
            "cache": {"0__-__1": 42, "2__-__3": 84},
            "flist": ["/path/to/file1", "/path/to/file2", "/path/to/file3", "/path/to/file4"],
            "separator": "__-__"
        }

        # Write the payload to a temporary YAML file for load_from_file().
        with tempfile.NamedTemporaryFile(delete=False, mode='w') as temp_file:
            fp = temp_file.name
            yaml.dump(data, temp_file)

        try:
            self.cache.load_from_file(fp)
            self.assertEqual(self.cache.get("/path/to/file1", "/path/to/file2", None), 42)
            self.assertEqual(self.cache.get("/path/to/file3", "/path/to/file4", None), 84)
        finally:
            # Clean up the temp file even if assertions fail.
            os.remove(fp)


class TestRDMCacheSerializationNoVersion(unittest.TestCase):
    """Files without a 'version' key are loaded verbatim (legacy layout)."""

    def setUp(self):
        self.cache = RDMCache()

    def test_load_no_version(self):
        """Legacy keys are used as-is; no flist remapping is applied."""
        # Prepare cache data without version
        cache_data = {"0__-__1": 42, "2__-__3": 84}

        # avoid using save_to_file and write to yaml directly
        with tempfile.NamedTemporaryFile(delete=False, mode='w') as temp_file:
            fp = temp_file.name
            yaml.dump(cache_data, temp_file)

        try:
            self.cache.load_from_file(fp)
            self.assertEqual(self.cache.get("0", "1", None), 42)
            self.assertEqual(self.cache.get("2", "3", None), 84)
        finally:
            # Clean up the temp file even if assertions fail.
            os.remove(fp)


if __name__ == "__main__":
unittest.main()
Empty file added rsa/corr/__init__.py
Empty file.
Empty file added rsa/fpairs/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions rsa/fpairs/file_pairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
import os
from itertools import product


class FilePairs:
    """Container for pairs of file-path strings.

    On construction with a non-empty list of tuples it derives:

    * ``flist``: sorted list of the unique strings appearing in any pair;
    * ``tuple_indices``: each pair expressed as indices into ``flist``.

    An empty input list leaves ``pairs``, ``flist`` and ``tuple_indices``
    as ``None``.
    """

    def __init__(self, fpairs):
        """
        Args:
            fpairs (list): List of tuples of strings (file path pairs).

        Raises:
            TypeError: If ``fpairs`` is not a list (including ``None``).
            ValueError: If any element is not a tuple of strings.
        """
        self.pairs = None
        self.flist = None
        self.tuple_indices = None
        self._validate_pairs(fpairs)

        # Validation guarantees fpairs is a list here, so truthiness is
        # equivalent to "not None and len > 0".
        if fpairs:
            self.pairs = fpairs
            self.generate_unique_strings_and_indices()

    @staticmethod
    def _validate_pairs(pairs):
        """Validate that ``pairs`` is a list of tuples of strings.

        Args:
            pairs (list): Candidate list of string tuples.

        Raises:
            TypeError: If ``pairs`` is not a list.
            ValueError: If ``pairs`` contains anything other than tuples of strings.
        """
        if not isinstance(pairs, list):
            raise TypeError("pairs must be a list.")
        for item in pairs:
            if not isinstance(item, tuple) or not all(isinstance(sub_item, str) for sub_item in item):
                raise ValueError("pairs must contain only tuples of strings.")

    def generate_unique_strings_and_indices(self):
        """Derive ``flist`` and ``tuple_indices`` from ``self.pairs``.

        Raises:
            ValueError: If ``self.pairs`` is empty or unset.
        """
        if not self.pairs:
            raise ValueError("fpath_list must be a list of tuples of strings to generate indices.")
        self.flist = sorted({s for tup in self.pairs for s in tup})

        # Precomputed index map gives O(1) lookups; calling list.index()
        # inside the loop would be O(n) per element (O(n^2) overall).
        index_of = {s: i for i, s in enumerate(self.flist)}
        self.tuple_indices = [tuple(index_of[s] for s in tup) for tup in self.pairs]

    def save_to_files(self, save_dir, flist_path='flist.txt', indices_path='indices.txt'):
        """Write ``flist`` and ``tuple_indices`` as text files in ``save_dir``.

        Args:
            save_dir (str): Existing directory to write into.
            flist_path (str): File name for the file list (one path per line).
            indices_path (str): File name for the indices (comma-separated per line).
        """
        # NOTE(review): assumes the instance was built from a non-empty pair
        # list; saving an empty FilePairs fails on the None attributes.
        flist_full_path = os.path.join(save_dir, flist_path)
        indices_full_path = os.path.join(save_dir, indices_path)

        with open(flist_full_path, 'w') as f:
            f.write('\n'.join(self.flist))

        with open(indices_full_path, 'w') as f:
            for indices in self.tuple_indices:
                f.write(','.join(map(str, indices)) + '\n')

    def load(self, load_dir, flist_path='flist.txt', indices_path='indices.txt'):
        """Load ``flist`` and indices from ``load_dir`` and rebuild ``pairs``.

        If the indices file is missing, the full ordered cross product of
        ``flist`` with itself (including self-pairs) is generated instead.

        Args:
            load_dir (str): Directory containing the files.
            flist_path (str): File name of the file list.
            indices_path (str): File name of the indices file (optional on disk).
        """
        flist_full_path = os.path.join(load_dir, flist_path)
        with open(flist_full_path, 'r') as f:
            self.flist = f.read().splitlines()

        indices_full_path = os.path.join(load_dir, indices_path)
        if os.path.isfile(indices_full_path):
            with open(indices_full_path, 'r') as f:
                self.tuple_indices = [tuple(map(int, line.split(','))) for line in f]
        else:
            # No indices on disk: fall back to every ordered pair of files.
            numels = len(self.flist)
            self.tuple_indices = sorted(product(range(numels), repeat=2))

        self.pairs = [tuple(self.flist[i] for i in indices) for indices in self.tuple_indices]

    def __len__(self):
        """
        Returns the number of file pairs.

        Returns:
            int: Number of file pairs.
        """
        return len(self.pairs)

    def get(self, idx):
        """Return the pair at position ``idx``.

        Args:
            idx (int): Index into the pair list.

        Returns:
            tuple: The file pair at that position.
        """
        return self.pairs[idx]
25 changes: 25 additions & 0 deletions rsa/fpairs/file_pairs_cross_list.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Python
from itertools import product
from rsa.fpairs.file_pairs import FilePairs


class FilePairsCrossList(FilePairs):
    def __init__(self, flist1, flist2):
        """
        Initializes the FilePairsCrossList class with pairs constructed
        from the cross product of two lists of files; pairs whose two
        elements are the same string are excluded.

        Args:
            flist1 (list): First list of file names.
            flist2 (list): Second list of file names.

        Raises:
            TypeError: If either input is not a list.
            ValueError: If either list contains non-string items.
        """
        if not isinstance(flist1, list) or not isinstance(flist2, list):
            raise TypeError("Both inputs must be lists.")

        if not all(isinstance(item, str) for item in flist1 + flist2):
            raise ValueError("Both lists must contain only strings.")

        # Generate pairs between the two lists (full cross product).
        fpairs = list(product(flist1, flist2))
        fpairs = sorted([(a, b) for a, b in fpairs if a != b])  # drop same-name pairs
        super().__init__(fpairs)
Loading