|
| 1 | +""" |
| 2 | +MIT License |
| 3 | +
|
| 4 | +Copyright (c) 2023 Ulster University (https://www.ulster.ac.uk). |
| 5 | +Project: Harmony (https://harmonydata.ac.uk) |
| 6 | +Maintainer: Thomas Wood (https://fastdatascience.com) |
| 7 | +
|
| 8 | +Permission is hereby granted, free of charge, to any person obtaining a copy |
| 9 | +of this software and associated documentation files (the "Software"), to deal |
| 10 | +in the Software without restriction, including without limitation the rights |
| 11 | +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 12 | +copies of the Software, and to permit persons to whom the Software is |
| 13 | +furnished to do so, subject to the following conditions: |
| 14 | +
|
| 15 | +The above copyright notice and this permission notice shall be included in all |
| 16 | +copies or substantial portions of the Software. |
| 17 | +
|
| 18 | +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 19 | +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 20 | +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 21 | +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 22 | +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 23 | +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 24 | +SOFTWARE. |
| 25 | +
|
| 26 | +""" |
| 27 | + |
| 28 | +import sys |
| 29 | +import unittest |
| 30 | +import pandas as pd |
| 31 | +import numpy as np |
| 32 | + |
| 33 | +sys.path.append("../src") |
| 34 | + |
| 35 | +from harmony.matching.generate_crosswalk_table import generate_crosswalk_table |
| 36 | +from harmony import match_instruments |
| 37 | +from harmony.schemas.requests.text import Instrument, Question |
| 38 | + |
class TestGenerateCrosswalkTable(unittest.TestCase):
    """Tests for ``generate_crosswalk_table``.

    Each test calls ``generate_crosswalk_table(questions, similarity,
    threshold)`` and inspects the returned table through the pandas
    DataFrame interface (``iterrows``, ``empty`` and ``len``).
    """

    def setUp(self):
        """Build the fixtures shared by the tests in this class."""
        # Hand-written questions, paired below with a hand-written
        # similarity matrix.
        self.all_questions_dummy = [
            Question(question_no="1", question_text="potato"),
            Question(question_no="2", question_text="tomato"),
            Question(question_no="3", question_text="radish"),
        ]
        self.instruments_dummy = Instrument(questions=self.all_questions_dummy)

        # Symmetric 3x3 matrix: every off-diagonal value is above
        # self.threshold, so the dummy-data test expects all three pairs.
        self.similarity = np.array([
            [1.0, 0.7, 0.9],
            [0.7, 1.0, 0.8],
            [0.9, 0.8, 1.0],
        ])

        # Two questions whose similarity is computed by match_instruments
        # inside the test rather than hard-coded here.
        self.all_questions_real = [
            Question(question_no="1", question_text="Feeling nervous, anxious, or on edge"),
            Question(question_no="2", question_text="Not being able to stop or control worrying"),
        ]
        self.instruments = Instrument(questions=self.all_questions_real)

        self.threshold = 0.6

    def _assert_rows_present(self, expected_matches, result):
        """Assert that every expected match equals some row of ``result``.

        Args:
            expected_matches: list of dicts, one per expected table row.
            result: the table returned by ``generate_crosswalk_table``.
        """
        for _, expected_row in pd.DataFrame(expected_matches).iterrows():
            found = any(
                expected_row.equals(actual_row)
                for _, actual_row in result.iterrows()
            )
            # Include both sides in the message so a failure is diagnosable
            # without re-running under a debugger.
            self.assertTrue(
                found,
                f"expected row not found in crosswalk table:\n{expected_row}"
                f"\n\nactual table:\n{result}",
            )

    def test_generate_crosswalk_table_dummy_data(self):
        """All three question pairs are reported for the dummy matrix."""
        result = generate_crosswalk_table(
            self.instruments_dummy.questions, self.similarity, self.threshold
        )

        expected_matches = [
            {"pair_name": "0_1", "question1_no": "1", "question1_text": "potato",
             "question2_no": "2", "question2_text": "tomato", "match_score": 0.7},
            {"pair_name": "0_2", "question1_no": "1", "question1_text": "potato",
             "question2_no": "3", "question2_text": "radish", "match_score": 0.9},
            {"pair_name": "1_2", "question1_no": "2", "question1_text": "tomato",
             "question2_no": "3", "question2_text": "radish", "match_score": 0.8},
        ]

        self._assert_rows_present(expected_matches, result)
        # Equal lengths rule out additional, unexpected rows.
        self.assertEqual(len(result), len(expected_matches))

    def test_generate_crosswalk_table_empty(self):
        """An identity similarity matrix is expected to give an empty table."""
        # Only the diagonal is non-zero, so no pair of distinct questions
        # reaches the threshold.
        empty_similarity = np.eye(3)
        result = generate_crosswalk_table(
            self.all_questions_dummy, empty_similarity, self.threshold
        )
        self.assertTrue(result.empty)

    def test_generate_crosswalk_table_real(self):
        """Real questions: no rows at threshold 0.6, exactly one at 0.5."""
        all_questions, similarity_with_polarity, _, _ = match_instruments([self.instruments])

        # The original looped over an always-empty expected list, which
        # asserted nothing; the zero-row expectation is now stated directly.
        strict_result = generate_crosswalk_table(
            all_questions, similarity_with_polarity, self.threshold
        )
        self.assertEqual(
            len(strict_result), 0,
            f"expected no matches at threshold {self.threshold}",
        )

        lower_threshold = 0.5
        relaxed_result = generate_crosswalk_table(
            all_questions, similarity_with_polarity, lower_threshold
        )
        self.assertEqual(
            len(relaxed_result), 1,
            f"expected exactly one match at threshold {lower_threshold}",
        )
| 98 | + |
| 99 | + |
if __name__ == "__main__":
    # Run this module's tests when the file is executed directly as a script.
    unittest.main()
0 commit comments