Skip to content

Commit 4b485ab

Browse files
authored
Merge pull request #38 from MIDRC/feature/pip_support
Fix color palettes
2 parents b70ab4a + 4736fad commit 4b485ab

11 files changed

Lines changed: 248 additions & 85 deletions

File tree

docs/source/modules.rst

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
midrc_react
2-
===========
1+
src
2+
===
33

44
.. toctree::
5-
:maxdepth: 8
5+
:maxdepth: 7
66

77
midrc_react

jsdconfig-zipcode.yaml

Lines changed: 50 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,22 @@
11
data sources:
22
# The data sources will be loaded in the order they are populated here
33
- name: MIDRC
4-
description: MIDRC Excel File
4+
description: MIDRC TSV File
55
data type: file
6-
filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
7-
remove column name text: [(CUSUM)]
6+
filename: data/midrc_data_download-2025-01-29.tsv
7+
columns:
8+
- Age at Index
9+
- Ethnicity
10+
- Race
11+
- Sex
12+
- COVID-19 Positive
13+
- Race and Ethnicity
14+
numeric_cols:
15+
Age at Index:
16+
raw column: age_at_index
17+
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
18+
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
19+
plugin: midrc_tsv_loader
820

921
- name: CDC
1022
description: CDC Excel File
@@ -18,47 +30,71 @@ data sources:
1830
date: '2020-01-01' # The census file does not have a date column, so we specify the date here
1931

2032
- name: MIDRC COVID+
21-
description: MIDRC COVID+ Excel File
33+
description: MIDRC COVID+ TSV File
2234
data type: file
23-
filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
24-
remove column name text: [(CUSUM)]
35+
filename: data/midrc_data_download-2025-01-29_covid_pos.tsv
36+
columns:
37+
- Age at Index
38+
- Ethnicity
39+
- Race
40+
- Sex
41+
- COVID-19 Positive
42+
- Race and Ethnicity
43+
numeric_cols:
44+
Age at Index:
45+
raw column: age_at_index
46+
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
47+
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
48+
plugin: midrc_tsv_loader
2549

2650
- name: Zip Code 1
27-
description: Zip Code 1 Excel File
51+
description: Zip Code 1 TSV File
2852
data type: file
2953
filename: data/midrc_data_download-2025-01-29_0.tsv
3054
columns:
3155
- Age at Index
3256
- Ethnicity
3357
- Race
3458
- Sex
35-
- Covid19 Positive
59+
- COVID-19 Positive
3660
- Race and Ethnicity
3761
numeric_cols:
3862
Age at Index:
3963
raw column: age_at_index
40-
bins: [ 0, 17, 50, 65, 1000 ]
41-
labels: ['0-17', "18-49", '50-64', '65+']
64+
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
65+
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
4266
plugin: midrc_tsv_loader
4367

4468
- name: Zip Code 2
45-
description: Zip Code 2 Excel File
69+
description: Zip Code 2 TSV File
4670
data type: file
4771
filename: data/midrc_data_download-2025-01-29_1.tsv
4872
columns:
4973
- Age at Index
5074
- Ethnicity
5175
- Race
5276
- Sex
53-
- Covid19 Positive
77+
- COVID-19 Positive
5478
- Race and Ethnicity
5579
numeric_cols:
5680
Age at Index:
5781
raw column: age_at_index
58-
bins: [ 0, 17, 50, 65, 1000 ]
59-
labels: ['0-17', "18-49", '50-64', '65+']
82+
bins: [ 0, 5, 12, 16, 18, 30, 40, 50, 65, 75, 85, 1000 ]
83+
labels: [ '0-4', '5-11', '12-15', '16-17', '18-29', '30-39', '40-49', '50-64', '65-74', '75-84', '85+' ]
6084
plugin: midrc_tsv_loader
6185

86+
- name: MIDRC COVID+
87+
description: MIDRC COVID+ Excel File
88+
data type: file
89+
filename: data/MIDRC Open A1 and R1 COVIDpos only - cumulative by batch.xlsx
90+
remove column name text: [(CUSUM)]
91+
92+
- name: MIDRC
93+
description: MIDRC Excel File
94+
data type: file
95+
filename: data/MIDRC Open A1 and R1 - cumulative by batch.xlsx
96+
remove column name text: [(CUSUM)]
97+
6298
# TODO: The following should be moved into QSettings for modifications within the GUI
6399
# For custom age columns, please use .inf as the maximum age in the final age group
64100
custom age ranges:

src/midrc_react/core/aggregate_jsd_calc.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,22 @@ def calc_jsd_from_counts_dict(counts_dict, dataset_names):
6262

6363
return output_dict
6464

65+
def calc_jsd_by_features_combined(combined_df: pd.DataFrame, cols_to_use: list[str], dataset_column) -> dict[str, float]:
    """
    Calculate the JSD between the datasets contained in a single combined DataFrame.

    Args:
        combined_df (pd.DataFrame): DataFrame whose rows are tagged with their dataset in dataset_column.
        cols_to_use (list[str]): Columns whose value combinations are counted for each dataset.
        dataset_column: Name of the column in combined_df that identifies the dataset of each row.

    Returns:
        The value returned by calc_jsd_from_counts_dict for the per-dataset counts.
    """
    # Count the rows of every dataset for each combination of the selected columns
    counts_table = combined_df.pivot_table(
        index=cols_to_use,
        columns=dataset_column,
        aggfunc='size',
        fill_value=0,
    ).reset_index()

    # Dataset identifiers may be integers, so both the column names and the labels are cast to str
    counts_table.columns = counts_table.columns.astype(str)
    labels = combined_df[dataset_column].unique().astype(str)

    # Collect one counts array per dataset, using zeros when a dataset has no column in the table
    num_combinations = len(counts_table)
    counts_dict = {}
    for label in labels:
        if label in counts_table:
            counts_dict[label] = counts_table[label].values
        else:
            counts_dict[label] = np.zeros(num_combinations)

    return calc_jsd_from_counts_dict(counts_dict, labels)
80+
6581

6682
def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) -> dict[str, float]:
6783
"""
@@ -76,21 +92,7 @@ def calc_jsd_by_features(df_list: list[pd.DataFrame], cols_to_use: list[str]) ->
7692
"""
7793
dataset_column = '_dataset_' # Temporary column name to store dataset information
7894
combined_df = combine_datasets_from_list(df_list, dataset_column)
79-
80-
# Pivot table to get counts for each combination
81-
pivot_table = combined_df.pivot_table(index=cols_to_use, columns=dataset_column, aggfunc='size', fill_value=0)
82-
pivot_table = pivot_table.reset_index()
83-
84-
# Convert dataset columns to string in case they are integers
85-
pivot_table.columns = pivot_table.columns.astype(str)
86-
87-
labels = combined_df[dataset_column].unique()
88-
89-
# Create a dictionary to hold counts for each dataset
90-
counts_dict = {dataset: pivot_table[dataset].values if dataset in pivot_table else np.zeros(len(pivot_table)) for
91-
dataset in labels}
92-
93-
return calc_jsd_from_counts_dict(counts_dict, labels)
95+
return calc_jsd_by_features_combined(combined_df, cols_to_use, dataset_column)
9496

9597

9698
def calc_jsd_by_features_2df(df1: pd.DataFrame, df2: pd.DataFrame, cols_to_use: list[str]) -> float:

src/midrc_react/core/data_preprocessing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,12 @@ def _adjust_outliers(df: pd.DataFrame, cut_column_name: str, column_name: str, b
6060
low_text = "Outlier_Low"
6161
high_text = "Outlier_High"
6262
print(f"WARNING: There are values outside the bins specified for the '{column_name}' column.")
63-
df.loc[df[cut_column_name].isna() & (df[column_name] < bins[0]), cut_column_name] = low_text
64-
df.loc[df[cut_column_name].isna() & (df[column_name] >= bins[-1]), cut_column_name] = high_text
63+
64+
# Only compare numeric values, ignore strings or other types
65+
col_numeric = pd.to_numeric(df[column_name], errors='coerce')
66+
67+
df.loc[df[cut_column_name].isna() & (col_numeric < bins[0]), cut_column_name] = low_text
68+
df.loc[df[cut_column_name].isna() & (col_numeric >= bins[-1]), cut_column_name] = high_text
6569
df.loc[df[cut_column_name].isna(), cut_column_name] = new_text
6670
if (df[cut_column_name] == low_text).sum() > 0:
6771
print(f" {(df[cut_column_name] == low_text).sum()} values are below the min bin value.\n"

src/midrc_react/core/excel_layout.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,11 @@ def build_data_frames_from_csv(self, filename: str):
162162
# Apply numeric column adjustments
163163
df = self.apply_numeric_column_adjustments(df)
164164

165+
# Convert all non-numeric columns to string
166+
for col in self._columns:
167+
if col in df.columns:
168+
df[col] = df[col].astype(str)
169+
165170
self.raw_data = df
166171
self.create_sheets_from_df(df)
167172

src/midrc_react/core/famd_calc.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -165,15 +165,20 @@ def calc_famd_distances(df, cols_to_use, numeric_cols, dataset_column='_dataset_
165165
dict: Dictionary of distance values specified in distance_metrics for each dataset combination.
166166
167167
"""
168-
return calc_distances_via_df(calc_famd_df(df, cols_to_use, numeric_cols, print_outliers=print_outliers),
168+
return calc_distances_via_df(calc_famd_df(df,
169+
cols_to_use,
170+
numeric_cols,
171+
dataset_column,
172+
print_outliers=print_outliers
173+
),
169174
'famd_x_coordinates',
170-
dataset_column,
175+
dataset_column=dataset_column,
171176
distance_metrics=distance_metrics,
172177
jsd_scaled_bin_width=jsd_scaled_bin_width,
173178
)
174179

175180

176-
def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
181+
def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date, dataset_column='_dataset_'):
177182
"""
178183
Calculate the KS2 distance between two datasets at a specific date.
179184
@@ -190,7 +195,6 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
190195
df1_at_date = df1[df1['date'] <= calc_date]
191196
df2_at_date = df2[df2['date'] <= calc_date]
192197

193-
dataset_column = '_dataset_'
194198
combined_df = combine_datasets_from_list([df1_at_date, df2_at_date], dataset_column=dataset_column)
195199

196200
distance_metrics = ['ks2']
@@ -199,7 +203,7 @@ def calc_famd_ks2_at_date(df1, df2, cols_to_use, numeric_cols, calc_date):
199203
return distance_dict['ks2']['Dataset 0 vs Dataset 1']
200204

201205

202-
def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
206+
def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list, dataset_column='_dataset_'):
203207
"""
204208
Calculate the KS2 distance between two datasets at multiple dates.
205209
@@ -213,10 +217,9 @@ def calc_famd_ks2_at_dates(df1, df2, cols_to_use, numeric_cols, calc_date_list):
213217
Returns:
214218
list(float): list of KS2 distances at each date
215219
"""
216-
dataset_column = '_dataset_'
217220
combined_df = combine_datasets_from_list([df1, df2], dataset_column=dataset_column)
218221

219-
famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols)
222+
famd_df = calc_famd_df(combined_df, cols_to_use, numeric_cols, dataset_column=dataset_column)
220223

221224
# Add date column to the DataFrame after FAMD fitting
222225
famd_df['date'] = combined_df['date']

src/midrc_react/core/jsdcontroller.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,9 @@ def connect_signals(self):
9696
dataselectiongroupbox_class_name = type(jsd_view.dataselectiongroupbox).__name__
9797

9898
if dataselectiongroupbox_class_name == 'JsdDataSelectionGroupBox':
99-
for f_c in jsd_view.dataselectiongroupbox.file_comboboxes:
100-
f_c.currentIndexChanged.connect(self.file_changed)
10199
jsd_view.dataselectiongroupbox.num_data_items_changed.connect(self.file_changed)
102100
jsd_view.dataselectiongroupbox.file_checkbox_state_changed.connect(self.file_changed)
101+
jsd_view.dataselectiongroupbox.file_combobox_changed.connect(self.file_changed)
103102
jsd_view.dataselectiongroupbox.category_combobox.currentIndexChanged.connect(self.category_changed)
104103

105104
elif dataselectiongroupbox_class_name == 'DataSelectionGroupBox':
@@ -399,12 +398,15 @@ def update_file_based_charts(self):
399398
sheet_dict[i] = self.get_file_sheets_from_index(i)
400399

401400
spider_plot_values = self.get_spider_plot_values(spider_plot_date)
402-
self.jsd_view.update_spider_chart(spider_plot_values)
401+
try:
402+
self.jsd_view.update_spider_chart(spider_plot_values)
403+
except (ValueError, KeyError, TypeError):
404+
print('An error occurred during the update of the spider chart.')
403405

404406
try:
405407
self.jsd_view.update_pie_chart_dock(sheet_dict)
406408
except (ValueError, KeyError, TypeError):
407-
return False
409+
print('An error occurred during the update of file-based charts.')
408410

409411
return True
410412

src/midrc_react/gui/pyside6/copyabletableview.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
from PySide6.QtCore import QDate, QEvent, QObject, Qt
2525
from PySide6.QtGui import QGuiApplication, QKeySequence
26-
from PySide6.QtWidgets import QTableView
26+
from PySide6.QtWidgets import QTableView, QMenu
2727

2828

2929
class CopyableTableView(QTableView):
@@ -78,3 +78,23 @@ def copy_selection(self) -> None:
7878
stream = io.StringIO()
7979
csv.writer(stream, delimiter='\t').writerows(table)
8080
QGuiApplication.clipboard().setText(stream.getvalue())
81+
82+
def contextMenuEvent(self, event) -> None:
    """
    Show a right-click context menu offering 'Select All' and 'Copy'.

    Args:
        event (QContextMenuEvent): The context menu event.

    Returns:
        None
    """
    menu = QMenu(self)

    # Pair each menu entry with the method it triggers; entries are added in display order
    handlers = (
        (menu.addAction("Select All"), self.selectAll),
        (menu.addAction("Copy"), self.copy_selection),
    )

    # exec() returns the action that was triggered; nothing runs when no entry matches
    chosen = menu.exec(event.globalPos())
    for menu_action, handler in handlers:
        if chosen == menu_action:
            handler()
            break

src/midrc_react/gui/pyside6/dataselectiongroupbox.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ class JsdDataSelectionGroupBox(QGroupBox, GroupBoxData):
3838
"""
3939
num_data_items_changed: Signal = Signal(int)
4040
file_checkbox_state_changed: Signal = Signal(bool)
41+
file_combobox_changed: Signal = Signal(int)
4142
NUM_DEFAULT_DATA_ITEMS: int = 2
4243

4344
def __init__(self, data_sources):
@@ -145,6 +146,7 @@ def add_file_combobox_to_layout(self, auto_populate: bool = True):
145146
self.form_layout.insertRow(index - 1, new_label, new_hbox)
146147

147148
self.file_comboboxes.append(new_combobox)
149+
new_combobox.currentIndexChanged.connect(self.file_combobox_changed.emit)
148150
self.file_checkboxes.append(new_checkbox)
149151
new_checkbox.toggled.connect(self.file_checkbox_state_changed.emit)
150152

0 commit comments

Comments
 (0)