@@ -41,20 +41,20 @@ def __init__(self, data_source, custom_age_ranges=None):
4141 data_source (dict): The data source configuration.
4242 custom_age_ranges (dict, optional): A dictionary of custom age ranges.
4343 """
44- self .name = data_source [ ' name' ]
44+ self .name = data_source . name
4545 self .sheets = {}
46- self .datatype = data_source [ 'data type' ]
47- self .filename = data_source [ ' filename' ]
46+ self .datatype = data_source . data_type
47+ self .filename = data_source . filename
4848 self .data_source = data_source
4949 self .custom_age_ranges = custom_age_ranges
50- self ._numeric_cols = data_source .get ( ' numeric_cols' , {}) # Extract numeric columns from config
51- self ._columns = data_source .get ( ' columns' , [])
50+ self ._numeric_cols = data_source .numeric_cols # Extract numeric columns from config
51+ self ._columns = data_source .columns
5252 self .raw_data = None
5353
5454 # Load preprocessing plugin if specified
5555 self .preprocessor = None
56- if 'plugin' in data_source and data_source [ ' plugin' ] :
57- plugin_name = data_source [ ' plugin' ]
56+ if data_source . plugin :
57+ plugin_name = data_source . plugin
5858 plugin_path = os .path .join ("plugins" , f"{ plugin_name } .py" )
5959 self .preprocessor = DataSource .load_plugin (plugin_path )
6060
@@ -64,8 +64,8 @@ def __init__(self, data_source, custom_age_ranges=None):
6464 self .build_data_frames_from_csv (self .filename )
6565 else :
6666 self .build_data_frames_from_file (self .filename )
67- if self .datatype == 'content' and 'content' in data_source :
68- self .build_data_frames_from_content (data_source [ ' content' ] )
67+ if self .datatype == 'content' and hasattr ( data_source , 'content' ) and data_source . content is not None :
68+ self .build_data_frames_from_content (data_source . content )
6969
7070 def raw_columns_to_use (self ):
7171 """
@@ -126,9 +126,9 @@ def apply_numeric_column_adjustments(self, df: pd.DataFrame):
126126 pd.DataFrame: The DataFrame with numeric column adjustments.
127127 """
128128 for str_col , col_dict in self ._numeric_cols .items ():
129- num_col = col_dict [ 'raw column' ] if 'raw column' in col_dict else str_col
130- bins = col_dict [ ' bins' ] if 'bins' in col_dict else None
131- labels = col_dict [ ' labels' ] if 'labels' in col_dict else None
129+ num_col = col_dict . raw_column if hasattr ( col_dict , 'raw_column' ) else str_col
130+ bins = col_dict . bins if hasattr ( col_dict , 'bins' ) else None
131+ labels = col_dict . labels if hasattr ( col_dict , 'labels' ) else None
132132
133133 if num_col in df .columns :
134134 df = bin_dataframe_column (df , num_col , str_col , bins = bins , labels = labels )
@@ -139,7 +139,6 @@ def apply_numeric_column_adjustments(self, df: pd.DataFrame):
139139 # else:
140140 # # Default "N-N" format conversion
141141 # df[str_col] = df[num_col].apply(lambda x: f'{int(x)}-{int(x)}' if pd.notna(x) else x)
142-
143142 return df
144143
145144 def build_data_frames_from_csv (self , filename : str ):
@@ -226,7 +225,7 @@ def create_sheets_from_df(self, df: pd.DataFrame):
226225 if col in df .columns :
227226 df_cumsum = self .calculate_cumulative_sums (df , col )
228227 if col in self ._numeric_cols :
229- labels = self ._numeric_cols [col ].get ( 'labels' , None )
228+ labels = self ._numeric_cols [col ].labels if hasattr ( self . _numeric_cols [ col ], 'labels' ) else None
230229 if labels :
231230 # The first column (e.g., date) remains at index 0.
232231 date_column = df_cumsum .columns [0 ]
@@ -333,25 +332,28 @@ def _process_date_column(self, data_source: dict):
333332 """
334333
335334 # This assumes that the first column is either the date column or does not have useful data
336- if data_source .get ('date' ):
335+ date_value = getattr (data_source , 'date' , None )
336+ if date_value :
337337 self ._df .drop (self ._df .columns [0 ], axis = 1 , inplace = True )
338- self ._df .insert (0 , 'date' , data_source [ 'date' ] , False )
338+ self ._df .insert (0 , 'date' , date_value , False )
339339
340340 self ._df ['date' ] = pd .to_datetime (self ._df ['date' ], errors = 'coerce' )
341341
342342 self ._columns ['date' ] = self ._df .columns [0 ]
343343
344- def _process_columns (self , data_source : dict ):
344+ def _process_columns (self , data_source ):
345345 """
346346 Process and rename columns according to the data source settings.
347347
348348 Args:
349- data_source (dict ): The data source object.
349+ data_source (DataSource ): The data source object.
350350 """
351351 for col in self ._df .columns [1 :]:
352352 col_name = col
353- if 'remove column name text' in data_source :
354- for txt in data_source ['remove column name text' ]:
353+ # Access remove_column_name_text from pydantic model
354+ remove_text = getattr (data_source , 'remove_column_name_text' , None )
355+ if remove_text :
356+ for txt in remove_text :
355357 col_name = col .split (txt )[0 ]
356358 col_name = col_name .rstrip ()
357359 self ._columns [col_name ] = col
0 commit comments