Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nc2pt/climatedata.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ClimateModel:
hr_ref: Optional[ClimateVariable] = None
engine: Optional[str] = None # Optional engine override for this model
emulation_data: Optional[bool] = False # Optional bool to handle metadata ingestion
loader: Optional[str] = "default" # Optional flag for UBC WRF specific io 'ubc_wrf'
alignment_pipeline: List[str] = field(default_factory=lambda: [
"temporal_crop", "regrid", "spatial_crop", "coarsen", "user_defined_transforms", "split_data"
])
Expand Down
5 changes: 3 additions & 2 deletions nc2pt/conf/climate_models/hr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

_target_: nc2pt.climatedata.ClimateModel
name: hr
info: "High Resolution USask WRF, Western Canada"
info: "High Resolution UBC WRF, Western Canada"
alignment_pipeline: ["temporal_crop", "spatial_crop", "user_defined_transforms", "split_data"]
loader: "ubc_wrf"

climate_variables:
- ${internal.hr_uas}
- ${internal.hr_vas}
- ${internal.hr_tas}
- ${internal.hr_pr}
- ${internal.hr_ps}
10 changes: 10 additions & 0 deletions nc2pt/conf/climate_models/hr/ps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@

_target_: nc2pt.climatedata.ClimateVariable
name: "ps"
alternative_names: ["PSFC"]
path: ${internal.paths.hr.ps}
is_west_negative: true
apply_standardize: false
apply_normalize: true
invariant: false
transform: []
5 changes: 4 additions & 1 deletion nc2pt/conf/climate_models/hr_invariant.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ engine: netcdf4
alignment_pipeline: ['spatial_crop']

climate_variables:
- ${internal.hr_invariant_topo}
- ${internal.hr_invariant_topo}
- ${internal.hr_invariant_land_mask}
- ${internal.hr_invariant_land_use}
- ${internal.hr_invariant_surface_roughness}
11 changes: 11 additions & 0 deletions nc2pt/conf/climate_models/hr_invariant/land_mask.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Land mask used in WRF simulations (HR Model)

_target_: nc2pt.climatedata.ClimateVariable
name: "land_mask"
alternative_names: ["LANDMASK"]
path: ${internal.paths.hr_invariant}
is_west_negative: false
invariant: true
apply_standardize: false
apply_normalize: true
transform: []
11 changes: 11 additions & 0 deletions nc2pt/conf/climate_models/hr_invariant/land_use.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Land use index (LU_INDEX) used in WRF simulations (HR Model)

_target_: nc2pt.climatedata.ClimateVariable
name: "land_use"
alternative_names: ["LU_INDEX"]
path: ${internal.paths.hr_invariant}
is_west_negative: false
invariant: true
apply_standardize: false
apply_normalize: true
transform: []
11 changes: 11 additions & 0 deletions nc2pt/conf/climate_models/hr_invariant/surface_roughness.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Surface roughness field (read from VAR_SSO) used in WRF simulations (HR Model)

_target_: nc2pt.climatedata.ClimateVariable
name: "surface_roughness"
alternative_names: ["VAR_SSO"]
path: ${internal.paths.hr_invariant}
is_west_negative: false
invariant: true
apply_standardize: false
apply_normalize: true
transform: []
8 changes: 4 additions & 4 deletions nc2pt/conf/climate_models/lr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ alignment_pipeline: ["temporal_crop", "regrid", "spatial_crop", "coarsen", "user
hr_ref:
_target_: nc2pt.climatedata.ClimateVariable
name: "hr_ref"
alternative_names: ["T2"]
alternative_names: ["HGT"]
path: ${internal.paths.hr_ref}
is_west_negative: true
invariant: true

climate_variables:
# Activate variables to include in the LR model
- ${internal.lr_uas}
#- ${internal.lr_vas}
#- ${internal.lr_tas}
#- ${internal.lr_pr}
- ${internal.lr_vas}
- ${internal.lr_tas}
- ${internal.lr_ps}
9 changes: 9 additions & 0 deletions nc2pt/conf/climate_models/lr/ps.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
_target_: nc2pt.climatedata.ClimateVariable
name: "ps"
alternative_names: ["PSFC"]
path: ${internal.paths.lr.ps}
is_west_negative: false
apply_standardize: false
apply_normalize: true
invariant: false
transform: []
2 changes: 1 addition & 1 deletion nc2pt/conf/climate_models/lr/tas.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ _target_: nc2pt.climatedata.ClimateVariable
name: "tas"
alternative_names: ["T2", "surface temperature"]
path: ${internal.paths.lr.tas}
is_west_negative: true
is_west_negative: false
apply_standardize: false
apply_normalize: true
invariant: false
Expand Down
4 changes: 2 additions & 2 deletions nc2pt/conf/climate_models/lr/uas.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ name: "uas"
alternative_names: ["U10", "u10", "uas"]
path: ${internal.paths.lr.uas}
is_west_negative: false
apply_standardize: true
apply_normalize: false
apply_standardize: false
apply_normalize: true
invariant: false
coarsening_method: "mean"
transform: []
5 changes: 4 additions & 1 deletion nc2pt/conf/climate_models/lr_invariant.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,7 @@ engine: netcdf4
alignment_pipeline: ['spatial_crop', 'coarsen']

climate_variables:
- ${internal.hr_invariant_topo}
- ${internal.hr_invariant_topo}
- ${internal.hr_invariant_land_mask}
- ${internal.hr_invariant_land_use}
- ${internal.hr_invariant_surface_roughness}
4 changes: 2 additions & 2 deletions nc2pt/conf/coords.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
coords:
- _target_: nc2pt.climatedata.ClimateDimension
name: lat
alternative_names: ["latitude", "Lat", "Latitude"]
alternative_names: ["latitude", "Lat", "Latitude","XLAT"]
chunksize: 100

- _target_: nc2pt.climatedata.ClimateDimension
name: lon
alternative_names: ["longitude", "Long", "Lon", "Longitude"]
alternative_names: ["longitude", "Long", "Lon", "Longitude", "XLONG"]
chunksize: 100
6 changes: 6 additions & 0 deletions nc2pt/conf/injections.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ defaults:
# HR Invariant Model
- climate_models/hr_invariant@internal._hr_invariant_model
- climate_models/hr_invariant/topo@internal._hr_invariant_topo
- climate_models/hr_invariant/land_mask@internal._hr_invariant_land_mask
- climate_models/hr_invariant/land_use@internal._hr_invariant_land_use
- climate_models/hr_invariant/surface_roughness@internal._hr_invariant_surface_roughness

# LR_emulation Model
- climate_models/lr_emulation@internal._lr_emulation_model
Expand Down Expand Up @@ -63,6 +66,9 @@ internal:
# HR Invariant aliases
hr_invariant: ${internal._hr_invariant_model}
hr_invariant_topo: ${internal._hr_invariant_topo}
hr_invariant_land_mask: ${internal._hr_invariant_land_mask}
hr_invariant_land_use: ${internal._hr_invariant_land_use}
hr_invariant_surface_roughness: ${internal._hr_invariant_surface_roughness}

# LR Emulation aliases
lr_emulation: ${internal._lr_emulation_model}
Expand Down
20 changes: 10 additions & 10 deletions nc2pt/conf/paths.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,16 @@ paths:
# ────────────────────────────────────────
# User-dependent path for HR reference grid
# ────────────────────────────────────────
hr_ref: /home/username/projects/nc2pt/nc2pt/data/hr_ref.nc
hr_ref: /net/venus/kenes/data/sbeairsto/UBC_WRF/ubc_wrf_staticfields_d03.nc # UBC_WRF reference grid (should be accessible to everyone)

# ────────────────────────────────────────
# Shared data paths – High-Resolution WRF (HR)
# ────────────────────────────────────────
hr:
uas: /net/venus/kenes/downloaded-data/acannon/USask-WRF-WCA/ctl-wrf-wca/U10/*.nc
vas: /net/venus/kenes/downloaded-data/acannon/USask-WRF-WCA/ctl-wrf-wca/V10/*.nc
pr: /net/venus/kenes/downloaded-data/acannon/USask-WRF-WCA/ctl-wrf-wca/PREC/*.nc
tas: /net/venus/kenes/downloaded-data/acannon/USask-WRF-WCA/ctl-wrf-wca/T2/*.nc
uas: /net/venus/blue/data/WRF/kikridaust/Share_Data/SUBSETTED/*/metgrid_*/COMPRESSED_SUBSETTED_d03_metgrid_*.nc
vas: /net/venus/blue/data/WRF/kikridaust/Share_Data/SUBSETTED/*/metgrid_*/COMPRESSED_SUBSETTED_d03_metgrid_*.nc
pr: /net/venus/blue/data/WRF/kikridaust/Share_Data/SUBSETTED/*/metgrid_*/COMPRESSED_SUBSETTED_d03_metgrid_*.nc
tas: /net/venus/blue/data/WRF/kikridaust/Share_Data/SUBSETTED/*/metgrid_*/COMPRESSED_SUBSETTED_d03_metgrid_*.nc

# ────────────────────────────────────────
# Shared data paths – Low-Resolution ERA5 (LR)
Expand All @@ -30,16 +30,16 @@ paths:
# ────────────────────────────────────────
# Shared invariant fields (e.g., orography, landmask)
# ────────────────────────────────────────
hr_invariant: /net/venus/kenes/downloaded-data/acannon/USask-WRF-WCA/CA4km_const.nc
hr_invariant: /net/venus/kenes/data/sbeairsto/UBC_WRF/ubc_wrf_staticfields_d03.nc # UBC_WRF static fields (should be accessible to everyone)

# ────────────────────────────────────────
# User-dependent path for – Low-Resolution ERA5 Emulation Metadata (LR Emulation)
# ────────────────────────────────────────
lr_emulation:
uas_metadata: /home/username/data/path/lr_uas_feature_scaling_metadata.json
vas_metadata: /home/username/data/path/lr_vas_feature_scaling_metadata.json
pr_metadata: /home/username/data/path/lr_pr_feature_scaling_metadata.json
tas_metadata: /home/username/data/path/lr_tas_feature_scaling_metadata.json
uas_metadata: /home/username/data/path/lr_uas_feature_scaling_metadata.json
vas_metadata: /home/username/data/path/lr_vas_feature_scaling_metadata.json
pr_metadata: /home/username/data/path/lr_pr_feature_scaling_metadata.json
tas_metadata: /home/username/data/path/lr_tas_feature_scaling_metadata.json

# ────────────────────────────────────────
# User-dependent path for My Model Template
Expand Down
16 changes: 8 additions & 8 deletions nc2pt/conf/select.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@ select:
time:
# Define the full available date range
range:
start: "20001201T06:00:00"
end: "20010101T12:00:00"
start: "20140101T00:00:00"
end: "20170101T00:00:00"

# Select years to reserve for testing and validation
# Remaining years are used for training
test_years: [2001]
validation_years: [None]
test_years: [2014]
validation_years: [2015]

# ───── Spatial subsetting ─────
spatial:
scale_factor: 8 # Factor to downscale HR data to LR
x:
first_index: 110
last_index: 622
first_index: 125
last_index: 253
y:
first_index: 20
last_index: 532
first_index: 100
last_index: 228
17 changes: 15 additions & 2 deletions nc2pt/io.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,34 @@
from datetime import datetime
import xarray as xr
import pandas as pd
import re
from pathlib import Path


def load_grid(path: str, engine: str = "netcdf4", chunks: int = 250) -> xr.Dataset:
def load_grid(path: str, engine: str = "netcdf4", loader: str = "default", chunks: int = 250) -> xr.Dataset:
"""Load the grid to regrid to.

Parameters
----------
path : str
Path to the grid to regrid to.
engine : str
Engine to use for loading.
chunks : int
Chunk size for dask.
loader : str
Loader type: 'default', 'ubc_wrf'

Returns
-------
grid : xarray.Dataset
Grid to regrid to.
"""
if loader == "ubc_wrf":
from ubc_wrf_io import load_ubc_wrf
return load_ubc_wrf(path, engine=engine, chunks=chunks)

# Default loader
if "*" in path or isinstance(path, list):
with xr.open_mfdataset(path, engine=engine, parallel=True, chunks="auto") as ds:
return ds
Expand All @@ -41,4 +54,4 @@ def write_to_zarr(ds: xr.Dataset, path: str) -> None:
"history": f"Created by {__file__} on {datetime.now()}",
}
)
ds.to_zarr(f"{path}.zarr", mode="w", consolidated=True)
ds.to_zarr(f"{path}.zarr", mode="w", consolidated=True)
3 changes: 2 additions & 1 deletion nc2pt/preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ def preprocess_variables(model: ClimateModel, climdata: ClimateData) -> None:
# Instantiates the climate_variable object defined in climatedata.py
climate_variable = instantiate(climate_variable)
engine = model.engine or climdata.compute.engine
loader = model.loader

ds = load_grid(climate_variable.path, engine=engine)
ds = load_grid(climate_variable.path, engine=engine, loader=loader)

start = timer()
logging.info(
Expand Down
79 changes: 79 additions & 0 deletions nc2pt/ubc_wrf_io.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
"""UBC WRF-specific data loading utilities."""

import xarray as xr
import pandas as pd
import re
from pathlib import Path


def get_ubc_wrf_file_list(pattern: str) -> list:
    """Return the sorted list of files matching a glob pattern.

    Parameters
    ----------
    pattern : str
        Glob pattern to expand. The search runs with ``recursive=True``,
        so a ``**`` component matches nested directories.

    Returns
    -------
    files : list
        Matching paths in sorted order; empty when nothing matches.
    """
    import glob
    import logging

    logger = logging.getLogger(__name__)

    # Progress is reported through logging (not print) so it follows the
    # application's logging configuration like the rest of the pipeline.
    logger.info("Scanning for files (this may take ~30s over NFS)...")
    files = sorted(glob.glob(pattern, recursive=True))
    logger.info("Found %d files", len(files))

    return files


def add_ubc_wrf_timesteps(ds):
    """Replace the dummy ``Times`` axis of a WRF metgrid dataset with real timestamps.

    The year and month are parsed from the source filename
    (``metgrid_YYYY_MM.nc``) stored in ``ds.encoding['source']``. When they
    are found, the last step along ``Times`` is dropped (it corresponds to
    next month's 00:00:00) and the remaining steps are labelled hourly,
    starting on the first day of that month. When no date can be parsed
    (e.g. invariant fields), a ``Times`` dimension of length <= 2 is treated
    as a dummy and removed by selecting index 0.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset as handed over by xarray (used as the ``preprocess`` hook of
        ``open_mfdataset`` and directly on single files).

    Returns
    -------
    ds : xarray.Dataset
        Dataset with a proper ``Times`` coordinate, or with the dummy
        ``Times`` dimension dropped, or unchanged.
    """
    # NOTE(review): the time coordinate in the COMPRESSED_RAIN UBC WRF files
    # is named 'time', whereas COMPRESSED_SUBSETTED and COMPRESSED_SNOW use
    # 'Times'. This function only handles 'Times'; checking the coordinate
    # name would be needed before UBC WRF precip can be processed.
    # NOTE(review): precip and snow variables sometimes found in
    # COMPRESSED_SUBSETTED are reportedly not necessarily correct and should
    # be ignored.
    # NOTE(review): precip files also have a different number of timesteps
    # per file. E.g. COMPRESSED_RAIN_d03_metgrid_1999_09.nc has 720 steps,
    # from 1999-09-01_01:00:00 to 1999-10-01_00:00:00 (no 00:00:00 step for
    # the first day), because precipitation is valid for the preceding hour.
    # The "drop last step, start at 00:00" logic below would mislabel them.

    # The originating file path is taken from the dataset encoding.
    source_path = ds.encoding.get('source', '')

    # Filenames are expected to end in metgrid_YYYY_MM.nc
    found = re.search(r'metgrid_(\d{4})_(\d{2})\.nc$', source_path)

    if found is None:
        # Invariant fields or unparseable names: only strip Times when it
        # is a short dummy axis.
        is_dummy_axis = 'Times' in ds.dims and len(ds.Times) <= 2
        if is_dummy_axis:
            ds = ds.isel(Times=0, drop=True)
        return ds

    year, month = found.groups()
    month_start = f"{year}-{month}-01"

    # Discard the final step (spin-up for next month).
    trimmed = ds.isel(Times=slice(None, -1))

    # Label what is left with an hourly axis beginning at the month start.
    hourly_axis = pd.date_range(
        start=month_start,
        periods=trimmed.sizes['Times'],
        freq='h',
    )
    return trimmed.assign_coords(Times=hourly_axis)


def load_ubc_wrf(path: str, engine: str = "netcdf4", chunks: str = "auto") -> xr.Dataset:
    """Load WRF metgrid files with proper time coordinate handling.

    Parameters
    ----------
    path : str
        A single file path, a glob pattern containing ``*``, or a list of
        file paths.
    engine : str
        Engine passed to xarray for opening the files.
    chunks : str
        Chunking passed to ``xr.open_dataset`` when a single file is
        opened. The multi-file branch always opens with ``chunks='auto'``.

    Returns
    -------
    ds : xarray.Dataset
        Dataset processed by ``add_ubc_wrf_timesteps``; multiple files are
        concatenated along ``Times`` in list order.

    Raises
    ------
    FileNotFoundError
        If a glob pattern (or list) yields no files. This is a subclass of
        the ``OSError`` that ``xr.open_mfdataset`` raises for an empty list,
        but names the offending pattern.
    """
    import logging

    logger = logging.getLogger(__name__)

    multi_file = isinstance(path, list) or "*" in path
    if not multi_file:
        ds = xr.open_dataset(path, engine=engine, chunks=chunks)
        return add_ubc_wrf_timesteps(ds)

    file_list = get_ubc_wrf_file_list(path) if isinstance(path, str) else path
    if not file_list:
        raise FileNotFoundError(f"No UBC WRF files found for {path!r}")

    # Progress is reported through logging (not print) so it follows the
    # application's logging configuration like the rest of the pipeline.
    logger.info("Opening %d files...", len(file_list))
    # Logged once here rather than once per file inside the preprocess hook.
    logger.info(
        "Note: Dropping last timestep of each month (corresponds to M+1 00:00:00)"
    )

    # combine='nested' concatenates in list order along Times; each file is
    # given its time axis by the preprocess hook before concatenation.
    return xr.open_mfdataset(
        file_list,
        engine=engine,
        parallel=True,
        chunks='auto',
        preprocess=add_ubc_wrf_timesteps,
        combine='nested',
        concat_dim='Times',
        combine_attrs='override',
        data_vars='minimal',
        coords='minimal',
        compat='override',
    )
Loading
Loading