Data Exploration

This notebook demonstrates how to load datasets and explore their contents using gcmprocpy’s data inspection functions.

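Note: This notebook requires TIE-GCM or WACCM-X model output files. Before running it, update the `directory` path in the "Loading Datasets" cell so that it points to your own files.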

[11]:
import os
import warnings
warnings.filterwarnings('ignore')

import gcmprocpy as gy

Loading Datasets

Use load_datasets() to lazily load NetCDF files from a directory. The optional dataset_filter parameter restricts loading to files whose names match the filter; here, 'sech' selects the `*_sech_*.nc` files.

[12]:
# Directory holding the model history files. The default is the path this
# notebook was originally run against; set the GCMPROCPY_DATA_DIR environment
# variable (a notebook-local convention) to use your own TIE-GCM / WACCM-X
# output without editing this cell.
directory = os.environ.get(
    'GCMPROCPY_DATA_DIR',
    '/glade/work/nikhilr/tiegcm3.0/benchmarks/2.5/seasons/decsol_smin/hist',
)
# Filename filter forwarded to load_datasets().
dataset_filter = 'sech'

datasets = gy.load_datasets(directory, dataset_filter=dataset_filter)

# Every later cell indexes into the loaded data (e.g. times[0], datasets[0]),
# so stop here with an actionable message if nothing was loaded.
if len(datasets) == 0:
    raise FileNotFoundError(
        f'No files matching {dataset_filter!r} were loaded from {directory!r}'
    )

print(f'Loaded {len(datasets)} files')
for mds in datasets:
    # NOTE(review): _time_values is a private attribute of the loaded dataset
    # object; switch to a public accessor if gcmprocpy provides one.
    print(f'  {mds.filename} — model: {mds.model}, times: {len(mds._time_values)}')
Loaded 5 files
  decsol_smin_2.5x0.25_sech_001.nc — model: TIE-GCM, times: 24
  decsol_smin_2.5x0.25_sech_002.nc — model: TIE-GCM, times: 24
  decsol_smin_2.5x0.25_sech_003.nc — model: TIE-GCM, times: 24
  decsol_smin_2.5x0.25_sech_004.nc — model: TIE-GCM, times: 24
  decsol_smin_2.5x0.25_sech_005.nc — model: TIE-GCM, times: 24

Listing Timestamps

time_list() returns all unique timestamps across the loaded datasets.

[13]:
# Collect the timestamps from all loaded files; `times` is reused by the
# time-conversion cell further down.
times = gy.time_list(datasets)
print(f'Total timestamps: {len(times)}')
# Show only the two endpoints rather than the whole list.
print(f'First: {times[0]}', f'Last:  {times[-1]}', sep='\n')
Total timestamps: 120
First: 2002-12-21T01:00:00.000000000
Last:  2002-12-26T00:00:00.000000000

Listing Variables

var_list() returns all variable names (excluding coordinate variables) in sorted order.

[14]:
# Gather the variable names across the loaded datasets and print them together
# with their count.
variables = gy.var_list(datasets)
print(f'Variables ({len(variables)}): {variables}')
Variables (84): ['HE', 'HMF2', 'Kp', 'NE', 'NMF2', 'NO', 'O1', 'O2', 'OP', 'POTEN', 'TE', 'TEC', 'TI', 'TLBC', 'TLBC_NM', 'TN', 'UI_ExB', 'ULBC', 'ULBC_NM', 'UN', 'VI_ExB', 'VLBC', 'VLBC_NM', 'VN', 'WI_ExB', 'WN', 'Z', 'ZG', 'ZMAG', 'al', 'alfac', 'alfad', 'amienh_ncfile', 'amiesh_ncfile', 'bgrddata_ncfile', 'bximf', 'byimf', 'bzimf', 'calendar_advance', 'colfac', 'coupled_mage', 'crit1', 'crit2', 'ctmt_ncfile', 'ctpoten', 'day', 'dtide', 'e1', 'e2', 'ec', 'ed', 'f107a', 'f107d', 'gnsrhs', 'gpi_ncfile', 'grav', 'gswm_mi_di_ncfile', 'gswm_mi_sdi_ncfile', 'gswm_nm_di_ncfile', 'gswm_nm_sdi_ncfile', 'gzigm1', 'gzigm2', 'h1', 'h2', 'hpower', 'imf_ncfile', 'iter', 'joulefac', 'mag', 'mtime', 'ncep_ncfile', 'ntask_mpi', 'p0', 'p0_model', 'saber_ncfile', 'sdtide', 'see_ncfile', 'swden', 'swvel', 'tidi_ncfile', 'timestep', 'ut', 'write_date', 'year']

Listing Levels, Longitudes, and Latitudes

[15]:
# Levels are printed in full (count plus every value).
levels = gy.level_list(datasets)
print(f'Levels ({len(levels)}): {levels}')

# For longitudes and latitudes only the count and the first/last entries are shown.
lons = gy.lon_list(datasets)
print(f'\nLongitudes ({len(lons)}): {lons[0]} to {lons[-1]}')

lats = gy.lat_list(datasets)
print(f'Latitudes ({len(lats)}): {lats[0]} to {lats[-1]}')
Levels (114): [np.float64(-7.0), np.float64(-6.875), np.float64(-6.75), np.float64(-6.625), np.float64(-6.5), np.float64(-6.375), np.float64(-6.25), np.float64(-6.125), np.float64(-6.0), np.float64(-5.875), np.float64(-5.75), np.float64(-5.625), np.float64(-5.5), np.float64(-5.375), np.float64(-5.25), np.float64(-5.125), np.float64(-5.0), np.float64(-4.875), np.float64(-4.75), np.float64(-4.625), np.float64(-4.5), np.float64(-4.375), np.float64(-4.25), np.float64(-4.125), np.float64(-4.0), np.float64(-3.875), np.float64(-3.75), np.float64(-3.625), np.float64(-3.5), np.float64(-3.375), np.float64(-3.25), np.float64(-3.125), np.float64(-3.0), np.float64(-2.875), np.float64(-2.75), np.float64(-2.625), np.float64(-2.5), np.float64(-2.375), np.float64(-2.25), np.float64(-2.125), np.float64(-2.0), np.float64(-1.875), np.float64(-1.75), np.float64(-1.625), np.float64(-1.5), np.float64(-1.375), np.float64(-1.25), np.float64(-1.125), np.float64(-1.0), np.float64(-0.875), np.float64(-0.75), np.float64(-0.625), np.float64(-0.5), np.float64(-0.375), np.float64(-0.25), np.float64(-0.125), np.float64(0.0), np.float64(0.125), np.float64(0.25), np.float64(0.375), np.float64(0.5), np.float64(0.625), np.float64(0.75), np.float64(0.875), np.float64(1.0), np.float64(1.125), np.float64(1.25), np.float64(1.375), np.float64(1.5), np.float64(1.625), np.float64(1.75), np.float64(1.875), np.float64(2.0), np.float64(2.125), np.float64(2.25), np.float64(2.375), np.float64(2.5), np.float64(2.625), np.float64(2.75), np.float64(2.875), np.float64(3.0), np.float64(3.125), np.float64(3.25), np.float64(3.375), np.float64(3.5), np.float64(3.625), np.float64(3.75), np.float64(3.875), np.float64(4.0), np.float64(4.125), np.float64(4.25), np.float64(4.375), np.float64(4.5), np.float64(4.625), np.float64(4.75), np.float64(4.875), np.float64(5.0), np.float64(5.125), np.float64(5.25), np.float64(5.375), np.float64(5.5), np.float64(5.625), np.float64(5.75), np.float64(5.875), np.float64(6.0), 
np.float64(6.125), np.float64(6.25), np.float64(6.375), np.float64(6.5), np.float64(6.625), np.float64(6.75), np.float64(6.875), np.float64(7.0), np.float64(7.125)]

Longitudes (144): -180.0 to 177.5
Latitudes (72): -88.75 to 88.75

Listing Dimensions

[16]:
# List the dimension names found in the loaded datasets.
dims = gy.dim_list(datasets)
print(f'Dimensions: {dims}')
Dimensions: ['dtidedim', 'ilev', 'imlev', 'lat', 'latlon', 'lev', 'lon', 'mlat', 'mlev', 'mlon', 'mtimedim', 'sdtidedim', 'time']

Variable Information

var_info() returns a mapping, keyed by filename, with the attributes and dimensions of a specific variable in each loaded file. The cell below prints the first non-empty entry.

[17]:
info = gy.var_info(datasets, 'TN')

# Pick the first file whose entry is non-empty; one example is enough here.
first_entry = next(
    ((fname, details) for fname, details in info.items() if details),
    None,
)
if first_entry is not None:
    fname, details = first_entry
    print(f'{fname}:')
    print(f'  units:      {details["attributes"]["units"]}')
    print(f'  long_name:  {details["attributes"]["long_name"]}')
    print(f'  dimensions: {details["dimensions"]}')
decsol_smin_2.5x0.25_sech_001.nc:
  units:      K
  long_name:  NEUTRAL TEMPERATURE
  dimensions: ('time', 'lev', 'lat', 'lon')

Dimension Information

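dim_info() returns, for each loaded file, details about a specific dimension, including its size. The cell below prints the size from the first non-empty entry for each dimension.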

[18]:
# Report the size of each dimension, taken from the first file that has a
# non-empty entry for it.
for dim_name in ('lat', 'lon', 'lev', 'ilev'):
    info = gy.dim_info(datasets, dim_name)
    details = next((entry for entry in info.values() if entry), None)
    if details is not None:
        print(f'{dim_name}: size={details["size"]}')
lat: size=72
lon: size=144
lev: size=57
ilev: size=57

Time Conversion Utilities

get_mtime() converts a datetime to model time (mtime), shown below as a [day-of-year, hour, minute, second] list, and get_time() converts an mtime back to a datetime.

[19]:
# Use the first timestamp from the time_list() cell as the round-trip example.
t = times[0]
# get_mtime() is given a single underlying dataset (the .ds of the first
# loaded file), not the list of loaded datasets.
mtime = gy.get_mtime(datasets[0].ds, t)
print(f'Time:  {t}')
print(f'mtime: {mtime}')

# get_time() is given the full list of loaded datasets; the result should
# match `t` if the round trip is lossless.
t_back = gy.get_time(datasets, mtime)
print(f'Back:  {t_back}')
Time:  2002-12-21T01:00:00.000000000
mtime: [355, 1, 0, 0]
Back:  2002-12-21T01:00:00.000000000

Cleanup

[20]:
# Close the datasets loaded at the top of the notebook; cells that use
# `datasets` should not be re-run after this without reloading.
gy.close_datasets(datasets)
print('Datasets closed.')
Datasets closed.