Reading data with `read_grid()`

Files are in the data directory

library(here)
library(harp)
library(dplyr)
library(forcats)
library(scico)

t2m <- read_grid(
  here("data/grib/AAEPS/2021/01/21/00/mbr000/fc2021012100+000grib"), 
  "T2m"
)

## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/grib/AAEPS/2021/01/21/00/mbr000/fc2021012100+000grib

t2m

## eidb : T Temperature K 
## Time:
##  2021/01/21 00:00 +0
## Domain summary:
## 189 x 198 domain
## Projection summary:
## proj= lcc 
## NE = ( 33.94219 , 78.64862 )
## SW = ( 3.104 , 78.223 )
## Data summary:
## 252.1425 261.2941 264.8183 264.1619 267.4577 270.4696

A geofield can be plotted using the plot_field function from the harpVis package.

plot_field(t2m)

plot_field(t2m, palette = rev(heat.colors(256)))

plot_field(t2m - 273, palette = rev(heat.colors(256)))

plot_field(t2m, zoom_centre = c(18, 79), zoom_length = c(60, 40))

NetCDF requires a bit more information from the user using netcdf_opts() to set the names of dimensions.

netcdf_opts()

## $options_set
## [1] "none"
## 
## $param_find
## list()
## 
## $proj4_var
## [1] "projection_lambert"
## 
## $proj4_att
## [1] "proj4"
## 
## $proj4
## NULL
## 
## $x_dim
## [1] "x"
## 
## $y_dim
## [1] "y"
## 
## $lon_var
## [1] "longitude"
## 
## $lat_var
## [1] "latitude"
## 
## $x_rev
## [1] FALSE
## 
## $y_rev
## [1] FALSE
## 
## $dx
## NULL
## 
## $dy
## NULL
## 
## $z_var
## [1] NA
## 
## $member_var
## [1] NA
## 
## $time_var
## [1] "time"
## 
## $ref_time_var
## [1] NA
## 
## $force_param_name
## [1] FALSE

my_opts <- netcdf_opts(
  ref_time_var = "forecast_reference_time",
  member_var   = "ensemble_member",
  z_var        = "height0"
)

read_grid(
  here(
    "data/netcdf/meps/2022/02/12/meps_lagged_6_h_subset_2_5km_20220212T00Z.nc"
  ), 
  "Pcp", 
  file_format_opts = my_opts
)

## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/netcdf/meps/2022/02/12/meps_lagged_6_h_subset_2_5km_20220212T00Z.nc

## <harp_geolist[750]>
##  [[1]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
##  [[2]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
##  [[3]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
##  [[4]] <numeric geofield [121 x 206] Min = 0.000 Max = 8.843 Mean = 0.521>
##  [[5]] <numeric geofield [121 x 206] Min = 0.000 Max = 9.689 Mean = 0.641>
##  [[6]] <numeric geofield [121 x 206] Min = 0.000 Max = 4.809 Mean = 0.190>
##  [[7]] <numeric geofield [121 x 206] Min = 0.000 Max = 7.642 Mean = 0.196>
##  [[8]] <numeric geofield [121 x 206] Min = 0.000 Max = 8.157 Mean = 0.504>
##  [[9]] <numeric geofield [121 x 206] Min = 0.000 Max = 7.071 Mean = 0.187>
## [[10]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
## # 740 more geofields
## # Use `print(n = ...)` to see more

That’s a lot data to keep track of, so a data frame would be a better choice of output format

precip <- read_grid(
  here(
    "data/netcdf/meps/2022/02/13/meps_lagged_6_h_subset_2_5km_20220213T00Z.nc"
  ), 
  "Pcp", 
  file_format_opts = my_opts,
  data_frame       = TRUE
)

## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/netcdf/meps/2022/02/13/meps_lagged_6_h_subset_2_5km_20220213T00Z.nc

head(precip)

## # A tibble: 6 × 9
##   fcst_dttm           valid_dttm          lead_time parameter level_type level
##   <dttm>              <dttm>                  <dbl> <chr>     <chr>      <dbl>
## 1 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## 2 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## 3 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## 4 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## 5 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## 6 2022-02-13 00:00:00 2022-02-13 00:00:00         0 Pcp       unknown        0
## # ℹ 3 more variables: units <chr>, gridded_data <geolist>, members <int[1d]>

We can get the hourly precipitation by using the decum() function. decum() only works on We need to tell it the decumulation time - in this case 1 hour, the column(s) we want to decumulate (gridded_data), the column that contains the time we want to use to compute the decumulation time (can be lead_time or valid_dttm), and the column to group by if any - in this case we want to group by the members column.

precip <- decum(
  precip, 
  decum_time = "1h", 
  cols       = gridded_data, 
  time_col   = lead_time, 
  group_col  = members
)

We can then plot hourly precipitation using ggplot and the geom_georaster() geom.

ggplot(filter(precip, members == 0)) + 
  geom_georaster(aes(geofield = gridded_data)) +  
  geom_polygon(
    aes(x, y, group = group), 
    get_map(dom = precip$gridded_data[[1]]), 
    fill = NA, 
    colour = "grey80"
  ) + 
   scale_fill_scico(
    "mm",
    direction = -1,
    palette   = "oslo", 
    limits    = c(0.125, NA), 
    na.value  = NA 
  ) +
  facet_wrap(vars(fct_inorder(format(valid_dttm, "%a %H:%M")))) + 
  coord_equal(expand = FALSE) + 
  theme_void() + 
  theme(
    panel.background = element_rect(fill = NA, colour = "grey30"), 
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

## Warning: Removed 303268 rows containing missing values (`geom_raster()`).

ggplot(filter(precip, lead_time == 5)) + 
  geom_georaster(aes(geofield = gridded_data)) +  
  geom_polygon(
    aes(x, y, group = group), 
    get_map(dom = get_domain(precip$gridded_data)), 
    fill = NA, 
    colour = "grey80"
  ) + 
  scale_fill_scico(
    "mm",
    direction = -1,
    palette   = "oslo", 
    limits    = c(0.125, NA), 
    na.value  = NA 
  ) +
  facet_wrap(vars(paste("Member", formatC(members, width = 3, flag = "0")))) + 
  coord_equal(expand = FALSE) + 
  theme_void() + 
  theme(
    panel.background = element_rect(fill = NA, colour = "grey30"), 
    plot.margin = unit(c(1, 1, 1, 1), "cm")
  )

## Warning: Removed 350797 rows containing missing values (`geom_raster()`).

Reading data with `read_forecast()`

read_forecast() is used to read in data from multiple files. The output is given classes depending on whether it is gridded or point data, and whether it is an ensemble or deterministic forecast. File templates are used instead of file names with substitution placeholders for things like year, month, day, hour, ensemble member, lead time.

template <- "{fcst_model}/{YYYY}/{MM}/{DD}/{HH}/mbr{MBR3}/fc{YYYY}{MM}{DD}{HH}+{LDT3}grib"
t2m <- read_forecast(
  dttm          = seq_dttm(2021012100, 2021012200, "1d"),
  fcst_model    = "AAEPS",
  parameter     = "T2m",
  lead_time     = seq(0, 24, 3),
  members       = seq(0, 3),
  file_path     = here("data/grib"),
  file_template = template,
  return_data   = TRUE
)
t2m

## ::ensemble gridded forecast:: # A tibble: 18 × 14
##    fcst_model fcst_dttm           lead_time parameter valid_dttm         
##    <chr>      <dttm>                  <dbl> <chr>     <dttm>             
##  1 AAEPS      2021-01-21 00:00:00         0 T2m       2021-01-21 00:00:00
##  2 AAEPS      2021-01-21 00:00:00         3 T2m       2021-01-21 03:00:00
##  3 AAEPS      2021-01-21 00:00:00         6 T2m       2021-01-21 06:00:00
##  4 AAEPS      2021-01-21 00:00:00         9 T2m       2021-01-21 09:00:00
##  5 AAEPS      2021-01-21 00:00:00        12 T2m       2021-01-21 12:00:00
##  6 AAEPS      2021-01-21 00:00:00        15 T2m       2021-01-21 15:00:00
##  7 AAEPS      2021-01-21 00:00:00        18 T2m       2021-01-21 18:00:00
##  8 AAEPS      2021-01-21 00:00:00        21 T2m       2021-01-21 21:00:00
##  9 AAEPS      2021-01-21 00:00:00        24 T2m       2021-01-22 00:00:00
## 10 AAEPS      2021-01-22 00:00:00         0 T2m       2021-01-22 00:00:00
## 11 AAEPS      2021-01-22 00:00:00         3 T2m       2021-01-22 03:00:00
## 12 AAEPS      2021-01-22 00:00:00         6 T2m       2021-01-22 06:00:00
## 13 AAEPS      2021-01-22 00:00:00         9 T2m       2021-01-22 09:00:00
## 14 AAEPS      2021-01-22 00:00:00        12 T2m       2021-01-22 12:00:00
## 15 AAEPS      2021-01-22 00:00:00        15 T2m       2021-01-22 15:00:00
## 16 AAEPS      2021-01-22 00:00:00        18 T2m       2021-01-22 18:00:00
## 17 AAEPS      2021-01-22 00:00:00        21 T2m       2021-01-22 21:00:00
## 18 AAEPS      2021-01-22 00:00:00        24 T2m       2021-01-23 00:00:00
## # ℹ 9 more variables: step_range <chr>, level_type <chr>, level <int>,
## #   units <chr>, fcst_cycle <chr>, AAEPS_mbr000 <geolist>,
## #   AAEPS_mbr001 <geolist>, AAEPS_mbr002 <geolist>, AAEPS_mbr003 <geolist>

class(t2m)

## [1] "harp_ens_grid_df" "harp_grid_df"     "harp_df"          "tbl_df"          
## [5] "tbl"              "data.frame"

plot_field(t2m, plot_col = AAEPS_mbr000, fcst_dttm = 2021012100, lead_time = 9)

Reading lagged models

With read_forecast() the lags argument can be used to read in data from lagged ensembles with the lagged members in different files. Here the lags are used to construct file names based on modified analysis date times and lead times resulting from lags.

  read_forecast(
    dttm          = seq_dttm(2019021700, 2019021706, "6h"),
    fcst_model    = "CMEPS_prod",
    parameter     = "S10m",
    lead_time     = seq(0, 12, 3),
    members       = c(0, 1, 3, 4, 5, 6),
    lags          = c(0, 0, 2, 2, 1, 1),
    file_path     = here("data/vfld"),
    file_template = "vfld_eps",
    return_data   = TRUE
  )

## ::ensemble point forecast:: # A tibble: 12,030 × 16
##    fcst_model fcst_dttm           lead_time parameter   SID   lat   lon units
##    <chr>      <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1001  70.9 -8.67 m/s  
##  2 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1010  69.3 16.1  m/s  
##  3 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1014  69.2 17.9  m/s  
##  4 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1015  69.6 17.8  m/s  
##  5 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1018  69.2 16.0  m/s  
##  6 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1021  70.0 18.7  m/s  
##  7 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1022  70   18.7  m/s  
##  8 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1023  69.1 18.5  m/s  
##  9 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1025  69.7 18.9  m/s  
## 10 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1026  69.7 18.9  m/s  
## # ℹ 12,020 more rows
## # ℹ 8 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## #   CMEPS_prod_mbr000 <dbl>, CMEPS_prod_mbr001 <dbl>,
## #   CMEPS_prod_mbr003_lag2h <dbl>, CMEPS_prod_mbr004_lag2h <dbl>,
## #   CMEPS_prod_mbr005_lag1h <dbl>, CMEPS_prod_mbr006_lag1h <dbl>

We can also read in more than one model at a time, but if they have different members and lags we need to use named lists.

  read_forecast(
    dttm          = seq_dttm(2019021700, 2019021706, "6h"),
    fcst_model    = c("MEPS_prod", "CMEPS_prod"),
    parameter     = "S10m",
    lead_time     = seq(0, 12, 3),
    members       = list(
      MEPS_prod  = seq(0, 3), 
      CMEPS_prod = c(0, 3, 5)
    ),
    lags          = list(
      CMEPS_prod = c(0, 2, 1)
    ),
    file_path     = here("data/vfld"),
    file_template = "vfld_eps",
    return_data   = TRUE
  )

## • CMEPS_prod
## ::ensemble point forecast:: # A tibble: 12,030 × 13
##    fcst_model fcst_dttm           lead_time parameter   SID   lat   lon units
##    <chr>      <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1001  70.9 -8.67 m/s  
##  2 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1010  69.3 16.1  m/s  
##  3 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1014  69.2 17.9  m/s  
##  4 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1015  69.6 17.8  m/s  
##  5 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1018  69.2 16.0  m/s  
##  6 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1021  70.0 18.7  m/s  
##  7 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1022  70   18.7  m/s  
##  8 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1023  69.1 18.5  m/s  
##  9 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1025  69.7 18.9  m/s  
## 10 CMEPS_prod 2019-02-17 00:00:00         0 S10m       1026  69.7 18.9  m/s  
## # ℹ 12,020 more rows
## # ℹ 5 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## #   CMEPS_prod_mbr000 <dbl>, CMEPS_prod_mbr003_lag2h <dbl>,
## #   CMEPS_prod_mbr005_lag1h <dbl>
## 
## • MEPS_prod
## ::ensemble point forecast:: # A tibble: 12,030 × 14
##    fcst_model fcst_dttm           lead_time parameter   SID   lat   lon units
##    <chr>      <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 MEPS_prod  2019-02-17 00:00:00         0 S10m       1001  70.9 -8.67 m/s  
##  2 MEPS_prod  2019-02-17 00:00:00         0 S10m       1010  69.3 16.1  m/s  
##  3 MEPS_prod  2019-02-17 00:00:00         0 S10m       1014  69.2 17.9  m/s  
##  4 MEPS_prod  2019-02-17 00:00:00         0 S10m       1015  69.6 17.8  m/s  
##  5 MEPS_prod  2019-02-17 00:00:00         0 S10m       1018  69.2 16.0  m/s  
##  6 MEPS_prod  2019-02-17 00:00:00         0 S10m       1021  70.0 18.7  m/s  
##  7 MEPS_prod  2019-02-17 00:00:00         0 S10m       1022  70   18.7  m/s  
##  8 MEPS_prod  2019-02-17 00:00:00         0 S10m       1023  69.1 18.5  m/s  
##  9 MEPS_prod  2019-02-17 00:00:00         0 S10m       1025  69.7 18.9  m/s  
## 10 MEPS_prod  2019-02-17 00:00:00         0 S10m       1026  69.7 18.9  m/s  
## # ℹ 12,020 more rows
## # ℹ 6 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## #   MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr001 <dbl>, MEPS_prod_mbr002 <dbl>,
## #   MEPS_prod_mbr003 <dbl>

Read and transform examples - part 1

Andrew Singleton

16/02/2022 - updated 6/11/2023 for harp v0.2.0

Reading data with `read_grid()`

Reading data with `read_forecast()`

Reading lagged models

Read and transform examples - part 1

Andrew Singleton

16/02/2022 - updated 6/11/2023 for harp v0.2.0

Reading data with read_grid()

Reading data with read_forecast()

Reading lagged models

Reading data with `read_grid()`

Reading data with `read_forecast()`