read_grid()
Files are in the data directory
library(here)
library(harp)
library(dplyr)
library(forcats)
library(scico)
t2m <- read_grid(
here("data/grib/AAEPS/2021/01/21/00/mbr000/fc2021012100+000grib"),
"T2m"
)
## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/grib/AAEPS/2021/01/21/00/mbr000/fc2021012100+000grib
t2m
## eidb : T Temperature K
## Time:
## 2021/01/21 00:00 +0
## Domain summary:
## 189 x 198 domain
## Projection summary:
## proj= lcc
## NE = ( 33.94219 , 78.64862 )
## SW = ( 3.104 , 78.223 )
## Data summary:
## 252.1425 261.2941 264.8183 264.1619 267.4577 270.4696
A geofield can be plotted using the plot_field function from the
harpVis
package.
plot_field(t2m)
plot_field(t2m, palette = rev(heat.colors(256)))
plot_field(t2m - 273, palette = rev(heat.colors(256)))
plot_field(t2m, zoom_centre = c(18, 79), zoom_length = c(60, 40))
NetCDF requires a bit more information from the user using
netcdf_opts()
to set the names of dimensions.
netcdf_opts()
## $options_set
## [1] "none"
##
## $param_find
## list()
##
## $proj4_var
## [1] "projection_lambert"
##
## $proj4_att
## [1] "proj4"
##
## $proj4
## NULL
##
## $x_dim
## [1] "x"
##
## $y_dim
## [1] "y"
##
## $lon_var
## [1] "longitude"
##
## $lat_var
## [1] "latitude"
##
## $x_rev
## [1] FALSE
##
## $y_rev
## [1] FALSE
##
## $dx
## NULL
##
## $dy
## NULL
##
## $z_var
## [1] NA
##
## $member_var
## [1] NA
##
## $time_var
## [1] "time"
##
## $ref_time_var
## [1] NA
##
## $force_param_name
## [1] FALSE
my_opts <- netcdf_opts(
ref_time_var = "forecast_reference_time",
member_var = "ensemble_member",
z_var = "height0"
)
read_grid(
here(
"data/netcdf/meps/2022/02/12/meps_lagged_6_h_subset_2_5km_20220212T00Z.nc"
),
"Pcp",
file_format_opts = my_opts
)
## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/netcdf/meps/2022/02/12/meps_lagged_6_h_subset_2_5km_20220212T00Z.nc
## <harp_geolist[750]>
## [[1]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
## [[2]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
## [[3]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
## [[4]] <numeric geofield [121 x 206] Min = 0.000 Max = 8.843 Mean = 0.521>
## [[5]] <numeric geofield [121 x 206] Min = 0.000 Max = 9.689 Mean = 0.641>
## [[6]] <numeric geofield [121 x 206] Min = 0.000 Max = 4.809 Mean = 0.190>
## [[7]] <numeric geofield [121 x 206] Min = 0.000 Max = 7.642 Mean = 0.196>
## [[8]] <numeric geofield [121 x 206] Min = 0.000 Max = 8.157 Mean = 0.504>
## [[9]] <numeric geofield [121 x 206] Min = 0.000 Max = 7.071 Mean = 0.187>
## [[10]] <numeric geofield [121 x 206] Min = 0.000 Max = 0.000 Mean = 0.000>
## # 740 more geofields
## # Use `print(n = ...)` to see more
That’s a lot data to keep track of, so a data frame would be a better choice of output format
precip <- read_grid(
here(
"data/netcdf/meps/2022/02/13/meps_lagged_6_h_subset_2_5km_20220213T00Z.nc"
),
"Pcp",
file_format_opts = my_opts,
data_frame = TRUE
)
## Reading /home/andrewts/harp-documentation/md/harp_training_2022/data/netcdf/meps/2022/02/13/meps_lagged_6_h_subset_2_5km_20220213T00Z.nc
head(precip)
## # A tibble: 6 × 9
## fcst_dttm valid_dttm lead_time parameter level_type level
## <dttm> <dttm> <dbl> <chr> <chr> <dbl>
## 1 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## 2 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## 3 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## 4 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## 5 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## 6 2022-02-13 00:00:00 2022-02-13 00:00:00 0 Pcp unknown 0
## # ℹ 3 more variables: units <chr>, gridded_data <geolist>, members <int[1d]>
We can get the hourly precipitation by using the decum()
function. decum()
only works on We need to tell it the
decumulation time - in this case 1 hour, the column(s) we want to
decumulate (gridded_data), the column that contains the time we want to
use to compute the decumulation time (can be lead_time or valid_dttm),
and the column to group by if any - in this case we want to group by the
members column.
precip <- decum(
precip,
decum_time = "1h",
cols = gridded_data,
time_col = lead_time,
group_col = members
)
We can then plot hourly precipitation using ggplot
and
the geom_georaster()
geom.
ggplot(filter(precip, members == 0)) +
geom_georaster(aes(geofield = gridded_data)) +
geom_polygon(
aes(x, y, group = group),
get_map(dom = precip$gridded_data[[1]]),
fill = NA,
colour = "grey80"
) +
scale_fill_scico(
"mm",
direction = -1,
palette = "oslo",
limits = c(0.125, NA),
na.value = NA
) +
facet_wrap(vars(fct_inorder(format(valid_dttm, "%a %H:%M")))) +
coord_equal(expand = FALSE) +
theme_void() +
theme(
panel.background = element_rect(fill = NA, colour = "grey30"),
plot.margin = unit(c(1, 1, 1, 1), "cm")
)
## Warning: Removed 303268 rows containing missing values (`geom_raster()`).
ggplot(filter(precip, lead_time == 5)) +
geom_georaster(aes(geofield = gridded_data)) +
geom_polygon(
aes(x, y, group = group),
get_map(dom = get_domain(precip$gridded_data)),
fill = NA,
colour = "grey80"
) +
scale_fill_scico(
"mm",
direction = -1,
palette = "oslo",
limits = c(0.125, NA),
na.value = NA
) +
facet_wrap(vars(paste("Member", formatC(members, width = 3, flag = "0")))) +
coord_equal(expand = FALSE) +
theme_void() +
theme(
panel.background = element_rect(fill = NA, colour = "grey30"),
plot.margin = unit(c(1, 1, 1, 1), "cm")
)
## Warning: Removed 350797 rows containing missing values (`geom_raster()`).
read_forecast()
read_forecast()
is used to read in data from multiple
files. The output is given classes depending on whether it is gridded or
point data, and whether it is an ensemble or deterministic forecast.
File templates are used instead of file names with substitution
placeholders for things like year, month, day, hour, ensemble member,
lead time.
template <- "{fcst_model}/{YYYY}/{MM}/{DD}/{HH}/mbr{MBR3}/fc{YYYY}{MM}{DD}{HH}+{LDT3}grib"
t2m <- read_forecast(
dttm = seq_dttm(2021012100, 2021012200, "1d"),
fcst_model = "AAEPS",
parameter = "T2m",
lead_time = seq(0, 24, 3),
members = seq(0, 3),
file_path = here("data/grib"),
file_template = template,
return_data = TRUE
)
t2m
## ::ensemble gridded forecast:: # A tibble: 18 × 14
## fcst_model fcst_dttm lead_time parameter valid_dttm
## <chr> <dttm> <dbl> <chr> <dttm>
## 1 AAEPS 2021-01-21 00:00:00 0 T2m 2021-01-21 00:00:00
## 2 AAEPS 2021-01-21 00:00:00 3 T2m 2021-01-21 03:00:00
## 3 AAEPS 2021-01-21 00:00:00 6 T2m 2021-01-21 06:00:00
## 4 AAEPS 2021-01-21 00:00:00 9 T2m 2021-01-21 09:00:00
## 5 AAEPS 2021-01-21 00:00:00 12 T2m 2021-01-21 12:00:00
## 6 AAEPS 2021-01-21 00:00:00 15 T2m 2021-01-21 15:00:00
## 7 AAEPS 2021-01-21 00:00:00 18 T2m 2021-01-21 18:00:00
## 8 AAEPS 2021-01-21 00:00:00 21 T2m 2021-01-21 21:00:00
## 9 AAEPS 2021-01-21 00:00:00 24 T2m 2021-01-22 00:00:00
## 10 AAEPS 2021-01-22 00:00:00 0 T2m 2021-01-22 00:00:00
## 11 AAEPS 2021-01-22 00:00:00 3 T2m 2021-01-22 03:00:00
## 12 AAEPS 2021-01-22 00:00:00 6 T2m 2021-01-22 06:00:00
## 13 AAEPS 2021-01-22 00:00:00 9 T2m 2021-01-22 09:00:00
## 14 AAEPS 2021-01-22 00:00:00 12 T2m 2021-01-22 12:00:00
## 15 AAEPS 2021-01-22 00:00:00 15 T2m 2021-01-22 15:00:00
## 16 AAEPS 2021-01-22 00:00:00 18 T2m 2021-01-22 18:00:00
## 17 AAEPS 2021-01-22 00:00:00 21 T2m 2021-01-22 21:00:00
## 18 AAEPS 2021-01-22 00:00:00 24 T2m 2021-01-23 00:00:00
## # ℹ 9 more variables: step_range <chr>, level_type <chr>, level <int>,
## # units <chr>, fcst_cycle <chr>, AAEPS_mbr000 <geolist>,
## # AAEPS_mbr001 <geolist>, AAEPS_mbr002 <geolist>, AAEPS_mbr003 <geolist>
class(t2m)
## [1] "harp_ens_grid_df" "harp_grid_df" "harp_df" "tbl_df"
## [5] "tbl" "data.frame"
plot_field(t2m, plot_col = AAEPS_mbr000, fcst_dttm = 2021012100, lead_time = 9)
With read_forecast()
the lags
argument can
be used to read in data from lagged ensembles with the lagged members in
different files. Here the lags are used to construct file names based on
modified analysis date times and lead times resulting from lags.
read_forecast(
dttm = seq_dttm(2019021700, 2019021706, "6h"),
fcst_model = "CMEPS_prod",
parameter = "S10m",
lead_time = seq(0, 12, 3),
members = c(0, 1, 3, 4, 5, 6),
lags = c(0, 0, 2, 2, 1, 1),
file_path = here("data/vfld"),
file_template = "vfld_eps",
return_data = TRUE
)
## ::ensemble point forecast:: # A tibble: 12,030 × 16
## fcst_model fcst_dttm lead_time parameter SID lat lon units
## <chr> <dttm> <dbl> <chr> <int> <dbl> <dbl> <chr>
## 1 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1001 70.9 -8.67 m/s
## 2 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1010 69.3 16.1 m/s
## 3 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1014 69.2 17.9 m/s
## 4 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1015 69.6 17.8 m/s
## 5 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1018 69.2 16.0 m/s
## 6 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1021 70.0 18.7 m/s
## 7 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1022 70 18.7 m/s
## 8 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1023 69.1 18.5 m/s
## 9 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1025 69.7 18.9 m/s
## 10 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1026 69.7 18.9 m/s
## # ℹ 12,020 more rows
## # ℹ 8 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## # CMEPS_prod_mbr000 <dbl>, CMEPS_prod_mbr001 <dbl>,
## # CMEPS_prod_mbr003_lag2h <dbl>, CMEPS_prod_mbr004_lag2h <dbl>,
## # CMEPS_prod_mbr005_lag1h <dbl>, CMEPS_prod_mbr006_lag1h <dbl>
We can also read in more than one model at a time, but if they have different members and lags we need to use named lists.
read_forecast(
dttm = seq_dttm(2019021700, 2019021706, "6h"),
fcst_model = c("MEPS_prod", "CMEPS_prod"),
parameter = "S10m",
lead_time = seq(0, 12, 3),
members = list(
MEPS_prod = seq(0, 3),
CMEPS_prod = c(0, 3, 5)
),
lags = list(
CMEPS_prod = c(0, 2, 1)
),
file_path = here("data/vfld"),
file_template = "vfld_eps",
return_data = TRUE
)
## • CMEPS_prod
## ::ensemble point forecast:: # A tibble: 12,030 × 13
## fcst_model fcst_dttm lead_time parameter SID lat lon units
## <chr> <dttm> <dbl> <chr> <int> <dbl> <dbl> <chr>
## 1 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1001 70.9 -8.67 m/s
## 2 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1010 69.3 16.1 m/s
## 3 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1014 69.2 17.9 m/s
## 4 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1015 69.6 17.8 m/s
## 5 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1018 69.2 16.0 m/s
## 6 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1021 70.0 18.7 m/s
## 7 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1022 70 18.7 m/s
## 8 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1023 69.1 18.5 m/s
## 9 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1025 69.7 18.9 m/s
## 10 CMEPS_prod 2019-02-17 00:00:00 0 S10m 1026 69.7 18.9 m/s
## # ℹ 12,020 more rows
## # ℹ 5 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## # CMEPS_prod_mbr000 <dbl>, CMEPS_prod_mbr003_lag2h <dbl>,
## # CMEPS_prod_mbr005_lag1h <dbl>
##
## • MEPS_prod
## ::ensemble point forecast:: # A tibble: 12,030 × 14
## fcst_model fcst_dttm lead_time parameter SID lat lon units
## <chr> <dttm> <dbl> <chr> <int> <dbl> <dbl> <chr>
## 1 MEPS_prod 2019-02-17 00:00:00 0 S10m 1001 70.9 -8.67 m/s
## 2 MEPS_prod 2019-02-17 00:00:00 0 S10m 1010 69.3 16.1 m/s
## 3 MEPS_prod 2019-02-17 00:00:00 0 S10m 1014 69.2 17.9 m/s
## 4 MEPS_prod 2019-02-17 00:00:00 0 S10m 1015 69.6 17.8 m/s
## 5 MEPS_prod 2019-02-17 00:00:00 0 S10m 1018 69.2 16.0 m/s
## 6 MEPS_prod 2019-02-17 00:00:00 0 S10m 1021 70.0 18.7 m/s
## 7 MEPS_prod 2019-02-17 00:00:00 0 S10m 1022 70 18.7 m/s
## 8 MEPS_prod 2019-02-17 00:00:00 0 S10m 1023 69.1 18.5 m/s
## 9 MEPS_prod 2019-02-17 00:00:00 0 S10m 1025 69.7 18.9 m/s
## 10 MEPS_prod 2019-02-17 00:00:00 0 S10m 1026 69.7 18.9 m/s
## # ℹ 12,020 more rows
## # ℹ 6 more variables: valid_dttm <dttm>, fcst_cycle <chr>,
## # MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr001 <dbl>, MEPS_prod_mbr002 <dbl>,
## # MEPS_prod_mbr003 <dbl>