Build a Point Verification Script
In this tutorial we are going to build a basic script that could be used to run verification tasks in a production environment.
Basic Skeleton
We will start our script from a skeleton of comments that describe the basic point verification workflow, and then populate it with the variable assignments and function calls. First, here is the basic workflow:

# Attach libraries
# Read Forecasts
# Scale
# Select Common cases
# Read observations
# Join
# Observation errors
# Verify
# Save
We can now add the function calls for each section:
# Attach libraries
library(harp)
library(here)
# Read Forecasts
fcst <- read_point_forecast(
  dttm       = seq_dttm(...),
  fcst_model = c("..."),
  fcst_type  = "det",
  parameter  = "...",
  lead_time  = seq(...),
  file_path  = "..."
)

# Scale
fcst <- scale_param(fcst, ..., "...")

# Select Common cases
fcst <- common_cases(fcst)

# Read observations
obs <- read_point_obs(
  dttm        = unique_valid_dttm(fcst),
  parameter   = "...",
  stations    = unique_stations(fcst),
  obs_path    = "...",
  min_allowed = ...,
  max_allowed = ...
)

# Scale
obs <- scale_param(obs, ..., "...", col = ...)

# Join
fcst <- join_to_fcst(fcst, obs)

# Observation errors
fcst <- check_obs_against_fcst(fcst, ..., num_sd_allowed = ...)

# Verify
verif <- det_verify(fcst, ..., thresholds = ..., groupings = c("..."))

# Save
save_point_verif(verif, "...")
Everything we have marked with ... is essentially a variable that we can set at the beginning of the script. So we can assign those variables and test the first iteration of the script.
# Attach libraries
library(harp)
library(here)

# Paths
fcst_dir  <- here("data", "FCTABLE")
obs_dir   <- here("data", "OBSTABLE")
verif_dir <- here("data", "verification")

# Parameters
prm <- "T2m"

# Forecast variables
date_times  <- seq_dttm(2023080100, 2023083118, "6h")
fcst_models <- "arome_arctic"
lt          <- seq(0, 24, 3)
fc_scaling  <- -273.15
fc_units    <- "degC"

# Obs variables
obs_scaling <- -273.15
obs_units   <- "degC"
min_obs     <- 223
max_obs     <- 333
error_sd    <- 4

# Verif variables
thresh <- seq(-5, 25, 5)
grps   <- list(
  "lead_time",
  c("lead_time", "fcst_cycle")
)

# Read Forecasts
fcst <- read_point_forecast(
  dttm       = date_times,
  fcst_model = fcst_models,
  fcst_type  = "det",
  parameter  = prm,
  lead_time  = lt,
  file_path  = fcst_dir
)

# Scale
fcst <- scale_param(fcst, fc_scaling, fc_units)

# Select Common cases
fcst <- common_cases(fcst)

# Read observations
obs <- read_point_obs(
  dttm        = unique_valid_dttm(fcst),
  parameter   = prm,
  stations    = unique_stations(fcst),
  obs_path    = obs_dir,
  min_allowed = min_obs,
  max_allowed = max_obs
)

# Scale
obs <- scale_param(obs, obs_scaling, obs_units, col = prm)

Error in `[[<-`:
! Assigned data `op(x[[col_name]], scaling)` must be compatible with
  existing data.
✖ Existing data has 48353 rows.
✖ Assigned data has 0 rows.
ℹ Only vectors of size 1 are recycled.
Caused by error in `vectbl_recycle_rhs_rows()`:
! Can't recycle input of size 0 to size 48353.

# Join
fcst <- join_to_fcst(fcst, obs)

Error: Join will not be done due to units incompatibility. You can force the join by setting force = TRUE
OR, units imcompatibility can be fixed with the set_units(), or scale_param() functions.

# Observation errors
fcst <- check_obs_against_fcst(fcst, prm, num_sd_allowed = error_sd)

Error in `check_obs_against_fcst()`:
✖ `parameter = prm` not found in data.
ℹ `.fcst` does not include column `prm`.

# Verify
verif <- det_verify(fcst, prm, thresholds = thresh, groupings = grps)

Error: No column found for prm

# Save
save_point_verif(verif, verif_dir)

Error in eval(expr, envir, enclos): object 'verif' not found
We have now run into our first problem: an error has occurred with scale_param() for the observations data. The error message isn’t that instructive (unfortunately). The cause is that many harp functions use Non Standard Evaluation (NSE): scale_param() interpreted col = prm as a column literally named prm, rather than the value ("T2m") stored in the variable prm.
Non Standard Evaluation
harp takes a lot of inspiration from the tidyverse, a collection of R packages designed to provide a consistent way of working with data in R. Some of the functions that we have already seen, such as mutate(), are part of the tidyverse. These functions are designed to make interactive work smooth, and one aspect of this is that arguments can be passed unquoted. This applies to the names of columns in data frames, and harp follows suit. While working interactively is easier and more intuitive with NSE, it comes at the expense of making programming a little trickier.

However, the solution is straightforward. When the name of a column is passed to a function via a variable, rather than as the bare name itself, it needs to be embraced, that is, wrapped in double curly braces {{ }}. In the case of harp functions, this is most often where the parameter name is passed to a function to identify the column that contains the observations.
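For example, with the parameter name stored in the variable prm, the column argument must be embraced. A minimal sketch reusing the names from the script above:

# Fails: NSE looks for a column literally named "prm"
obs <- scale_param(obs, obs_scaling, obs_units, col = prm)

# Works: {{prm}} is evaluated to the value of prm, i.e. "T2m"
obs <- scale_param(obs, obs_scaling, obs_units, col = {{prm}})

We can now modify our basic script to embrace arguments where appropriate.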
# Attach libraries
library(harp)
library(here)

# Paths
fcst_dir  <- here("data", "FCTABLE")
obs_dir   <- here("data", "OBSTABLE")
verif_dir <- here("data", "verification")

# Parameters
prm <- "T2m"

# Forecast variables
date_times  <- seq_dttm(2023080100, 2023083118, "6h")
fcst_models <- "arome_arctic"
lt          <- seq(0, 24, 3)
fc_scaling  <- -273.15
fc_units    <- "degC"

# Obs variables
obs_scaling <- -273.15
obs_units   <- "degC"
min_obs     <- 223
max_obs     <- 333
error_sd    <- 4

# Verif variables
thresh <- seq(-5, 25, 2.5)
grps   <- list(
  "lead_time",
  c("lead_time", "fcst_cycle")
)

# Read Forecasts
fcst <- read_point_forecast(
  dttm       = date_times,
  fcst_model = fcst_models,
  fcst_type  = "det",
  parameter  = prm,
  lead_time  = lt,
  file_path  = fcst_dir
)

# Scale
fcst <- scale_param(fcst, fc_scaling, fc_units)

# Select Common cases
fcst <- common_cases(fcst)

# Read observations
obs <- read_point_obs(
  dttm        = unique_valid_dttm(fcst),
  parameter   = prm,
  stations    = unique_stations(fcst),
  obs_path    = obs_dir,
  min_allowed = min_obs,
  max_allowed = max_obs
)

# Scale
obs <- scale_param(obs, obs_scaling, obs_units, col = {{prm}})

# Join
fcst <- join_to_fcst(fcst, obs)

# Observation errors
fcst <- check_obs_against_fcst(fcst, {{prm}}, num_sd_allowed = error_sd)

# Verify
verif <- det_verify(fcst, {{prm}}, thresholds = thresh, groupings = grps)

# Save
save_point_verif(verif, verif_dir)
We now have the beginnings of a verification script. But what if we want to run verification for different parameters? One approach would be for the user simply to change the variable assignments at the top of the script for each use; however, this requires a lot of manual work. A more efficient approach is to set up the script to handle multiple parameters.
Script for multiple parameters
When dealing with multiple parameters, each parameter may require different scalings, thresholds, error settings, groupings, or even different parameter names for forecasts and observations. This requires a little more setting up. One approach is to define some defaults together with specific settings for individual parameters, and lists in R provide an excellent mechanism for doing this, as sketched below.
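The lookup pattern is simple: use the parameter-specific value if it exists, otherwise fall back to the default. A minimal sketch (get_setting() is a hypothetical helper shown only for illustration; the script that follows inlines the same is.null() check):

# Hypothetical helper illustrating the defaults pattern: return the
# parameter-specific setting if present, otherwise the default
get_setting <- function(param_list, name, dflts) {
  value <- param_list[[name]]
  if (is.null(value)) {
    value <- dflts[[name]]
  }
  value
}

# With the params and defaults lists defined below,
# get_setting(params$S10m, "error_sd", defaults) would return 6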
Let’s now make the parameter section of our script more descriptive so that it can handle multiple parameters.
# Attach libraries
library(harp)
library(here)

# Date times
start_dttm <- 2023080100
end_dttm   <- 2023083118
dttm_step  <- "6h"

# Forecast variables
date_times  <- seq_dttm(start_dttm, end_dttm, dttm_step)
fcst_models <- "arome_arctic"
lt          <- seq(0, 24, 3)

# Paths
fcst_dir  <- here("data", "FCTABLE")
obs_dir   <- here("data", "OBSTABLE")
verif_dir <- here("data", "verification")

# Defaults
defaults <- list(
  grps = list(
    "lead_time",
    c("lead_time", "fcst_cycle")
  ),
  error_sd = 6
)

# Parameters
params <- list(

  T2m = list(
    fc_scaling = list(
      scaling   = -273.15,
      new_units = "degC"
    ),
    obs_scaling = list(
      scaling   = -273.15,
      new_units = "degC"
    ),
    min_obs  = 223,
    max_obs  = 333,
    error_sd = 4,
    thresh   = seq(-5, 25, 5)
  ),

  S10m = list(
    fc_param = "ws10m",
    min_obs  = 0,
    max_obs  = 100,
    thresh   = c(1, 2, 3, 5, 7.5, 10, 15, 20, 25)
  )

)
# Loop over parameters
for (prm in names(params)) {

  fc_prm <- params[[prm]]$fc_param
  if (is.null(fc_prm)) {
    fc_prm <- prm
  }

  # Read Forecasts
  fcst <- read_point_forecast(
    dttm       = date_times,
    fcst_model = fcst_models,
    fcst_type  = "det",
    parameter  = fc_prm,
    lead_time  = lt,
    file_path  = fcst_dir
  )

  # Scale
  if (!is.null(params[[prm]]$fc_scaling)) {
    fcst <- do.call(
      scale_param, c(list(x = fcst), params[[prm]]$fc_scaling)
    )
  }

  # Select Common cases
  fcst <- common_cases(fcst)

  # Read observations
  obs_prm <- params[[prm]]$obs_param
  if (is.null(obs_prm)) {
    obs_prm <- prm
  }

  obs <- read_point_obs(
    dttm        = unique_valid_dttm(fcst),
    parameter   = obs_prm,
    stations    = unique_stations(fcst),
    obs_path    = obs_dir,
    min_allowed = params[[prm]]$min_obs,
    max_allowed = params[[prm]]$max_obs
  )

  # Scale
  if (!is.null(params[[prm]]$obs_scaling)) {
    obs <- do.call(
      scale_param, c(
        list(x = obs),
        params[[prm]]$obs_scaling,
        list(col = {{obs_prm}})
      )
    )
  }

  # Join
  fcst <- join_to_fcst(fcst, obs)

  # Observation errors
  error_sd <- params[[prm]]$error_sd
  if (is.null(error_sd)) {
    error_sd <- defaults$error_sd
  }
  fcst <- check_obs_against_fcst(
    fcst, {{obs_prm}}, num_sd_allowed = error_sd
  )

  # Verify
  grps <- params[[prm]]$grps
  if (is.null(grps)) {
    grps <- defaults$grps
  }
  thresh <- params[[prm]]$thresh

  verif <- det_verify(
    fcst, {{obs_prm}}, thresholds = thresh, groupings = grps
  )

  # Save
  save_point_verif(verif, verif_dir)
}
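Note the use of do.call() in the scaling steps: it calls a function with arguments supplied as a list, which lets us splice the scaling settings stored in the parameter list straight into scale_param(). For example, the two calls below are equivalent:

# do.call() supplies the arguments to scale_param() from a list
fcst <- scale_param(fcst, scaling = -273.15, new_units = "degC")
fcst <- do.call(scale_param, list(x = fcst, scaling = -273.15, new_units = "degC"))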
Function instead of loop
R is designed to be a functional programming language. One of the advantages of this is that a function can be called repeatedly with different arguments, making for (hopefully!) more structured and repeatable programs. If a function fails, it is also much easier to move on to the next iteration.
Let’s rewrite the contents of our loop as a function that takes some arguments.
run_verif <- function(
  param_list,
  param_name,
  fc_models,
  dttm,
  ld_times,
  fc_data_dir,
  obs_data_dir,
  dflts,
  vrf_data_dir
) {

  fc_prm <- param_list$fc_param
  if (is.null(fc_prm)) {
    fc_prm <- param_name
  }

  # Read Forecasts
  fcst <- read_point_forecast(
    dttm       = dttm,
    fcst_model = fc_models,
    fcst_type  = "det",
    parameter  = fc_prm,
    lead_time  = ld_times,
    file_path  = fc_data_dir
  )

  # Scale
  if (!is.null(param_list$fc_scaling)) {
    fcst <- do.call(
      scale_param, c(list(x = fcst), param_list$fc_scaling)
    )
  }

  # Select Common cases
  fcst <- common_cases(fcst)

  # Read observations
  obs_prm <- param_list$obs_param
  if (is.null(obs_prm)) {
    obs_prm <- param_name
  }

  obs <- read_point_obs(
    dttm        = unique_valid_dttm(fcst),
    parameter   = obs_prm,
    stations    = unique_stations(fcst),
    obs_path    = obs_data_dir,
    min_allowed = param_list$min_obs,
    max_allowed = param_list$max_obs
  )

  # Scale
  if (!is.null(param_list$obs_scaling)) {
    obs <- do.call(
      scale_param, c(
        list(x = obs),
        param_list$obs_scaling,
        list(col = {{obs_prm}})
      )
    )
  }

  # Join
  fcst <- join_to_fcst(fcst, obs)

  # Observation errors
  error_sd <- param_list$error_sd
  if (is.null(error_sd)) {
    error_sd <- dflts$error_sd
  }
  fcst <- check_obs_against_fcst(
    fcst, {{obs_prm}}, num_sd_allowed = error_sd
  )

  # Verify
  grps <- param_list$grps
  if (is.null(grps)) {
    grps <- dflts$grps
  }
  thresh <- param_list$thresh

  verif <- det_verify(
    fcst, {{obs_prm}}, thresholds = thresh, groupings = grps
  )

  # Save
  save_point_verif(verif, vrf_data_dir)
}
Now, instead of using a for loop, we can use iwalk() from the purrr package. iwalk() calls a function for each element of the list it is given: the first argument to that function is the list element and the second is the element’s name. iwalk() only calls the function for its side effects (e.g. writing a file) and returns its input untouched. Here we will call an anonymous function that in turn calls run_verif().
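To see the mechanics in isolation, here is a minimal sketch of iwalk() with a toy list:

library(purrr)

# iwalk() passes each element (x) and its name (y) to the function
iwalk(list(a = 1, b = 2), \(x, y) cat(y, "=", x, "\n"))
#> a = 1
#> b = 2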
library(purrr)

iwalk(
  params,
  \(x, y) run_verif(
    param_list   = x,
    param_name   = y,
    fc_models    = fcst_models,
    dttm         = date_times,
    ld_times     = lt,
    fc_data_dir  = fcst_dir,
    obs_data_dir = obs_dir,
    dflts        = defaults,
    vrf_data_dir = verif_dir
  )
)
Using this approach, the run_verif() function can be modified to perform any manipulation of the data that the user can think of, while lists combined with defaults allow the user to provide parameter-specific settings without having to edit the run_verif() function.
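Since iwalk() calls run_verif() only for its side effects, it is also straightforward to keep going when verification fails for a single parameter. A minimal sketch (not part of the original script) using possibly() from purrr:

# possibly() wraps run_verif() so that an error returns NULL instead of
# stopping the run; a failure for one parameter no longer aborts the others
safe_run_verif <- possibly(run_verif, otherwise = NULL)

safe_run_verif() can then be used in place of run_verif() in the iwalk() call above.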
Use of harp Point Verification in Production
The following presentations were made during this part of the course.
Use of harp in ACCORD
Use of harp in UWC-West
In the next tutorial we will go through the workflow to verify data on regular grids.