library(harp)
library(here)
library(dplyr)
library(patchwork)

Output interpolated forecasts to sqlite

You simply need to run read_forecast() with output_file_opts = sqlite_opts(path = /path/to/output)

template <- "{fcst_model}/{YYYY}/{MM}/{DD}/{fcst_model}_det_2_5km_{YYYY}{MM}{DD}T{HH}Z.nc"

my_opts  <-  modify_opts(
  netcdf_opts(
    "met_norway_det"
  ), 
  x_dim = "x1", y_dim = "y1"
)

read_forecast(
  dttm                = seq_dttm(2022021200, 2022021300, "1d"),
  fcst_model          = "arome_arctic",
  parameter           = c("RH2m", "T2m"),
  lead_time           = seq(0, 24, 3),
  transformation      = "interpolate", 
  file_path           = here("data/netcdf"),
  file_template       = template,
  file_format_opts    = my_opts,
  output_file_opts    = sqlite_opts(path = file.path(tempdir(), "FCTABLE")) 
)

my_opts <- modify_opts(my_opts, z_var = "pressure")
read_forecast(
  dttm                = seq_dttm(2022021200, 2022021300, "1d"),
  fcst_model          = "arome_arctic",
  parameter           = c("RH", "T"),
  lead_time           = seq(0, 24, 3),
  transformation      = "interpolate", 
  file_path           = here("data/netcdf"),
  file_template       = template,
  file_format_opts    = my_opts,
  vertical_coordinate = "pressure",
  output_file_opts    = sqlite_opts(path = file.path(tempdir(), "FCTABLE")) 
)

dir(file.path(tempdir(), "FCTABLE/arome_arctic/2022/02"))

## [1] "FCTABLE_RH_202202_00.sqlite"   "FCTABLE_RH2m_202202_00.sqlite"
## [3] "FCTABLE_T_202202_00.sqlite"    "FCTABLE_T2m_202202_00.sqlite"

Output point observations to SQLite

You simply need to run read_obs() with output_file_opts = obstable_opts(path = /path/to/output)

read_obs(
  dttm                = seq_dttm(2019021700, 2019021923),
  file_path           = here("data/vobs"),
  output_format_opts  = obstable_opts(
    path = file.path(tempdir(), "OBSTABLE")
  ) 
)

dir(file.path(tempdir(), "OBSTABLE"))

## [1] "OBSTABLE_2019.sqlite"

Read in a forecast sqlite (FCTABLE) files

FCTABLE files are read in with read_point_forecast(). You need to give it times (currently as start_date and end_date soon to be as date_times), what the forecast models are called, whether it is a deterministic (det) or ensemble (eps) forecast, one parameter and a few other things like lead time, stations etc. For all forecast reading functions the default lead times are 0 - 48 every three hours.

Lets read some data - we’re going to read 2m temperature from three different models: “arome_arctic”, “IFSHIRES”, and “MEPS_det” for a period in March 2018.

t2m <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T2m",  
  file_path  = here("data/FCTABLE")
)

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

t2m

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 105,264 × 11
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1002 T2m      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1003 T2m      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1004 T2m      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1006 T2m      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1007 T2m      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1008 T2m      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1009 T2m      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1011 T2m      
## # ℹ 105,254 more rows
## # ℹ 5 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>,
## #   model_elevation <dbl>, units <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 79,968 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1002 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1003 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1006 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1007 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1008 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1009 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1011 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
## # ℹ 79,958 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 467,976 × 10
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1014 T2m      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1018 T2m      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1023 T2m      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1025 T2m      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1026 T2m      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1027 T2m      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1033 T2m      
## # ℹ 467,966 more rows
## # ℹ 4 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>

We can see that we have a list of 3 data frames. If we want to access each data frame individually, we can do so by the name in the list - either via $name or [["name"]].

t2m$IFSHIRES

## ::deterministic point forecast:: # A tibble: 79,968 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1002 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1003 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1006 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1007 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1008 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1009 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1011 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
## # ℹ 79,958 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

t2m[["IFSHIRES"]]

## ::deterministic point forecast:: # A tibble: 79,968 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1002 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1003 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1006 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1007 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1008 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1009 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1011 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
## # ℹ 79,958 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

my_model <- "IFSHIRES"
t2m[[my_model]]

## ::deterministic point forecast:: # A tibble: 79,968 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1002 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1003 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1006 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1007 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1008 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1009 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1011 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
## # ℹ 79,958 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

We can see that each data frame has a different number of rows - this is because all of the models are on different domains so have different stations. For verification we want a fair comparison, so we need to find the common dates and stations. We do this with the common_cases() function.

t2m <- common_cases(t2m)
t2m

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 73,440 × 11
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1018 T2m      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1023 T2m      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1025 T2m      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1026 T2m      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1027 T2m      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1033 T2m      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1035 T2m      
## # ℹ 73,430 more rows
## # ℹ 5 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>,
## #   model_elevation <dbl>, units <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 73,440 × 10
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1018 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1023 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1025 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1026 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1027 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1033 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1035 T2m      
## # ℹ 73,430 more rows
## # ℹ 4 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>, z <dbl>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 73,440 × 10
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1015 T2m      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1018 T2m      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1023 T2m      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1025 T2m      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1026 T2m      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1027 T2m      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1033 T2m      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1035 T2m      
## # ℹ 73,430 more rows
## # ℹ 4 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>

Now we can see that each data frame has the same number of rows.

For precipitation we can use the parameter name AccPcpXh, where X is the precipitation accumulation time in hours. The files contain precipitation since model initialization and by using an Acc parameter name the accumulations are computed for each lead time.

pcp3h <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  lead_time  = seq(0, 24, 3),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "AccPcp3h",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases()

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

pcp3h

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 34,560 × 10
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1001 Pcp      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1010 Pcp      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1015 Pcp      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1018 Pcp      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1023 Pcp      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1025 Pcp      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1026 Pcp      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1027 Pcp      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1033 Pcp      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 03:00:00         3  1035 Pcp      
## # ℹ 34,550 more rows
## # ℹ 4 more variables: fcst <dbl>, fcst_cycle <chr>, model_elevation <dbl>,
## #   units <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 34,560 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1001 Pcp      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1010 Pcp      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1015 Pcp      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1018 Pcp      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1023 Pcp      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1025 Pcp      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1026 Pcp      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1027 Pcp      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1033 Pcp      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1035 Pcp      
## # ℹ 34,550 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 34,560 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1001 Pcp      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1010 Pcp      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1015 Pcp      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1018 Pcp      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1023 Pcp      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1025 Pcp      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1026 Pcp      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1027 Pcp      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1033 Pcp      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 03:00:00         3  1035 Pcp      
## # ℹ 34,550 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

pcp6h <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  lead_time  = seq(0, 24, 3),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "AccPcp6h",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases()

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

## Warning: Not enough lead times to compute accumulation - will try to get more data

## Warning: Not enough lead times to compute accumulation - will try to get more data

## Warning: Not enough lead times to compute accumulation - will try to get more data

pcp6h

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 30,240 × 10
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1112 Pcp      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1105 Pcp      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1144 Pcp      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1117 Pcp      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1088 Pcp      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1055 Pcp      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1033 Pcp      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1102 Pcp      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1121 Pcp      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 06:00:00         6  1107 Pcp      
## # ℹ 30,230 more rows
## # ℹ 4 more variables: fcst <dbl>, fcst_cycle <chr>, model_elevation <dbl>,
## #   units <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 30,240 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1001 Pcp      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1010 Pcp      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1015 Pcp      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1018 Pcp      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1023 Pcp      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1025 Pcp      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1026 Pcp      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1027 Pcp      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1033 Pcp      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1035 Pcp      
## # ℹ 30,230 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 30,240 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1001 Pcp      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1010 Pcp      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1015 Pcp      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1018 Pcp      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1023 Pcp      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1025 Pcp      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1026 Pcp      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1027 Pcp      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1033 Pcp      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 06:00:00         6  1035 Pcp      
## # ℹ 30,230 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

pcp12h <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  lead_time  = seq(0, 24, 3),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "AccPcp12h",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases()

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

## Warning: Not enough lead times to compute accumulation - will try to get more data

## Warning: Not enough lead times to compute accumulation - will try to get more data

## Warning: Not enough lead times to compute accumulation - will try to get more data

pcp12h

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 21,600 × 10
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1112 Pcp      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1105 Pcp      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1144 Pcp      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1117 Pcp      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1088 Pcp      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1055 Pcp      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1033 Pcp      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1102 Pcp      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1121 Pcp      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 12:00:00        12  1107 Pcp      
## # ℹ 21,590 more rows
## # ℹ 4 more variables: fcst <dbl>, fcst_cycle <chr>, model_elevation <dbl>,
## #   units <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 21,600 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1001 Pcp      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1010 Pcp      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1015 Pcp      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1018 Pcp      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1023 Pcp      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1025 Pcp      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1026 Pcp      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1027 Pcp      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1033 Pcp      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1035 Pcp      
## # ℹ 21,590 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 21,600 × 9
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1001 Pcp      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1010 Pcp      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1015 Pcp      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1018 Pcp      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1023 Pcp      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1025 Pcp      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1026 Pcp      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1027 Pcp      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1033 Pcp      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 12:00:00        12  1035 Pcp      
## # ℹ 21,590 more rows
## # ℹ 3 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>

It’s a bit advanced how this works but we could plot histograms of the rainfall amounts for the different accumulation periods.

accum_data <- bind(
  list(
    "3h precip"  = bind(pcp3h),
    "6h precip"  = bind(pcp6h),
    "12h precip" = bind(pcp12h)
  ),
  .id = "accum"
)

ggplot(accum_data, aes(x = fcst, fill = accum)) +
  geom_histogram(bins = 50) +
  facet_grid(cols = vars(forcats::fct_inorder(accum)), rows = vars(fcst_model)) +
  scale_y_log10() + 
  theme(legend.position = "none")

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 260 rows containing missing values (`geom_bar()`).

It clearly shows a problem with the IFS data - if we look closer, we will see that the units of IFSHIRES are in Mg/m² whereas all other data are in kg/m². We can fix this with the scale_param() function.

pcp3h$IFSHIRES <- scale_param(
  pcp3h$IFSHIRES, scaling = 1000, new_units = "kg/m^2", mult = TRUE
)
# Since the arguments are all in order, we don't need to name them
pcp6h$IFSHIRES  <- scale_param(pcp6h$IFSHIRES, 1000, "kg/m^2", TRUE)
pcp12h$IFSHIRES <- scale_param(pcp12h$IFSHIRES, 1000, "kg/m^2", TRUE)

accum_data <- bind(
  list(
    "3h precip"  = bind(pcp3h),
    "6h precip"  = bind(pcp6h),
    "12h precip" = bind(pcp12h)
  ),
  .id = "accum"
)

ggplot(accum_data, aes(x = fcst, fill = accum)) +
  geom_histogram(bins = 50) +
  facet_grid(cols = vars(forcats::fct_inorder(accum)), rows = vars(fcst_model)) +
  scale_y_log10() + 
  theme(legend.position = "none")

## Warning: Transformation introduced infinite values in continuous y-axis

## Warning: Removed 197 rows containing missing values (`geom_bar()`).

Read in observations SQLite (OBSTABLE) files

For reading point observations we use the read_point_obs() function. This requires a start time, end time (to be replaced by a sequence of date times), the parameter and the path to the files. Also the stations can be specified.

For verification we only want to read in the data that are common to forecast data and we have a few helper functions to get that information from the forecast data - unique_valid_dttm() and unique_stations() among others.

t2m_obs <- read_point_obs(
  dttm       = unique_valid_dttm(t2m), 
  parameter  = "T2m",
  stations   = unique_stations(t2m),
  obs_path   = here("data/OBSTABLE") 
)

t2m_obs

## # A tibble: 34,731 × 7
##    valid_dttm            SID   lon   lat  elev   T2m units
##    <dttm>              <int> <dbl> <dbl> <dbl> <dbl> <chr>
##  1 2018-03-08 00:00:00  1001 -8.67  70.9     9  272. K    
##  2 2018-03-08 00:00:00  1010 16.1   69.3    13  266. K    
##  3 2018-03-08 00:00:00  1015 17.8   69.6    14  264. K    
##  4 2018-03-08 00:00:00  1018 16.0   69.2   436  262  K    
##  5 2018-03-08 00:00:00  1023 18.5   69.1    77  255  K    
##  6 2018-03-08 00:00:00  1025 18.9   69.7     9  263. K    
##  7 2018-03-08 00:00:00  1026 18.9   69.7   115  262. K    
##  8 2018-03-08 00:00:00  1027 18.9   69.7    20  263. K    
##  9 2018-03-08 00:00:00  1033 19.5   70.2    24  266  K    
## 10 2018-03-08 00:00:00  1035 20.1   69.6   710  256. K    
## # ℹ 34,721 more rows

So now we have our observations, how do we use them for verification? The first thing we need to do is find all of the forecast - observation pairs. We can do that with the join_to_fcst() function, which will do an “inner join” between the forecast and the observations

t2m <- join_to_fcst(t2m, t2m_obs)

## Joining, by = c("valid_dttm", "SID", "units")
## Joining, by = c("valid_dttm", "SID", "units")
## Joining, by = c("valid_dttm", "SID", "units")

NOTE - This section about scatter plots still works, but the function has been deprecated in favour of producing the data for scatter plots at verification time.

So now we can see that a T2m column has been added to the data. We now have our first opportunity for some sort of verification the scatter (or more accurately hexbin) plot. For making a scatter plot we can use the plot_scatter() function. It takes the data, the fcst_model we want a scatter plot for and the name of the observation column.

plot_scatter(t2m, IFSHIRES, T2m)

## Warning: `plot_scatter()` was deprecated in harpVis 0.1.0.
## ℹ Please use `plot_point_verif()` instead.
## ℹ Data for scatter plots are now aggregated at verfication time and can be
##   plotted with plot_point_verif(..., score = hexbin, ...)
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## Warning: Unknown or uninitialised column: `member`.

Unfortunately plot_scatter doesn’t have a faceting functionality (yet), but we can plot all of the models together using the patchwork package - we just need to add the plots together! (Make sure you have done library(patchwork) somewhere before trying this). See here for more info about makin plot layouts.

plot_scatter(t2m, IFSHIRES, T2m) + labs(title = "IFSHIRES") +
  plot_scatter(t2m, arome_arctic, T2m) + labs(title = "arome_arctic") +
  plot_scatter(t2m, MEPS_det, T2m) + labs(title = "MEPS_det") +   plot_layout(nrow = 2)

## Warning: Unknown or uninitialised column: `member`.
## Unknown or uninitialised column: `member`.
## Unknown or uninitialised column: `member`.

Note that each plot uses its own scale, so we need to add a scale that works for each plot. Now it’s easier to write quick a function. Note that the fcst_model and parameter arguments to plot_scatter() are not in quotes, so to make sure that the function evaluates those variables we need to wrap them in {{ }}

plot_fun <- function(fcst_model, .fcst, param) {
  plot_scatter(.fcst, {{fcst_model}}, {{param}}) + 
    labs(title = fcst_model) +
    scale_fill_viridis_c(limits = c(0, 2500), option = "C")
}

We can then loop over the plots and add them together.

for (fcst_model in names(t2m)) {
  if (fcst_model == names(t2m)[1]) {
    my_plot <- plot_fun({{fcst_model}}, t2m, T2m)
  } else {
    my_plot <- my_plot + plot_fun({{fcst_model}}, t2m, T2m)
  }
}

my_plot + plot_layout(nrow = 2, guides = "collect")

A much less cumbersome way is to use functional programming - we won’t go into this here, but R is very suited to functional programming so it’s worth learning.

Reduce(`+`, lapply(names(t2m), plot_fun, t2m, T2m)) +
  plot_layout(nrow = 2, guides = "collect")

Now we have scatter plots we can go on the compute some verification scores. For deterministic forecasts this is done with the det_verify() function. You simply give it the data and the name of the column that holds the observations.

verif <- det_verify(t2m, T2m)

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time✔

## Hexbin for lead_time✔

## ::Computing verification for fcst_model `IFSHIRES`::

## Det summary for lead_time✔

## Hexbin for lead_time✔

## ::Computing verification for fcst_model `MEPS_det`::

## Det summary for lead_time✔

## Hexbin for lead_time✔

verif

## ::det_summary_scores:: # A tibble: 51 × 9
##    fcst_model   lead_time num_cases num_stations    bias  rmse   mae  stde
##    <chr>            <int>     <int>        <int>   <dbl> <dbl> <dbl> <dbl>
##  1 arome_arctic         0      4081          180  0.725   3.04  2.11  2.95
##  2 arome_arctic         3      3935          175  0.875   3.24  2.25  3.12
##  3 arome_arctic         6      4244          180  0.636   3.06  2.11  2.99
##  4 arome_arctic         9      4136          175  0.128   2.09  1.44  2.09
##  5 arome_arctic        12      4254          180 -0.0507  1.59  1.14  1.59
##  6 arome_arctic        15      4136          175 -0.128   1.57  1.14  1.56
##  7 arome_arctic        18      4244          180 -0.0687  2.49  1.84  2.49
##  8 arome_arctic        21      4132          175  0.305   3.29  2.40  3.28
##  9 arome_arctic        24      4079          180  0.679   3.81  2.74  3.75
## 10 arome_arctic        27      3933          175  1.02    4.23  2.99  4.10
## # ℹ 41 more rows
## # ℹ 1 more variable: hexbin <list>
## 
## --harp verification for T2m--
##  # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
##  # using 180 observation stations
##  # for verification groups: 
##     -> lead_time

## ℹ use `attributes()` to see detailed metadata

By default the summary scores bias, RMSE, MAE and STDE are computed for each lead time.

We can then plot the scores using plot_point_verif(), which needs the verification data and the score to plot.

plot_point_verif(verif, bias)

The default behaviour is to plot the number of cases as well, but that can easily be switched off.

plot_point_verif(verif, rmse, plot_num_cases = FALSE)

You can also change the colours by passing a data frame with colours - now fcst_model has become mname (a relic from the past that will be brought up to date)

my_colours <- data.frame(
  mname = names(t2m),
  colour = c("red", "yellow", "blue")
)
plot_point_verif(verif, stde, colour_table = my_colours)

Or you can add colours as a manual scale using a named vector - only works if plot_num_cases = FALSE.

plot_point_verif(verif, mae, plot_num_cases = FALSE) + 
  scale_colour_manual(values = c(
    arome_arctic = "red", 
    IFSHIRES     = "yellow", 
    MEPS_det     = "blue"
  ))

## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

We don’t just get summary scores. If we give the det_verify() function some thresholds, we also get a whole bunch of categorical scores. But maybe we want to convert temperature to degrees C first. If we had done this before joining using scale_param() on both the forecasts and the observations separately it would be more straightforward, but there is a way around it - scale_param() will scale the forecast columns automatically, but we have to the observations column manually:

t2m <- mutate(
  scale_param(t2m, -273.15, "degC"),
  T2m = T2m - 273.15
)

Now we can verify with thresholds in degrees C.

verif <- det_verify(t2m, T2m, thresholds = seq(-25, 5, 5))

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time✔

## Hexbin for lead_time✔

## 
Det categorical for lead_time & threshold ■■■■■■■■■■■■■                     40%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■                    43%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■          78%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■■■■       87%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    96%…

                                                                                 
Det categorical for lead_time & threshold✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time✔
## Hexbin for lead_time✔
## 
Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     94%…

                                                                                 
Det categorical for lead_time & threshold✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time✔
## Hexbin for lead_time✔
## 
Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■                   48%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■■■■■      91%…

Det categorical for lead_time & threshold ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    98%…

                                                                                 
Det categorical for lead_time & threshold✔

verif

## ::det_summary_scores:: # A tibble: 51 × 9
##    fcst_model   lead_time num_cases num_stations    bias  rmse   mae  stde
##    <chr>            <int>     <int>        <int>   <dbl> <dbl> <dbl> <dbl>
##  1 arome_arctic         0      4081          180  0.725   3.04  2.11  2.95
##  2 arome_arctic         3      3935          175  0.875   3.24  2.25  3.12
##  3 arome_arctic         6      4244          180  0.636   3.06  2.11  2.99
##  4 arome_arctic         9      4136          175  0.128   2.09  1.44  2.09
##  5 arome_arctic        12      4254          180 -0.0507  1.59  1.14  1.59
##  6 arome_arctic        15      4136          175 -0.128   1.57  1.14  1.56
##  7 arome_arctic        18      4244          180 -0.0687  2.49  1.84  2.49
##  8 arome_arctic        21      4132          175  0.305   3.29  2.40  3.28
##  9 arome_arctic        24      4079          180  0.679   3.81  2.74  3.75
## 10 arome_arctic        27      3933          175  1.02    4.23  2.99  4.10
## # ℹ 41 more rows
## # ℹ 1 more variable: hexbin <list>
## 
## ::det_threshold_scores:: # A tibble: 357 × 41
##    fcst_model   lead_time threshold num_stations num_cases_for_threshold_total
##    <chr>            <int>     <dbl>        <int>                         <int>
##  1 arome_arctic         0       -25          180                          4045
##  2 arome_arctic         0       -20          180                          3962
##  3 arome_arctic         0       -15          180                          3593
##  4 arome_arctic         0       -10          180                          2657
##  5 arome_arctic         0        -5          180                          1339
##  6 arome_arctic         0         0          180                           266
##  7 arome_arctic         0         5          180                             2
##  8 arome_arctic         3       -25          175                          3893
##  9 arome_arctic         3       -20          175                          3791
## 10 arome_arctic         3       -15          175                          3358
## # ℹ 347 more rows
## # ℹ 36 more variables: num_cases_for_threshold_observed <dbl>,
## #   num_cases_for_threshold_forecast <dbl>, cont_tab <list>,
## #   threat_score <dbl>, hit_rate <dbl>, miss_rate <dbl>,
## #   false_alarm_rate <dbl>, false_alarm_ratio <dbl>, heidke_skill_score <dbl>,
## #   pierce_skill_score <dbl>, kuiper_skill_score <dbl>, percent_correct <dbl>,
## #   frequency_bias <dbl>, equitable_threat_score <dbl>, odds_ratio <dbl>, …
## 
## --harp verification for T2m--
##  # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
##  # using 180 observation stations
##  # for verification groups: 
##     -> lead_time & threshold

## ℹ use `attributes()` to see detailed metadata

Now we can plot a cateogorical score

plot_point_verif(verif, equitable_threat_score)

Yuck! What’s that? It’s basically plotted all of the thresholds together, so it’s suffering from over plotting. The simplest thing to do would be to just filter the data - we can do that using the filter_by argument and wrapping the conditional expression in vars

plot_point_verif(
  verif, 
  hit_rate, 
  filter_by = vars(threshold == -10)
)

Or we can facet by the thresholds by using the facet_by argument and wrapping the column name in vars

plot_point_verif(
  verif, 
  hit_rate, 
  facet_by = vars(threshold)
)

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

You can also plot as a function of threshold instead of lead time

plot_point_verif(
  verif, 
  frequency_bias, 
  x_axis    = threshold,
  filter_by = vars(lead_time %in% seq(12, 48, 12)), 
  facet_by  = vars(lead_time)
)

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

Once you’ve completed your verification you can save it - here I’m saving it to a temporary directory that will be removed at the end of the R session.

save_point_verif(verif, tempdir())

## Saving point verification scores to: 
## /tmp/Rtmpn8KNSm/harpPointVerif.harp.T2m.harp.20180308-20180331.harp.arome_arctic.model.IFSHIRES.model.MEPS_det.rds

You can then open the verification in an app in your browser using shiny_plot_point_verif() giving it the path to your data as the only argument

shiny_plot_point_verif(tempdir())

Manipulating `harp_list` objects

In general we don’t have to access individual data frames, as most dplyr verbs work on harp_fcst objects. The exception is group_by() %>% summarize().

We may want to get which stations were available for each forecast model before selecting the common cases. Here we’d have to wrap the group_by in an lapply so that we apply it to each element of the list. So start by making a function…

t2m <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T2m",  
  file_path  = here("data/FCTABLE"), 
  get_lat_and_lon = TRUE
)

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

group_fun <- function(df) {
  group_by(df, SID) %>% 
    summarize(
      lat = mean(lat), 
      lon = mean(lon),
      count = n()
    )
}

station_count <- lapply(t2m, group_fun)

As our data frames are quite simple we could join them into a single data frame using bind_rows

station_count <- bind_rows(station_count, .id = "fcst_model")

And now we can plot

ggplot(station_count, aes(lon, lat, colour = fcst_model)) +
  geom_polygon(
    aes(x = long, group = group), 
    map_data("world"), 
    fill = NA, 
    colour = "grey50"
  ) +
  geom_point() +
  coord_equal(
    xlim = range(station_count$lon),
    ylim = range(station_count$lat),
  ) + 
  facet_wrap(vars(fcst_model), ncol = 2)

Deterministic Verification

Andrew Singleton

17/02/2022

Output interpolated forecasts to sqlite

Output point observations to SQLite

Read in a forecast sqlite (FCTABLE) files

Read in observations SQLite (OBSTABLE) files

Manipulating `harp_list` objects

Deterministic Verification

Andrew Singleton

17/02/2022

Output interpolated forecasts to sqlite

Output point observations to SQLite

Read in a forecast sqlite (FCTABLE) files

Read in observations SQLite (OBSTABLE) files

Manipulating harp_list objects

Manipulating `harp_list` objects