library(harp)
library(here)
library(dplyr)
library(forcats)

What are verifcation groups?

So far, all of the verification has been done by grouping the data according to lead time and, for categorical scores, according to threshold. However the verification functions allow you specify any grouping variable, as long as it is a column in the forecast data frames. A simple option would be to verify grouped by valid time - we can use the validdate column in the data.

These examples use deterministic data, but the exact same will apply for ensemble verification. The below uses pipes to read and join the data in one go. The . is data from the left hand side of the pipe.

t2m <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T2m",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases() %>% 
  join_to_fcst(
    read_point_obs(
      unique_valid_dttm(.),
      parameter = "T2m",
      stations  = unique_stations(.),
      obs_path  = here("data/OBSTABLE")
    )
  )

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

We now have data that’s ready to verify - we can specify that we want to group the verfication by the validdate using the groupings argument. Unlike other arguments that take column names it must be quoted.

verif <- det_verify(t2m , T2m, groupings = "valid_dttm")

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for valid_dttm✔

## Hexbin for valid_dttm✔

## ::Computing verification for fcst_model `IFSHIRES`::

## Det summary for valid_dttm✔

## Hexbin for valid_dttm✔

## ::Computing verification for fcst_model `MEPS_det`::

## Det summary for valid_dttm✔

## Hexbin for valid_dttm✔

verif

## ::det_summary_scores:: # A tibble: 597 × 9
##    fcst_model   valid_dttm          num_cases num_stations    bias  rmse   mae
##    <chr>        <dttm>                  <int>        <int>   <dbl> <dbl> <dbl>
##  1 arome_arctic 2018-03-08 00:00:00       178          178  0.503   2.44  1.76
##  2 arome_arctic 2018-03-08 03:00:00       173          173  0.184   2.65  1.96
##  3 arome_arctic 2018-03-08 06:00:00       178          178  0.0543  2.83  2.15
##  4 arome_arctic 2018-03-08 09:00:00       173          173  0.0603  2.04  1.49
##  5 arome_arctic 2018-03-08 12:00:00       178          178  0.346   1.96  1.40
##  6 arome_arctic 2018-03-08 15:00:00       172          172 -0.162   1.77  1.35
##  7 arome_arctic 2018-03-08 18:00:00       175          175 -0.756   2.74  2.14
##  8 arome_arctic 2018-03-08 21:00:00       172          172 -1.15    3.32  2.61
##  9 arome_arctic 2018-03-09 00:00:00       354          177 -0.917   3.14  2.41
## 10 arome_arctic 2018-03-09 03:00:00       344          172 -0.916   3.30  2.53
## # ℹ 587 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
## 
## --harp verification for T2m--
##  # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
##  # using 180 observation stations
##  # for verification groups: 
##     -> valid_dttm

## ℹ use `attributes()` to see detailed metadata

We can now plot using plot_point_verif(), but we need to tell it that the x-axis is now valid_dttm

plot_point_verif(verif, mae, x_axis = valid_dttm)
plot_point_verif(verif, mae, x_axis = valid_dttm, point_size = 0)

Another common grouping is by station characteristic - e.g. coastal, mountain etc. There is a station_groups data frame built in to harp that groups the stations in some way or other by the station ID.

station_groups

## # A tibble: 5,568 × 2
##      SID station_group    
##    <int> <chr>            
##  1  2029 Mountain Sweden  
##  2  2102 Mountain Sweden  
##  3  2209 Mountain Sweden  
##  4  2307 Mountain Sweden  
##  5  2013 Mountain Sweden 2
##  6  2024 Mountain Sweden 2
##  7  2028 Mountain Sweden 2
##  8  2029 Mountain Sweden 2
##  9  2036 Mountain Sweden 2
## 10  2037 Mountain Sweden 2
## # ℹ 5,558 more rows

So we can join that to our forecast data using join_to_fcst(). Note that join_to_fcst() is for joining observations to forecasts so it checks units and will initially fail….

join_to_fcst(t2m, station_groups)

## Warning: .join does not have a units column.

## Error: Join will not be done due to units incompatibility. You can force the join by setting force = TRUE
## OR, units imcompatibility can be fixed with the set_units(), or scale_param() functions.

But we can join anything if we set force = TRUE

t2m <- join_to_fcst(t2m, station_groups, force = TRUE)
t2m

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 250,703 × 16
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## # ℹ 250,693 more rows
## # ℹ 10 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>,
## #   model_elevation <dbl>, units <chr>, lon <dbl>, lat <dbl>, elev <dbl>,
## #   T2m <dbl>, station_group <chr>
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 250,703 × 15
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  3 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  4 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  5 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  6 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  7 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  8 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## 10 IFSHIRES   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## # ℹ 250,693 more rows
## # ℹ 9 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>, z <dbl>,
## #   lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>, station_group <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 250,703 × 15
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T2m      
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1010 T2m      
## # ℹ 250,693 more rows
## # ℹ 9 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>,
## #   lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>, station_group <chr>

Now we can verify using the station_group column, but we still want to group by lead time, so we pass a vector to groupings, meaning that we group by both columns.

verif <- det_verify(t2m, T2m, groupings = c("lead_time", "station_group"))

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time & station_group✔

## 
Hexbin for lead_time & station_group ■■■■■■■■■■■■                      35% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■                     41% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■                  48% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■            72% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■         80% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■       85% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     93% |  E…

                                                                                 
Hexbin for lead_time & station_group✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time & station_group✔
## 
Hexbin for lead_time & station_group ■■■■■■■■■■■■■                     41% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■                   48% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■          76% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■        83% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■      90% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    96% |  E…

                                                                                 
Hexbin for lead_time & station_group✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & station_group✔
## 
Hexbin for lead_time & station_group ■■■■■■■■■■                        30% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■                      37% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■                    43% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■             66% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■           74% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■         81% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■      89% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■    96% |  E…

                                                                                 
Hexbin for lead_time & station_group✔

verif

## ::det_summary_scores:: # A tibble: 1,173 × 10
##    fcst_model  lead_time station_group num_cases num_stations   bias  rmse   mae
##    <chr>           <int> <chr>             <int>        <int>  <dbl> <dbl> <dbl>
##  1 arome_arct…         0 Coast Norway        447           20  0.275  1.50  1.07
##  2 arome_arct…         0 Coast Sweden        183            8  1.59   3.07  2.10
##  3 arome_arct…         0 EWGLAM              230           10  0.646  2.28  1.68
##  4 arome_arct…         0 Eumou               138            6 -0.200  4.61  3.29
##  5 arome_arct…         0 Finland             805           35  0.545  3.17  2.26
##  6 arome_arct…         0 METCOOP            2659          118  0.744  2.90  1.99
##  7 arome_arct…         0 METCOOP City         23            1 -2.17   2.74  2.22
##  8 arome_arct…         0 METCOOP Coast       630           28  0.656  2.08  1.37
##  9 arome_arct…         0 METCOOP Moun…        46            2 -3.09   4.89  3.53
## 10 arome_arct…         0 METCOOP Moun…       207            9  0.573  4.47  3.23
## # ℹ 1,163 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
## 
## --harp verification for T2m--
##  # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
##  # using 163 observation stations
##  # for verification groups: 
##     -> lead_time & station_group

## ℹ use `attributes()` to see detailed metadata

And we can make a plot faceting by station_group

plot_point_verif(verif, mae, facet_by = vars(station_group))

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

But now we don’t have the scores for all stations together. We can get those back passing a list to groupings, which means that we can group by different sets of groupings - an All group is created for grouping variables that are not within a set of groups.

verif <- det_verify(
  t2m, 
  T2m, 
  groupings = list(
    c("lead_time", "station_group"),
    "lead_time"
  )
)

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time & station_group✔

## Det summary for lead_time✔

## 
Hexbin for lead_time & station_group ■■■■■■■■■■■■■                     40% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■                   47% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■           74% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■         81% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■       87% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     93% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■   99% |  E…

                                                                                 
Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time & station_group✔
## Det summary for lead_time✔
## 
Hexbin for lead_time & station_group ■■■■■■■■■■■                       34% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■                     39% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■                    43% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■                   47% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■                55% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■               61% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■             68% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■           75% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■          78% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■        82% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■       87% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     92% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■   99% |  E…

                                                                                 
Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & station_group✔
## Det summary for lead_time✔
## 
Hexbin for lead_time & station_group ■■■■■■■■■■■                       34% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■                     41% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■                   46% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■                  50% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■               61% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■            69% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■          77% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■        84% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■     92% |  E…

Hexbin for lead_time & station_group ■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■■   99% |  E…

                                                                                 
Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔

verif

## ::det_summary_scores:: # A tibble: 1,224 × 10
##    fcst_model  lead_time station_group num_cases num_stations   bias  rmse   mae
##    <chr>           <int> <chr>             <int>        <int>  <dbl> <dbl> <dbl>
##  1 arome_arct…         0 Coast Norway        447           20  0.275  1.50  1.07
##  2 arome_arct…         0 Coast Sweden        183            8  1.59   3.07  2.10
##  3 arome_arct…         0 EWGLAM              230           10  0.646  2.28  1.68
##  4 arome_arct…         0 Eumou               138            6 -0.200  4.61  3.29
##  5 arome_arct…         0 Finland             805           35  0.545  3.17  2.26
##  6 arome_arct…         0 METCOOP            2659          118  0.744  2.90  1.99
##  7 arome_arct…         0 METCOOP City         23            1 -2.17   2.74  2.22
##  8 arome_arct…         0 METCOOP Coast       630           28  0.656  2.08  1.37
##  9 arome_arct…         0 METCOOP Moun…        46            2 -3.09   4.89  3.53
## 10 arome_arct…         0 METCOOP Moun…       207            9  0.573  4.47  3.23
## # ℹ 1,214 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
## 
## --harp verification for T2m--
##  # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
##  # using 163 observation stations
##  # for verification groups: 
##     -> lead_time & station_group
##     -> lead_time

## ℹ use `attributes()` to see detailed metadata

plot_point_verif(verif, mae, facet_by = vars(station_group))

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

Vertical profiles

A less obvious situation in which you need to add a grouping is for vertical profiles. In this situation we must have the vertical level as a grouping variable as well. So, lets start by reading in temperature on pressure levels - we don’t have these data for IFSHIRES…

tprofiles <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T",  
  file_path  = here("data/FCTABLE"),
  vertical_coordinate = "pressure"
)

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/FCTABLE/arome_arctic/2018/03/FCTABLE_T_201803_00.sqlite

## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/FCTABLE/MEPS_det/2018/03/FCTABLE_T_201803_00.sqlite

tprofiles

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 31,824 × 10
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## # ℹ 31,814 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 15,912 × 10
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>

When we select the common cases we need to make sure we also include the pressure level - we can do that by adding p (thecolumn with the pressure level) to the call to common_cases()

tprofiles <- common_cases(tprofiles, p)
tprofiles

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 15,912 × 10
##    fcst_model  fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>       <dttm>              <dttm>                  <int> <int> <chr>    
##  1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 15,912 × 10
##    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
##    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
##  1 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  2 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  3 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  4 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  5 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  6 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  7 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  8 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
##  9 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## 10 MEPS_det   2018-03-08 00:00:00 2018-03-08 00:00:00         0  1001 T        
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>

And now we can join the observations

tprofiles <- join_to_fcst(
  tprofiles,
  read_point_obs(
    unique_valid_dttm(tprofiles), 
    parameter = "T", 
    obs_path = here("data/OBSTABLE"), 
    vertical_coordinate = "pressure"
  )
)

## Getting T.

## 
## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/OBSTABLE/OBSTABLE_2018.sqlite

## Joining, by = c("valid_dttm", "SID", "p", "units")
## Joining, by = c("valid_dttm", "SID", "p", "units")

Now we verify using the p column as a grouping variable

verif <- det_verify(tprofiles, T, groupings = c("lead_time", "p"))

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time & p✔

## Hexbin for lead_time & p✔

## ::Computing verification for fcst_model `MEPS_det`::

## Det summary for lead_time & p✔

## Hexbin for lead_time & p✔

There is a special function for plotting verification of vertical profiles - plot_profile_verif(). It is simply a wrapper around plot_point_verif() that sets the axes for you.

plot_profile_verif(verif, mae)

We have another overplotting problem as all lead times are included, so we can facet by lead_time or filter to a particular lead_time.

plot_profile_verif(verif, mae, facet_by = vars(lead_time))

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

plot_profile_verif(verif, mae, filter_by = vars(lead_time == 18))

Grouping by observation values.

We can compute scores only for when observations are within particular bounds. We can use R’s cut function to define the groups. We’ll go back to the 2m temperature data for this.

t2m <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T2m",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases() %>% 
  scale_param(-273.15, "degC") %>% 
  join_to_fcst(
    read_point_obs(
      unique_valid_dttm(.),
      parameter = "T2m",
      stations  = unique_stations(.),
      obs_path  = here("data/OBSTABLE")
    ) %>% 
      scale_param(-273.15, "degC", col = T2m)
  )

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

t2m <- mutate(t2m, temp_range = cut(T2m, breaks = seq(-25, 5, 5)))
select(t2m, SID, T2m, temp_range)

## • arome_arctic
## ::deterministic point forecast:: # A tibble: 70,396 × 3
##      SID     T2m temp_range
##    <int>   <dbl> <fct>     
##  1  1001  -0.950 (-5,0]    
##  2  1010  -7.35  (-10,-5]  
##  3  1015  -9.45  (-10,-5]  
##  4  1018 -11.1   (-15,-10] 
##  5  1023 -18.1   (-20,-15] 
##  6  1025 -10.3   (-15,-10] 
##  7  1026 -10.8   (-15,-10] 
##  8  1027 -10.0   (-15,-10] 
##  9  1033  -7.15  (-10,-5]  
## 10  1035 -17.3   (-20,-15] 
## # ℹ 70,386 more rows
## 
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 70,396 × 3
##      SID     T2m temp_range
##    <int>   <dbl> <fct>     
##  1  1001  -0.950 (-5,0]    
##  2  1010  -7.35  (-10,-5]  
##  3  1015  -9.45  (-10,-5]  
##  4  1018 -11.1   (-15,-10] 
##  5  1023 -18.1   (-20,-15] 
##  6  1025 -10.3   (-15,-10] 
##  7  1026 -10.8   (-15,-10] 
##  8  1027 -10.0   (-15,-10] 
##  9  1033  -7.15  (-10,-5]  
## 10  1035 -17.3   (-20,-15] 
## # ℹ 70,386 more rows
## 
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 70,396 × 3
##      SID     T2m temp_range
##    <int>   <dbl> <fct>     
##  1  1001  -0.950 (-5,0]    
##  2  1010  -7.35  (-10,-5]  
##  3  1015  -9.45  (-10,-5]  
##  4  1018 -11.1   (-15,-10] 
##  5  1023 -18.1   (-20,-15] 
##  6  1025 -10.3   (-15,-10] 
##  7  1026 -10.8   (-15,-10] 
##  8  1027 -10.0   (-15,-10] 
##  9  1033  -7.15  (-10,-5]  
## 10  1035 -17.3   (-20,-15] 
## # ℹ 70,386 more rows

We need to make sure that those cases outside the breaks given by cut are also included, and then make sure the factor levels are in ascending order to make nicer plots.

t2m <- mutate(
  t2m,
  temp_range = as.character(temp_range),
  temp_range = case_when(
    T2m < -25 ~ "< -25",
    T2m >= 5  ~ ">= 5",
    TRUE ~ temp_range
  ),
  temp_range = fct_reorder(factor(temp_range), T2m, min)
)

And then we can verify grouping by the temp_range column.

verif <- det_verify(t2m, T2m, groupings = list(c("lead_time", "temp_range"), "lead_time"))

## ::Computing verification for fcst_model `arome_arctic`::

## Det summary for lead_time & temp_range✔

## Det summary for lead_time✔

## Hexbin for lead_time & temp_range✔

## Hexbin for lead_time✔

## ::Computing verification for fcst_model `IFSHIRES`::

## Det summary for lead_time & temp_range✔

## Det summary for lead_time✔

## Hexbin for lead_time & temp_range✔

## Hexbin for lead_time✔

## ::Computing verification for fcst_model `MEPS_det`::

## Det summary for lead_time & temp_range✔

## Det summary for lead_time✔

## Hexbin for lead_time & temp_range✔

## Hexbin for lead_time✔

And when we plot, the “All” rows have been added so we’ve lost our factor information, but we made sure that the groups were verified in the correct order, so be using fct_inorder() we get the correct order back.

plot_point_verif(verif, bias, facet_by = vars(fct_inorder(temp_range)))

## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.

Verfication for individual stations

We can also set the groupings argument to compute the verification scores at individual stations, using SID as the grouping variable. We’ll do this on 2m temperature again, but will re-read the data due to all of the extra rows created by many stations being in multiple groups in the earlier analysis.

t2m <- read_point_forecast(
  dttm       = seq_dttm(2018030800, 2018033100, "1d"),
  fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
  fcst_type  = "det",
  parameter  = "T2m",  
  file_path  = here("data/FCTABLE")
) %>% 
  common_cases() %>% 
  join_to_fcst(
    read_point_obs(
      unique_valid_dttm(.),
      parameter = "T2m",
      stations  = unique_stations(.),
      obs_path  = here("data/OBSTABLE")
    )
  )

## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.

verif <- det_verify(
  t2m, T2m, groupings = c("SID", "lead_time")
)

There are far too many stations to plot time series of the scores individually, but we can join the station list to the final verification to get the station locations. This is a tiny bit of work as we need a function to do this.

join_func <- function(.verif, stns) {
  if (!is.null(.verif) && nrow(.verif) > 0) {
    .verif <- inner_join(.verif, stns)
  }
  .verif
}

Then we can use lapply to apply the function to each data frame in the list (there is only 1, but if we’d computed categorical scores as well there would be more, or for ensemble scores). But before running lapply we need to store the attributes of the list.

atts <- attributes(verif)
verif <- lapply(verif, join_func, station_list)

## Joining with `by = join_by(SID)`

attributes(verif) <- atts

Now we can plot the data on a map - we need to filter by lead time first, otherwise we’ll get overplotting and facet by mname.

ggplot(
  filter(verif$det_summary_scores, lead_time == 24), 
  aes(lon, lat, colour = bias)
) +
  geom_polygon(
    aes(x = long, group = group), 
    map_data("world"), 
    fill   = NA, 
    colour = "grey40"
  ) + 
  geom_point() +
  scale_colour_distiller(
    palette = "RdBu",
    limits  = ~c(-max(abs(.x)), max(abs(.x))) 
  ) +
  coord_cartesian(
    xlim = range(verif$det_summary_scores$lon),
    ylim = range(verif$det_summary_scores$lat),
  ) +
  facet_wrap(vars(fcst_model), nrow = 2)

We have a lot of white, so want to have dots with a stroke and a fill - which is shape 21 (shapes). We can also make the points a bit bigger. And maybe colour in the land and sea and make the co-ordinates equal in each direction.

ggplot(
  filter(verif$det_summary_scores, lead_time == 24), 
  aes(lon, lat, fill = bias)
) +
  geom_polygon(
    aes(x = long, group = group), 
    map_data("world"), 
    fill   = "seagreen3", 
    colour = "grey40"
  ) + 
  geom_point(shape = 21, colour = "grey70", size = 3) +
  scale_fill_distiller(
    palette = "RdBu",
    limits  = ~ c(-max(abs(.x)), max(abs(.x))) 
  ) +
  coord_equal(
    xlim = range(verif$det_summary_scores$lon),
    ylim = range(verif$det_summary_scores$lat) - c(5, 0),
  ) +
  facet_wrap(vars(fcst_model), nrow = 2) +
  theme(
    axis.ticks = element_blank(),
    axis.text  = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    panel.background = element_rect(fill = "#ccccff")
  )

Point Verification by Groups

Andrew Singleton

18/02/2022

What are verifcation groups?

Vertical profiles

Grouping by observation values.

Verfication for individual stations