library(harp)
library(here)
library(dplyr)
library(forcats)
So far, all of the verification has been done by grouping the data
according to lead time and, for categorical scores, according to
threshold. However, the verification functions allow you to specify any
grouping variable, as long as it is a column in the forecast data
frames. A simple option would be to verify grouped by valid time - we
can use the valid_dttm column in the data.
These examples use deterministic data, but exactly the same applies for ensemble verification. The code below uses pipes to read and join the data in one go. The . is the data coming from the left-hand side of the pipe.
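As a toy illustration of the dot (not part of the harp workflow): with the magrittr pipe, the left-hand side becomes the first argument of the next call, and . can also be used to refer to it inside nested calls.
# mtcars is piped in as the first argument to filter(), and `.` also
# refers to mtcars inside the nested max() call
mtcars %>%
  filter(cyl == max(.$cyl))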
t2m <- read_point_forecast(
dttm = seq_dttm(2018030800, 2018033100, "1d"),
fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
fcst_type = "det",
parameter = "T2m",
file_path = here("data/FCTABLE")
) %>%
common_cases() %>%
join_to_fcst(
read_point_obs(
unique_valid_dttm(.),
parameter = "T2m",
stations = unique_stations(.),
obs_path = here("data/OBSTABLE")
)
)
## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.
We now have data that’s ready to verify - we can specify that we want
to group the verification by valid_dttm using the groupings argument.
Unlike other arguments that take column names, it must be quoted.
verif <- det_verify(t2m, T2m, groupings = "valid_dttm")
## ::Computing verification for fcst_model `arome_arctic`::
## Det summary for valid_dttm✔
## Hexbin for valid_dttm✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for valid_dttm✔
## Hexbin for valid_dttm✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for valid_dttm✔
## Hexbin for valid_dttm✔
verif
## ::det_summary_scores:: # A tibble: 597 × 9
## fcst_model valid_dttm num_cases num_stations bias rmse mae
## <chr> <dttm> <int> <int> <dbl> <dbl> <dbl>
## 1 arome_arctic 2018-03-08 00:00:00 178 178 0.503 2.44 1.76
## 2 arome_arctic 2018-03-08 03:00:00 173 173 0.184 2.65 1.96
## 3 arome_arctic 2018-03-08 06:00:00 178 178 0.0543 2.83 2.15
## 4 arome_arctic 2018-03-08 09:00:00 173 173 0.0603 2.04 1.49
## 5 arome_arctic 2018-03-08 12:00:00 178 178 0.346 1.96 1.40
## 6 arome_arctic 2018-03-08 15:00:00 172 172 -0.162 1.77 1.35
## 7 arome_arctic 2018-03-08 18:00:00 175 175 -0.756 2.74 2.14
## 8 arome_arctic 2018-03-08 21:00:00 172 172 -1.15 3.32 2.61
## 9 arome_arctic 2018-03-09 00:00:00 354 177 -0.917 3.14 2.41
## 10 arome_arctic 2018-03-09 03:00:00 344 172 -0.916 3.30 2.53
## # ℹ 587 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
##
## --harp verification for T2m--
## # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
## # using 180 observation stations
## # for verification groups:
## -> valid_dttm
## ℹ use `attributes()` to see detailed metadata
We can now plot using plot_point_verif(), but we need to tell it that
the x-axis is now valid_dttm.
plot_point_verif(verif, mae, x_axis = valid_dttm)
plot_point_verif(verif, mae, x_axis = valid_dttm, point_size = 0)
Another common grouping is by station characteristic - e.g. coastal,
mountain etc. There is a station_groups data frame built in to harp
that assigns stations to groups by their station ID (SID).
station_groups
## # A tibble: 5,568 × 2
## SID station_group
## <int> <chr>
## 1 2029 Mountain Sweden
## 2 2102 Mountain Sweden
## 3 2209 Mountain Sweden
## 4 2307 Mountain Sweden
## 5 2013 Mountain Sweden 2
## 6 2024 Mountain Sweden 2
## 7 2028 Mountain Sweden 2
## 8 2029 Mountain Sweden 2
## 9 2036 Mountain Sweden 2
## 10 2037 Mountain Sweden 2
## # ℹ 5,558 more rows
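As a quick dplyr sketch (not part of the original workflow), we could count how many stations fall in each group:
count(station_groups, station_group, sort = TRUE)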
So we can join that to our forecast data using join_to_fcst(). Note
that join_to_fcst() is intended for joining observations to forecasts,
so it checks for a units column and will initially fail…
join_to_fcst(t2m, station_groups)
## Warning: .join does not have a units column.
## Error: Join will not be done due to units incompatibility. You can force the join by setting force = TRUE
## OR, units imcompatibility can be fixed with the set_units(), or scale_param() functions.
But we can join anything if we set force = TRUE.
t2m <- join_to_fcst(t2m, station_groups, force = TRUE)
t2m
## • arome_arctic
## ::deterministic point forecast:: # A tibble: 250,703 × 16
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## # ℹ 250,693 more rows
## # ℹ 10 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>,
## # model_elevation <dbl>, units <chr>, lon <dbl>, lat <dbl>, elev <dbl>,
## # T2m <dbl>, station_group <chr>
##
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 250,703 × 15
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 2 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 3 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 4 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 5 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 6 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 7 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 8 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 9 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 10 IFSHIRES 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## # ℹ 250,693 more rows
## # ℹ 9 more variables: fcst <dbl>, fcst_cycle <chr>, units <chr>, z <dbl>,
## # lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>, station_group <chr>
##
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 250,703 × 15
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 2 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 3 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 4 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T2m
## 5 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 6 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 7 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 8 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 9 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## 10 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1010 T2m
## # ℹ 250,693 more rows
## # ℹ 9 more variables: z <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>,
## # lon <dbl>, lat <dbl>, elev <dbl>, T2m <dbl>, station_group <chr>
Now we can verify using the station_group column, but we still want
to group by lead time, so we pass a vector to groupings, meaning that
we group by both columns.
verif <- det_verify(t2m, T2m, groupings = c("lead_time", "station_group"))
## ::Computing verification for fcst_model `arome_arctic`::
## Det summary for lead_time & station_group✔
## Hexbin for lead_time & station_group✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time & station_group✔
## Hexbin for lead_time & station_group✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & station_group✔
## Hexbin for lead_time & station_group✔
verif
## ::det_summary_scores:: # A tibble: 1,173 × 10
## fcst_model lead_time station_group num_cases num_stations bias rmse mae
## <chr> <int> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 arome_arct… 0 Coast Norway 447 20 0.275 1.50 1.07
## 2 arome_arct… 0 Coast Sweden 183 8 1.59 3.07 2.10
## 3 arome_arct… 0 EWGLAM 230 10 0.646 2.28 1.68
## 4 arome_arct… 0 Eumou 138 6 -0.200 4.61 3.29
## 5 arome_arct… 0 Finland 805 35 0.545 3.17 2.26
## 6 arome_arct… 0 METCOOP 2659 118 0.744 2.90 1.99
## 7 arome_arct… 0 METCOOP City 23 1 -2.17 2.74 2.22
## 8 arome_arct… 0 METCOOP Coast 630 28 0.656 2.08 1.37
## 9 arome_arct… 0 METCOOP Moun… 46 2 -3.09 4.89 3.53
## 10 arome_arct… 0 METCOOP Moun… 207 9 0.573 4.47 3.23
## # ℹ 1,163 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
##
## --harp verification for T2m--
## # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
## # using 163 observation stations
## # for verification groups:
## -> lead_time & station_group
## ℹ use `attributes()` to see detailed metadata
And we can make a plot faceting by station_group.
plot_point_verif(verif, mae, facet_by = vars(station_group))
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
But now we don’t have the scores for all stations together. We can
get those back by passing a list to groupings, which means that we
group by different sets of grouping variables - an “All” group is
created for grouping variables that are not included in a given set.
verif <- det_verify(
t2m,
T2m,
groupings = list(
c("lead_time", "station_group"),
"lead_time"
)
)
## ::Computing verification for fcst_model `arome_arctic`::
## Det summary for lead_time & station_group✔
## Det summary for lead_time✔
## Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time & station_group✔
## Det summary for lead_time✔
## Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & station_group✔
## Det summary for lead_time✔
## Hexbin for lead_time & station_group✔
## Hexbin for lead_time✔
verif
## ::det_summary_scores:: # A tibble: 1,224 × 10
## fcst_model lead_time station_group num_cases num_stations bias rmse mae
## <chr> <int> <chr> <int> <int> <dbl> <dbl> <dbl>
## 1 arome_arct… 0 Coast Norway 447 20 0.275 1.50 1.07
## 2 arome_arct… 0 Coast Sweden 183 8 1.59 3.07 2.10
## 3 arome_arct… 0 EWGLAM 230 10 0.646 2.28 1.68
## 4 arome_arct… 0 Eumou 138 6 -0.200 4.61 3.29
## 5 arome_arct… 0 Finland 805 35 0.545 3.17 2.26
## 6 arome_arct… 0 METCOOP 2659 118 0.744 2.90 1.99
## 7 arome_arct… 0 METCOOP City 23 1 -2.17 2.74 2.22
## 8 arome_arct… 0 METCOOP Coast 630 28 0.656 2.08 1.37
## 9 arome_arct… 0 METCOOP Moun… 46 2 -3.09 4.89 3.53
## 10 arome_arct… 0 METCOOP Moun… 207 9 0.573 4.47 3.23
## # ℹ 1,214 more rows
## # ℹ 2 more variables: stde <dbl>, hexbin <list>
##
## --harp verification for T2m--
## # for forecasts from 00:00 UTC 08 Mar 2018 to 00:00 UTC 31 Mar 2018
## # using 163 observation stations
## # for verification groups:
## -> lead_time & station_group
## -> lead_time
## ℹ use `attributes()` to see detailed metadata
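The extra rows come from the "lead_time" only grouping - for those rows the station_group column is given the catch-all value "All". A quick sketch to pick them out:
filter(verif$det_summary_scores, station_group == "All")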
plot_point_verif(verif, mae, facet_by = vars(station_group))
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
A less obvious situation in which you need to add a grouping is for vertical profiles. In this case the vertical level must also be a grouping variable. So, let’s start by reading in temperature on pressure levels - we don’t have these data for IFSHIRES…
tprofiles <- read_point_forecast(
dttm = seq_dttm(2018030800, 2018033100, "1d"),
fcst_model = c("arome_arctic", "MEPS_det"),
fcst_type = "det",
parameter = "T",
file_path = here("data/FCTABLE"),
vertical_coordinate = "pressure"
)
## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.
## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/FCTABLE/arome_arctic/2018/03/FCTABLE_T_201803_00.sqlite
## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/FCTABLE/MEPS_det/2018/03/FCTABLE_T_201803_00.sqlite
tprofiles
## • arome_arctic
## ::deterministic point forecast:: # A tibble: 31,824 × 10
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## # ℹ 31,814 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
##
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 15,912 × 10
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 2 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 3 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 4 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 5 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 6 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 7 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 8 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 9 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 10 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
When we select the common cases we need to make sure we also include
the pressure level - we can do that by adding p (the column with the
pressure level) to the call to common_cases().
tprofiles <- common_cases(tprofiles, p)
tprofiles
## • arome_arctic
## ::deterministic point forecast:: # A tibble: 15,912 × 10
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 2 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 3 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 4 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 5 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 6 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 7 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 8 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 9 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 10 arome_arct… 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
##
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 15,912 × 10
## fcst_model fcst_dttm valid_dttm lead_time SID parameter
## <chr> <dttm> <dttm> <int> <int> <chr>
## 1 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 2 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 3 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 4 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 5 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 6 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 7 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 8 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 9 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## 10 MEPS_det 2018-03-08 00:00:00 2018-03-08 00:00:00 0 1001 T
## # ℹ 15,902 more rows
## # ℹ 4 more variables: p <dbl>, fcst <dbl>, fcst_cycle <chr>, units <chr>
And now we can join the observations.
tprofiles <- join_to_fcst(
tprofiles,
read_point_obs(
unique_valid_dttm(tprofiles),
parameter = "T",
obs_path = here("data/OBSTABLE"),
vertical_coordinate = "pressure"
)
)
## Getting T.
##
## Reading: /home/andrewts/harp-documentation/md/harp_training_2022/data/OBSTABLE/OBSTABLE_2018.sqlite
## Joining, by = c("valid_dttm", "SID", "p", "units")
## Joining, by = c("valid_dttm", "SID", "p", "units")
Now we verify using the p column as a grouping variable.
verif <- det_verify(tprofiles, T, groupings = c("lead_time", "p"))
## ::Computing verification for fcst_model `arome_arctic`::
## Det summary for lead_time & p✔
## Hexbin for lead_time & p✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & p✔
## Hexbin for lead_time & p✔
There is a special function for plotting verification of vertical
profiles - plot_profile_verif(). It is simply a wrapper around
plot_point_verif() that sets the axes for you.
plot_profile_verif(verif, mae)
We have another overplotting problem as all lead times are included,
so we can facet by lead_time or filter to a particular lead_time.
plot_profile_verif(verif, mae, facet_by = vars(lead_time))
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
plot_profile_verif(verif, mae, filter_by = vars(lead_time == 18))
We can compute scores for cases where the observations are within
particular bounds. We can use R’s cut function to define the groups.
We’ll go back to the 2m temperature data for this.
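As a toy reminder of how cut works (not part of the workflow):
# Each value is assigned to a half-open interval (lower, upper];
# values outside the breaks come back as NA
cut(c(-30, -12, -3, 7), breaks = seq(-25, 5, 5))
# -30 and 7 fall outside the breaks and become NA;
# -12 falls in "(-15,-10]" and -3 falls in "(-5,0]"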
t2m <- read_point_forecast(
dttm = seq_dttm(2018030800, 2018033100, "1d"),
fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
fcst_type = "det",
parameter = "T2m",
file_path = here("data/FCTABLE")
) %>%
common_cases() %>%
scale_param(-273.15, "degC") %>%
join_to_fcst(
read_point_obs(
unique_valid_dttm(.),
parameter = "T2m",
stations = unique_stations(.),
obs_path = here("data/OBSTABLE")
) %>%
scale_param(-273.15, "degC", col = T2m)
)
## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.
t2m <- mutate(t2m, temp_range = cut(T2m, breaks = seq(-25, 5, 5)))
select(t2m, SID, T2m, temp_range)
## • arome_arctic
## ::deterministic point forecast:: # A tibble: 70,396 × 3
## SID T2m temp_range
## <int> <dbl> <fct>
## 1 1001 -0.950 (-5,0]
## 2 1010 -7.35 (-10,-5]
## 3 1015 -9.45 (-10,-5]
## 4 1018 -11.1 (-15,-10]
## 5 1023 -18.1 (-20,-15]
## 6 1025 -10.3 (-15,-10]
## 7 1026 -10.8 (-15,-10]
## 8 1027 -10.0 (-15,-10]
## 9 1033 -7.15 (-10,-5]
## 10 1035 -17.3 (-20,-15]
## # ℹ 70,386 more rows
##
## • IFSHIRES
## ::deterministic point forecast:: # A tibble: 70,396 × 3
## SID T2m temp_range
## <int> <dbl> <fct>
## 1 1001 -0.950 (-5,0]
## 2 1010 -7.35 (-10,-5]
## 3 1015 -9.45 (-10,-5]
## 4 1018 -11.1 (-15,-10]
## 5 1023 -18.1 (-20,-15]
## 6 1025 -10.3 (-15,-10]
## 7 1026 -10.8 (-15,-10]
## 8 1027 -10.0 (-15,-10]
## 9 1033 -7.15 (-10,-5]
## 10 1035 -17.3 (-20,-15]
## # ℹ 70,386 more rows
##
## • MEPS_det
## ::deterministic point forecast:: # A tibble: 70,396 × 3
## SID T2m temp_range
## <int> <dbl> <fct>
## 1 1001 -0.950 (-5,0]
## 2 1010 -7.35 (-10,-5]
## 3 1015 -9.45 (-10,-5]
## 4 1018 -11.1 (-15,-10]
## 5 1023 -18.1 (-20,-15]
## 6 1025 -10.3 (-15,-10]
## 7 1026 -10.8 (-15,-10]
## 8 1027 -10.0 (-15,-10]
## 9 1033 -7.15 (-10,-5]
## 10 1035 -17.3 (-20,-15]
## # ℹ 70,386 more rows
We need to make sure that those cases outside the breaks given by cut are also included, and then make sure the factor levels are in ascending order to make nicer plots.
t2m <- mutate(
t2m,
temp_range = as.character(temp_range),
temp_range = case_when(
T2m < -25 ~ "< -25",
T2m >= 5 ~ ">= 5",
TRUE ~ temp_range
),
temp_range = fct_reorder(factor(temp_range), T2m, min)
)
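As a quick sanity check (a harp_list can be indexed like a regular list of data frames), the factor levels for one of the models should now run from the coldest range to the warmest:
levels(t2m$arome_arctic$temp_range)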
And then we can verify grouping by the temp_range
column.
verif <- det_verify(t2m, T2m, groupings = list(c("lead_time", "temp_range"), "lead_time"))
## ::Computing verification for fcst_model `arome_arctic`::
## Det summary for lead_time & temp_range✔
## Det summary for lead_time✔
## Hexbin for lead_time & temp_range✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `IFSHIRES`::
## Det summary for lead_time & temp_range✔
## Det summary for lead_time✔
## Hexbin for lead_time & temp_range✔
## Hexbin for lead_time✔
## ::Computing verification for fcst_model `MEPS_det`::
## Det summary for lead_time & temp_range✔
## Det summary for lead_time✔
## Hexbin for lead_time & temp_range✔
## Hexbin for lead_time✔
And when we plot, the “All” rows have been added so we’ve lost our
factor information, but we made sure that the groups were verified in
the correct order, so by using fct_inorder() we get the correct order
back.
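As a toy example of what fct_inorder() does - the levels are set in the order the values first appear, rather than alphabetically:
fct_inorder(c("b", "a", "c"))
# Levels: b a c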
plot_point_verif(verif, bias, facet_by = vars(fct_inorder(temp_range)))
## Warning: plot_num_cases = TRUE cannot be used with facet_by. plot_num_cases set
## to FALSE.
We can also set the groupings
argument to compute the
verification scores at individual stations, using SID
as
the grouping variable. We’ll do this on 2m temperature again, but will
re-read the data due to all of the extra rows created by many stations
being in multiple groups in the earlier analysis.
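To see why the re-read is needed, a quick illustrative count shows that many station IDs appear in more than one group, so the earlier join duplicated forecast rows:
count(station_groups, SID, sort = TRUE)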
t2m <- read_point_forecast(
dttm = seq_dttm(2018030800, 2018033100, "1d"),
fcst_model = c("arome_arctic", "IFSHIRES", "MEPS_det"),
fcst_type = "det",
parameter = "T2m",
file_path = here("data/FCTABLE")
) %>%
common_cases() %>%
join_to_fcst(
read_point_obs(
unique_valid_dttm(.),
parameter = "T2m",
stations = unique_stations(.),
obs_path = here("data/OBSTABLE")
)
)
## Warning: Only 1 'file_template' defined. Recycling for all 'fcst_model'.
verif <- det_verify(
t2m, T2m, groupings = c("SID", "lead_time")
)
There are far too many stations to plot time series of the scores individually, but we can join the station list to the final verification to get the station locations. This is a tiny bit of work as we need a function to do this.
join_func <- function(.verif, stns) {
  # Only join for score tables that exist and actually contain rows
  if (!is.null(.verif) && nrow(.verif) > 0) {
    .verif <- inner_join(.verif, stns)
  }
  .verif
}
Then we can use lapply to apply the function to each data frame in the list (here there is only one, but there would be more if we had also computed categorical scores, or for ensemble verification). But before running lapply we need to store the attributes of the list, since lapply returns a plain list without them.
atts <- attributes(verif)
verif <- lapply(verif, join_func, station_list)
## Joining with `by = join_by(SID)`
attributes(verif) <- atts
Now we can plot the data on a map - we need to filter by lead time first, otherwise we’ll get overplotting, and we facet by fcst_model.
ggplot(
filter(verif$det_summary_scores, lead_time == 24),
aes(lon, lat, colour = bias)
) +
geom_polygon(
aes(x = long, group = group),
map_data("world"),
fill = NA,
colour = "grey40"
) +
geom_point() +
scale_colour_distiller(
palette = "RdBu",
limits = ~c(-max(abs(.x)), max(abs(.x)))
) +
coord_cartesian(
xlim = range(verif$det_summary_scores$lon),
ylim = range(verif$det_summary_scores$lat),
) +
facet_wrap(vars(fcst_model), nrow = 2)
We have a lot of white, so we want dots with both a stroke and a fill - that is shape 21. We can also make the points a bit bigger, colour in the land and sea, and make the coordinates equal in each direction.
ggplot(
filter(verif$det_summary_scores, lead_time == 24),
aes(lon, lat, fill = bias)
) +
geom_polygon(
aes(x = long, group = group),
map_data("world"),
fill = "seagreen3",
colour = "grey40"
) +
geom_point(shape = 21, colour = "grey70", size = 3) +
scale_fill_distiller(
palette = "RdBu",
limits = ~ c(-max(abs(.x)), max(abs(.x)))
) +
coord_equal(
xlim = range(verif$det_summary_scores$lon),
ylim = range(verif$det_summary_scores$lat) - c(5, 0),
) +
facet_wrap(vars(fcst_model), nrow = 2) +
theme(
axis.ticks = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
panel.grid = element_blank(),
panel.background = element_rect(fill = "#ccccff")
)