Chapter 2 Interpolating model outputs

2.1 Introduction

The “internal” file format for harp is SQLite. SQLite allows fast access to the data and the ability to filter exactly what you want to read. Once our data are in SQLite format, we don’t have to convert them again.

In most cases you also need to interpolate forecast data to station locations, and harp enables you to read the data, interpolate it, and write the interpolated data in one function. This can be done for grib files, FA files, netcdf files (from MET Norway) and vfld files (which are already interpolated). All of the file IO in harp is handled by the package harpIO.

2.2 File templates

NWP files typically have information from any of the date, lead time, parameter, member number (for ensemble forecasts) and potentially more in the file name and / or directory structure. This means that the exact file names are never the same though they will always have the same structure. harp provides functionality for using templates for file names, and also has a number of built in templates for known formats. You can list these using show_file_templates() and see the way that the templates are built.

library(harp)
library(here)
show_file_templates()

## # A tibble: 33 x 2
##    template_name        template                                                
##    <chr>                <chr>                                                   
##  1 arome_arctic_extrac… /lustre/storeB/immutable/archive/projects/metproduction…
##  2 arome_arctic_full    /lustre/storeB/immutable/archive/projects/metproduction…
##  3 arome_arctic_sfx     /lustre/storeB/immutable/archive/projects/metproduction…
##  4 fctable              {file_path}/{fcst_model}/{YYYY}/{MM}/FCTABLE_{parameter…
##  5 fctable_det          {file_path}/{det_model}/{YYYY}/{MM}/FCTABLE_{parameter}…
##  6 fctable_eps          {file_path}/{eps_model}/{YYYY}/{MM}/FCTABLE_{parameter}…
##  7 fctable_eps_all_cyc… {file_path}/{eps_model}/{YYYY}/{MM}/FCTABLE_{parameter}…
##  8 fctable_eps_all_lea… {file_path}/{eps_model}/{YYYY}/{MM}/FCTABLE_{parameter}…
##  9 glameps_grib         {file_path}/{eps_model}/{sub_model}/{YYYY}/{MM}/{DD}/{H…
## 10 harmoneps_grib       {file_path}/{YYYY}/{MM}/{DD}/{HH}/mbr{MBR3}/fc{YYYY}{MM…
## 11 harmoneps_grib_fp    {file_path}/{YYYY}/{MM}/{DD}/{HH}/mbr{MBR3}/fc{YYYY}{MM…
## 12 harmoneps_grib_sfx   {file_path}/{YYYY}/{MM}/{DD}/{HH}/mbr{MBR3}/fc{YYYY}{MM…
## 13 harmonie_grib        {file_path}/{YYYY}/{MM}/{DD}/{HH}/fc{YYYY}{MM}{DD}{HH}+…
## 14 harmonie_grib_fp     {file_path}/{YYYY}/{MM}/{DD}/{HH}/fc{YYYY}{MM}{DD}{HH}+…
## 15 harmonie_grib_sfx    {file_path}/{YYYY}/{MM}/{DD}/{HH}/fc{YYYY}{MM}{DD}{HH}+…
## 16 meps_cntrl_extracted /lustre/storeB/immutable/archive/projects/metproduction…
## 17 meps_cntrl_sfx       /lustre/storeB/immutable/archive/projects/metproduction…
## 18 meps_det             /lustre/storeB/immutable/archive/projects/metproduction…
## 19 meps_extracted       /lustre/storeB/immutable/archive/projects/metproduction…
## 20 meps_full            /lustre/storeB/immutable/archive/projects/metproduction…
## 21 meps_lagged_6h_subs… /lustre/storeB/immutable/archive/projects/metproduction…
## 22 meps_sfx             /lustre/storeB/immutable/archive/projects/metproduction…
## 23 meps_subset          /lustre/storeB/immutable/archive/projects/metproduction…
## 24 obstable             {file_path}/OBSTABLE_{YYYY}.sqlite                      
## 25 vfld                 {file_path}/{fcst_model}/vfld{fcst_model}{YYYY}{MM}{DD}…
## 26 vfld_det             {file_path}/{det_model}/vfld{det_model}{YYYY}{MM}{DD}{H…
## 27 vfld_det_noexp       {file_path}/{det_model}/vfld{YYYY}{MM}{DD}{HH}{LDT2}    
## 28 vfld_eps             {file_path}/{sub_model}/vfld{sub_model}mbr{MBR3}{YYYY}{…
## 29 vfld_eps_noexp       {file_path}/{sub_model}/vfldmbr{MBR3}{YYYY}{MM}{DD}{HH}…
## 30 vfld_multimodel      {file_path}/{sub_model}/vfld{sub_model}mbr{MBR3}{YYYY}{…
## 31 vfld_multimodel_noe… {file_path}/{sub_model}/vfldmbr{MBR3}{YYYY}{MM}{DD}{HH}…
## 32 vfld_noexp           {file_path}/{fcst_model}/vfldmbr{MBR3}{YYYY}{MM}{DD}{HH…
## 33 vobs                 {file_path}/vobs{YYYY}{MM}{DD}{HH}

Since the output is typically truncated you can see one of the templates in full by supplying the row number in the original output. For example, to see the template for vfld files

show_file_templates(23)

## 
## template_name:
##  meps_subset 
##  
## template:
##  /lustre/storeB/immutable/archive/projects/metproduction/MEPS/{YYYY}/{MM}/{DD}/meps_subset_2_5km_{YYYY}{MM}{DD}T{HH}Z.nc

2.3 Deterministic forecast data

Let’s go ahead and read some data. In the data directory you will find a vfld directory and there you will find a directory for AROME_Arctic_prod. Under that directory you will find vfld files for one day of forecasts. To read the date we will use the function read_forecast. Each of the arguments are annotated so that you understand what they are for.

read_forecast(
  start_date    = 2019021700,           # the first forecast for which we have data
  end_date      = 2019021718,           # the last forecast for which we have data
  fcst_model     = "AROME_Arctic_prod", # the name of the deterministic model as in the file name
  parameter     = "T2m",                # We are going to read 2m temperature
  lead_time     = seq(0, 48, 3),        # We have data for lead times 0 - 48 at 3 hour intervals
  by            = "6h",                 # We have forecasts every 6 hours
  file_path     = here("data/vfld"),    # We don't include AROME_Arctic_prod in the path...
  file_template = "vfld",               # ...because it's in the template
  return_data   = TRUE                  # We want to get some data back - by default nothing is returned
)

## ● AROME_Arctic_prod
## # A tibble: 16,796 x 10
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1002  80.1 16.2  K    
##  3 2019-02-17 00:00:00         0 T2m        1003  77   15.5  K    
##  4 2019-02-17 00:00:00         0 T2m        1004  78.9 11.9  K    
##  5 2019-02-17 00:00:00         0 T2m        1006  78.3 22.8  K    
##  6 2019-02-17 00:00:00         0 T2m        1007  78.9 11.9  K    
##  7 2019-02-17 00:00:00         0 T2m        1008  78.2 15.5  K    
##  8 2019-02-17 00:00:00         0 T2m        1009  80.7 25.0  K    
##  9 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
## 10 2019-02-17 00:00:00         0 T2m        1011  80.1 31.5  K    
## # … with 16,786 more rows, and 3 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, AROME_Arctic_prod_det <dbl>

We don’t have to only handle one model at a time, as long as they have the same file format and fit the same template. We also have data for MEPS, which is an ensemble, but we can just read member 0 by setting the correct template. When there are different options for each fcst_model they can be passed as a named list as below:

read_forecast(
  start_date    = 2019021700,           
  end_date      = 2019021718,           
  fcst_model    = c("AROME_Arctic_prod", "MEPS_prod"),  
  parameter     = "T2m",                
  lead_time     = seq(0, 48, 3), 
  by            = "6h",                
  file_path     = here("data/vfld"),    
  file_template = list(
    AROME_Arctic_prod = "vfld", 
    MEPS_prod         = "{fcst_model}/vfld{fcst_model}mbr000{YYYY}{MM}{DD}{HH}{LDT2}"
  ),
  return_data   = TRUE                  
)

## ● AROME_Arctic_prod
## # A tibble: 16,796 x 10
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1002  80.1 16.2  K    
##  3 2019-02-17 00:00:00         0 T2m        1003  77   15.5  K    
##  4 2019-02-17 00:00:00         0 T2m        1004  78.9 11.9  K    
##  5 2019-02-17 00:00:00         0 T2m        1006  78.3 22.8  K    
##  6 2019-02-17 00:00:00         0 T2m        1007  78.9 11.9  K    
##  7 2019-02-17 00:00:00         0 T2m        1008  78.2 15.5  K    
##  8 2019-02-17 00:00:00         0 T2m        1009  80.7 25.0  K    
##  9 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
## 10 2019-02-17 00:00:00         0 T2m        1011  80.1 31.5  K    
## # … with 16,786 more rows, and 3 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, AROME_Arctic_prod_det <dbl>
## 
## ● MEPS_prod
## # A tibble: 78,132 x 10
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
##  3 2019-02-17 00:00:00         0 T2m        1014  69.2 17.9  K    
##  4 2019-02-17 00:00:00         0 T2m        1015  69.6 17.8  K    
##  5 2019-02-17 00:00:00         0 T2m        1018  69.2 16.0  K    
##  6 2019-02-17 00:00:00         0 T2m        1023  69.1 18.5  K    
##  7 2019-02-17 00:00:00         0 T2m        1025  69.7 18.9  K    
##  8 2019-02-17 00:00:00         0 T2m        1026  69.7 18.9  K    
##  9 2019-02-17 00:00:00         0 T2m        1027  69.7 18.9  K    
## 10 2019-02-17 00:00:00         0 T2m        1033  70.2 19.5  K    
## # … with 78,122 more rows, and 3 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, MEPS_prod_det <dbl>

If we want to write the data to sqlite files we need to tell it a path to write the data to. If the directories do not exist, they will be created. Let’s write the data to “data/FCTABLE”. We use FCTABLE as the directory name since in harp we refer to the SQLite files created by this process as FCTABLE files. We tell read_forecast that we want to output the data to files by setting output_file_opts to something. For SQLite files we can use the sqlite_opts function to set the options we need - usually you only need to set the path to the data using the path argument. Let’s get the vertical temperature profiles as well as the 2m temperature. You can see the parameter names that harp understands by running the function show_harp_parameters

show_harp_parameters()

## # A tibble: 21 x 2
##    harp_parameter_name description                                              
##    <chr>               <chr>                                                    
##  1 AccPcp12h           Accumulated precipitation over e.g. 12 hours             
##  2 Cbase               Height of cloud base                                     
##  3 CChigh              High level cloud cover                                   
##  4 CClow               Low level cloud cover                                    
##  5 CCmed               Medium level cloud cover                                 
##  6 CCtot               Total cloud cover                                        
##  7 D10m                10m wind direction                                       
##  8 G10m                10m wind gust - period depends on input data             
##  9 Gmax                10m maximum wind gust - period depends on input data     
## 10 Pcp                 Precipitation direct from model - usually accumulated fr…
## 11 Pmsl                Pressure at mean sea level                               
## 12 Ps                  Pressure at surface                                      
## 13 Q2m                 2m specific humidity                                     
## 14 RH2m                2m relative humidity                                     
## 15 S10m                10m wind speed                                           
## 16 Smax                Maximum 10m wind speed - period depends on input data    
## 17 T2m                 2m temperature                                           
## 18 Td2m                2m dewpoint temperature                                  
## 19 Tmax                Maximum 2m temperature                                   
## 20 Tmin                Minimum 2m temperature                                   
## 21 vis                 Horizontal visibility                                    
## 
##  For upper air parameters, Z, T, RH, D, S, Q, and Td are available. Follow the
##  letter with a number to denote pressure level, e.g. T850, S925, Z300 etc.

So the upper air parameter for temperture is T.

read_forecast(
  start_date       = 2019021700,           
  end_date         = 2019021718,           
  fcst_model       = c("AROME_Arctic_prod", "MEPS_prod"),  
  parameter        = c("T2m", "T"),                
  lead_time        = seq(0, 48, 3),
  by               = "6h",                
  file_path        = here("data/vfld"),    
  file_template    = list(
    AROME_Arctic_prod = "vfld", 
    MEPS_prod         = "{fcst_model}/vfld{fcst_model}mbr000{YYYY}{MM}{DD}{HH}{LDT2}"
  ),
  output_file_opts = sqlite_opts(path = here("data/FCTABLE/deterministic"))
)

The function tells you where the data were written to, but you can also look under the ./data/FCTABLE/deterministic directory and see for yourself. By default read_forecast uses the “fctable” template, but this can be changed using the template argument in sqlite_opts. However, you should always keep the parameter name in the template.

There are a number of options in read_forecast that can be explored by looking at the help page for the function.

Your turn:

2m temperature is, by default, corrected for the difference in elevation between the model and observation site. Create sqlite files for uncorrected 2m temperature for AROME_Arctic_prod and MEPS_prod (member 0)
Create sqlite files for 10m wind speed and dew point temperature for the upper air for AROME_Arctic_prod and MEPS_prod (member 0)

Solutions

Create sqlite files for uncorrected 2m temperature for AROME_Arctic_prod and MEPS_prod (member 0)

read_forecast(
  start_date          = 2019021700,           
  end_date            = 2019021718,           
  fcst_model          = c("AROME_Arctic_prod", "MEPS_prod"),  
  parameter           = "T2m",                
  lead_time           = seq(0, 48, 3), 
  by                  = "6h",                
  file_path           = here("data/vfld"),    
  file_template       = list(
    AROME_Arctic_prod = "vfld", 
    MEPS_prod         = "{fcst_model}/vfld{fcst_model}mbr000{YYYY}{MM}{DD}{HH}{LDT2}"
  ),
  output_file_opts    = sqlite_opts(path = here("data/FCTABLE/deterministic")),
  transformation_opts = interpolate_opts(keep_model_t2m = TRUE)
)

Create sqlite files for 10m wind speed and dew point temperature for the upper air for AROME_Arctic_prod and MEPS_prod (member 0)

read_forecast(
  start_date       = 2019021700,           
  end_date         = 2019021718,           
  fcst_model       = c("AROME_Arctic_prod", "MEPS_prod"),  
  parameter        = c("S10m", "Td"),                
  lead_time        = seq(0, 48, 3),        
  by               = "6h",                
  file_path        = here("data/vfld"),    
  file_template    = list(
    AROME_Arctic_prod = "vfld", 
    MEPS_prod         = "{fcst_model}/vfld{fcst_model}mbr000{YYYY}{MM}{DD}{HH}{LDT2}"
  ),
  output_file_opts =  sqlite_opts(path = here("data/FCTABLE/deterministic"))
)

2.4 Ensemble forecast data

The interpolation of EPS model data works in much the same way as for deterministic model data, using the read_forecast function. By adding the members argument, we tell read_forecast that we are reading ensemble data.

2.4.1 Read and interpolate 2m temperature

As mentioned in the deterministic section, there are vfld files for MEPS in the data directory. There are 10 members numbered from 0 to 9, but we only have up to a lead time of 12 hours. As for the deterministic model, let’s first get the 2m temperature

library(tidyverse)
library(here)
library(harpIO)

read_forecast(
  start_date    = 2019021700,           
  end_date      = 2019021718,           
  fcst_model    = c("MEPS_prod"),  
  parameter     = "T2m",                
  lead_time     = seq(0, 12, 3), 
  members       = seq(0, 9), 
  by            = "6h",                
  file_path     = here("data/vfld"),    
  file_template = "vfld_eps",
  return_data   = TRUE                  
)

## ● MEPS_prod
## # A tibble: 22,980 x 19
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
##  3 2019-02-17 00:00:00         0 T2m        1014  69.2 17.9  K    
##  4 2019-02-17 00:00:00         0 T2m        1015  69.6 17.8  K    
##  5 2019-02-17 00:00:00         0 T2m        1018  69.2 16.0  K    
##  6 2019-02-17 00:00:00         0 T2m        1023  69.1 18.5  K    
##  7 2019-02-17 00:00:00         0 T2m        1025  69.7 18.9  K    
##  8 2019-02-17 00:00:00         0 T2m        1026  69.7 18.9  K    
##  9 2019-02-17 00:00:00         0 T2m        1027  69.7 18.9  K    
## 10 2019-02-17 00:00:00         0 T2m        1033  70.2 19.5  K    
## # … with 22,970 more rows, and 12 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr001 <dbl>,
## #   MEPS_prod_mbr002 <dbl>, MEPS_prod_mbr003 <dbl>, MEPS_prod_mbr004 <dbl>,
## #   MEPS_prod_mbr005 <dbl>, MEPS_prod_mbr006 <dbl>, MEPS_prod_mbr007 <dbl>,
## #   MEPS_prod_mbr008 <dbl>, MEPS_prod_mbr009 <dbl>

You will see that there are a few extra columns in the data. There is both fcst_model and sub_model. This is in case we are dealing with a multi model ensemble. There is also a column for member, and members_out. This is if you want to renumber the members for the sqlite files. Finally there is a lag column. This has information for lagged ensembles.

Let’s now try reading two models - the AROME_Arctic_prod deterministic model and the MEPS_prod ensemble. But how do we specify the members for more than one model? In this case we use a named list, with one element of the list for each model - and we can do the same for the file template. By setting the members to NA for AROME_Arctic_prod we tell read_forecast that it is deterministic - you could also set it to a numeric value and it would treat the AROME_Arctic_prod as a single member ensemble. We’ll just take the first 3 members from MEPS_prod for speed.

read_forecast(
  start_date    = 2019021700,           
  end_date      = 2019021718,           
  fcst_model    = c("MEPS_prod", "AROME_Arctic_prod"),  
  parameter     = "T2m",                
  lead_time     = seq(0, 12, 3), 
  members       = list(MEPS_prod = seq(0, 2), AROME_Arctic_prod = NA), 
  by            = "6h",                
  file_path     = here("data/vfld"),    
  file_template = list(MEPS_prod = "vfld_eps", AROME_Arctic_prod = "vfld"),
  return_data   = TRUE                  
)

## ● AROME_Arctic_prod
## # A tibble: 4,940 x 10
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1002  80.1 16.2  K    
##  3 2019-02-17 00:00:00         0 T2m        1003  77   15.5  K    
##  4 2019-02-17 00:00:00         0 T2m        1004  78.9 11.9  K    
##  5 2019-02-17 00:00:00         0 T2m        1006  78.3 22.8  K    
##  6 2019-02-17 00:00:00         0 T2m        1007  78.9 11.9  K    
##  7 2019-02-17 00:00:00         0 T2m        1008  78.2 15.5  K    
##  8 2019-02-17 00:00:00         0 T2m        1009  80.7 25.0  K    
##  9 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
## 10 2019-02-17 00:00:00         0 T2m        1011  80.1 31.5  K    
## # … with 4,930 more rows, and 3 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, AROME_Arctic_prod_det <dbl>
## 
## ● MEPS_prod
## # A tibble: 22,980 x 12
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
##  3 2019-02-17 00:00:00         0 T2m        1014  69.2 17.9  K    
##  4 2019-02-17 00:00:00         0 T2m        1015  69.6 17.8  K    
##  5 2019-02-17 00:00:00         0 T2m        1018  69.2 16.0  K    
##  6 2019-02-17 00:00:00         0 T2m        1023  69.1 18.5  K    
##  7 2019-02-17 00:00:00         0 T2m        1025  69.7 18.9  K    
##  8 2019-02-17 00:00:00         0 T2m        1026  69.7 18.9  K    
##  9 2019-02-17 00:00:00         0 T2m        1027  69.7 18.9  K    
## 10 2019-02-17 00:00:00         0 T2m        1033  70.2 19.5  K    
## # … with 22,970 more rows, and 5 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr001 <dbl>,
## #   MEPS_prod_mbr002 <dbl>

We could also generate a multimodel ensemble. Here we use a named nested list for members and a named list for fcst_model. Note also that we have to set custom template for AROME_Arctic_prod since for multimodel ensembles to work, {sub_model} needs to be a part of the file template.

read_forecast(
  start_date        = 2019021700,           
  end_date          = 2019021718,           
  fcst_model        = list(awesome_multimodel_eps = c("MEPS_prod", "AROME_Arctic_prod")),  
  parameter         = "T2m",                
  lead_time         = seq(0, 12, 3), 
  members           = list(awesome_multimodel_eps = list(MEPS_prod = seq(0, 2), AROME_Arctic_prod = 0)), 
  by                = "6h",                
  file_path         = here("data/vfld"),    
  file_template     = list(
    awesome_mutlimodel_eps = list(
      MEPS_prod         = "vfld_multimodel", 
      AROME_Arctic_prod = "{sub_model}/vfld{sub_model}{YYYY}{MM}{DD}{HH}{LDT2}"
    )
  ),
  return_data       = TRUE, 
)

## ● awesome_multimodel_eps
## # A tibble: 23,340 x 13
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
##  3 2019-02-17 00:00:00         0 T2m        1014  69.2 17.9  K    
##  4 2019-02-17 00:00:00         0 T2m        1015  69.6 17.8  K    
##  5 2019-02-17 00:00:00         0 T2m        1018  69.2 16.0  K    
##  6 2019-02-17 00:00:00         0 T2m        1023  69.1 18.5  K    
##  7 2019-02-17 00:00:00         0 T2m        1025  69.7 18.9  K    
##  8 2019-02-17 00:00:00         0 T2m        1026  69.7 18.9  K    
##  9 2019-02-17 00:00:00         0 T2m        1027  69.7 18.9  K    
## 10 2019-02-17 00:00:00         0 T2m        1033  70.2 19.5  K    
## # … with 23,330 more rows, and 6 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, AROME_Arctic_prod_mbr000 <dbl>, MEPS_prod_mbr000 <dbl>,
## #   MEPS_prod_mbr001 <dbl>, MEPS_prod_mbr002 <dbl>

You will see that the members are named differently for each of the models in the ensemble. You can also use the split_multimodel function to separate out each of the sub models into their own data frames in the harp_fcst list.

Your turn:

Create sqlite files for all members of MEPS_prod for lead times 0, 3, 6, 9 and 12 for 2m temperature (both corrected and uncorrected), 10m wind speed and temperature and dew point temperature for the upper air.
Make a multimodel ensemble with members 0-5 of MEPS_prod and AROME_Arctic_prod as a single member ensemble and write out the output to sqlite files for the same lead times and parameters as above. Note that to ensure unique rows in the sqlite files we cannot write out the model elevation. See ?sqlite_opts for how to control this behaviour.

Solutions

Create sqlite files for all members of MEPS_prod for lead times 0, 3, 6, 9 and 12 for 2m temperature (both corrected and uncorrected), 10m wind speed and temperature and dew point temperature for the upper air.

read_forecast(
  start_date          = 2019021700,           
  end_date            = 2019021718,           
  fcst_model          = "MEPS_prod",  
  parameter           = c("T2m", "S10m", "T", "Td"),                
  lead_time           = seq(0, 12, 3), 
  members             = seq(0, 9), 
  by                  = "6h",                
  file_path           = here("data/vfld"),    
  file_template       = "vfld_eps",
  transformation_opts = interpolate_opts(keep_model_t2m = TRUE),
  output_file_opts    = sqlite_opts(path = here("data/FCTABLE/ensemble"))
)

Make a multimodel ensemble with members 0-5 of MEPS_prod and AROME_Arctic_prod and write out the output to sqlite files for the same lead times and parameters as above.

read_forecast(
  start_date        = 2019021700,           
  end_date          = 2019021718,           
  fcst_model        = list(awesome_multimodel_eps = c("MEPS_prod", "AROME_Arctic_prod")),  
  parameter         = c("T2m", "S10m", "T", "Td"),                
  lead_time         = seq(0, 12, 3), 
  members           = list(awesome_multimodel_eps = list(MEPS_prod = seq(0, 2), AROME_Arctic_prod = 0)), 
  by                = "6h",                
  file_path         = here("data/vfld"),    
  file_template     = list(
    awesome_multimodel_eps = list(
      MEPS_prod         = "vfld_multimodel",
      AROME_Arctic_prod = "{sub_model}/vfld{sub_model}{YYYY}{MM}{DD}{HH}{LDT2}"
    )
  ),
  transformation_opts = interpolate_opts(keep_model_t2m = TRUE),
  output_file_opts    = sqlite_opts(path = here("data/FCTABLE/ensemble"), remove_model_elev = TRUE)
)

2.4.2 Lagged ensembles

In a number of Hirlam institutes (MetCoOp and DMI) “continuous” ensembles are being run. With these a small number of members are run each hour and the full ensemble is constructed by lagging these hourly members. In MetCoOp this ensemble is known as CMEPS and is currently running in test mode. We have some vfld files for CMEPS_prod in the data directory for the same dates as the other modelling systems. Each member runs every three hours, with members 1 and 2 at 00, 03…, 21; members 5 and 6 at 01, 04…, 22; and members 3 and 4 at 02, 05…, 23. To construct the ensemble for 03, we need members from 03, 02 and 01. We specifiy what lags we need as a named list that is the same length as the members argument.

read_forecast(
  start_date    = 2019021700,           
  end_date      = 2019021718,           
  fcst_model    = "CMEPS_prod", 
  parameter     = "T2m",                
  lead_time     = seq(0, 12, 3), 
  members       = c(0, 1, 3, 4, 5, 6),
  lags          = c(0, 0, 2, 2, 1, 1),
  by            = "3h",                
  file_path     = here("data/vfld"),    
  file_template = "vfld_eps",
  return_data   = TRUE                  
)

## ● CMEPS_prod
## # A tibble: 40,215 x 15
##    fcdate              lead_time parameter   SID   lat   lon units
##    <dttm>                  <dbl> <chr>     <int> <dbl> <dbl> <chr>
##  1 2019-02-17 00:00:00         0 T2m        1001  70.9 -8.67 K    
##  2 2019-02-17 00:00:00         0 T2m        1010  69.3 16.1  K    
##  3 2019-02-17 00:00:00         0 T2m        1014  69.2 17.9  K    
##  4 2019-02-17 00:00:00         0 T2m        1015  69.6 17.8  K    
##  5 2019-02-17 00:00:00         0 T2m        1018  69.2 16.0  K    
##  6 2019-02-17 00:00:00         0 T2m        1023  69.1 18.5  K    
##  7 2019-02-17 00:00:00         0 T2m        1025  69.7 18.9  K    
##  8 2019-02-17 00:00:00         0 T2m        1026  69.7 18.9  K    
##  9 2019-02-17 00:00:00         0 T2m        1027  69.7 18.9  K    
## 10 2019-02-17 00:00:00         0 T2m        1033  70.2 19.5  K    
## # … with 40,205 more rows, and 8 more variables: validdate <dttm>,
## #   fcst_cycle <chr>, CMEPS_prod_mbr000 <dbl>, CMEPS_prod_mbr001 <dbl>,
## #   CMEPS_prod_mbr003_lag2h <dbl>, CMEPS_prod_mbr004_lag2h <dbl>,
## #   CMEPS_prod_mbr005_lag1h <dbl>, CMEPS_prod_mbr006_lag1h <dbl>

Your turn

Get CMEPS_prod data for all of the same parameters as before and write to SQLite files
Do the same for precipitation (“Pcp”) for both CMEPS_prod and MEPS_prod

Solutions

Get CMEPS_prod data for all of the same parameters as before and write to SQLite files

read_forecast(
  start_date          = 2019021700,           
  end_date            = 2019021718,           
  fcst_model          = "CMEPS_prod", 
  parameter           = c("T2m", "S10m", "T", "Td"),                 
  lead_time           = seq(0, 12, 3), 
  members             = c(0, 1, 3, 4, 5, 6),
  lags                = c(0, 0, 2, 2, 1, 1),
  by                  = "3h",                
  file_path           = here("data/vfld"),    
  file_template       = "vfld_eps",
  transformation_opts = interpolate_opts(keep_model_t2m  = TRUE),
  output_file_opts    = sqlite_opts(path = here("data/FCTABLE/ensemble"))
)

Do the same for precipitation (“Pcp”) for both CMEPS_prod and MEPS_prod

read_forecast(
  start_date      = 2019021700,           
  end_date        = 2019021718,           
  fcst_model      = c("MEPS_prod", "CMEPS_prod"), 
  parameter       = "Pcp",                 
  lead_time       = seq(0, 12, 3), 
  members         = list(MEPS_prod = seq(0, 9), CMEPS_prod = c(0, 1, 3, 4, 5, 6)),
  lags            = list(CMEPS_prod = c(0, 0, 2, 2, 1, 1)),
  by              = "3h",                
  file_path       = here("data/vfld"),    
  file_template   = "vfld_eps",
  output_file_opts = sqlite_opts(path = here("data/FCTABLE/ensemble")), 
)