Skip to contents

When transforming gridded data to point data, it is useful to save the point data for future use so that access to the data is fast, and the gridded data files do not have to be read in again. Furthermore, if using vfld files, the IO overhead becomes very cumbersome for large data sets. In harp, it is possible to save point data to SQLite format, which is a portable relational database format. This means that data can be selected and filtered before reading them into memory. In harp, these files are typically referred to as FCTABLE files as FCTABLE is part of the default file template.

To save point data to SQLite format, simply add the output_file_opts argument to read_forecast() and set the options using sqlite_opts(). For most cases, the only option that needs to be set is the path to where you want to save the data. For this example, the data will be saved under tempdir(), which returns the path to the temporary directory for the current R session (typically under /tmp).

#> Loading required package: harpCore
#> 
#> Attaching package: 'harpCore'
#> The following object is masked from 'package:stats':
#> 
#>     filter
read_forecast(
  dttm             = seq_dttm(2019021700, 2019021718, "6h"),
  fcst_model       = "MEPS_prod",
  parameter        = NULL,
  lead_time        = seq(0, 12, 3),
  members          = seq(0, 3),
  file_path        = system.file("vfld", package = "harpData"),
  file_template    = "vfld_eps",
  output_file_opts = sqlite_opts(path = file.path(tempdir(), "FCTABLE"))
)

Here, we have read in data from vfld files from the MEPS_prod ensemble model for members 0 - 3 and lead times 0 - 12 every three hours. For vfld files, setting parameter = NULL means that all parameters will be read from the vfld files and written to the output files (note this behaviour is only possible for vfld files). The data have been saved at tempdir()/FCTABLE, using the default file template, “fctable”, which is “{file_path}/{fcst_model}/{YYYY}/{MM}/FCTABLE_{parameter}_{YYYY}{MM}_{HH}.sqlite”, where {file_path} is the path set in sqlite_opts().

You can see all the files that have been produced:

dir(file.path(tempdir(), "FCTABLE", "MEPS_prod", "2019", "02"))
#>   [1] "FCTABLE_Cbase_201902_00.sqlite" "FCTABLE_Cbase_201902_06.sqlite"
#>   [3] "FCTABLE_Cbase_201902_12.sqlite" "FCTABLE_Cbase_201902_18.sqlite"
#>   [5] "FCTABLE_CClow_201902_00.sqlite" "FCTABLE_CClow_201902_06.sqlite"
#>   [7] "FCTABLE_CClow_201902_12.sqlite" "FCTABLE_CClow_201902_18.sqlite"
#>   [9] "FCTABLE_CCtot_201902_00.sqlite" "FCTABLE_CCtot_201902_06.sqlite"
#>  [11] "FCTABLE_CCtot_201902_12.sqlite" "FCTABLE_CCtot_201902_18.sqlite"
#>  [13] "FCTABLE_D_201902_00.sqlite"     "FCTABLE_D_201902_06.sqlite"    
#>  [15] "FCTABLE_D_201902_12.sqlite"     "FCTABLE_D_201902_18.sqlite"    
#>  [17] "FCTABLE_D10m_201902_00.sqlite"  "FCTABLE_D10m_201902_06.sqlite" 
#>  [19] "FCTABLE_D10m_201902_12.sqlite"  "FCTABLE_D10m_201902_18.sqlite" 
#>  [21] "FCTABLE_DDP1_201902_00.sqlite"  "FCTABLE_DDP1_201902_06.sqlite" 
#>  [23] "FCTABLE_DDP1_201902_12.sqlite"  "FCTABLE_DDP1_201902_18.sqlite" 
#>  [25] "FCTABLE_DDP2_201902_00.sqlite"  "FCTABLE_DDP2_201902_06.sqlite" 
#>  [27] "FCTABLE_DDP2_201902_12.sqlite"  "FCTABLE_DDP2_201902_18.sqlite" 
#>  [29] "FCTABLE_FFP1_201902_00.sqlite"  "FCTABLE_FFP1_201902_06.sqlite" 
#>  [31] "FCTABLE_FFP1_201902_12.sqlite"  "FCTABLE_FFP1_201902_18.sqlite" 
#>  [33] "FCTABLE_FFP2_201902_00.sqlite"  "FCTABLE_FFP2_201902_06.sqlite" 
#>  [35] "FCTABLE_FFP2_201902_12.sqlite"  "FCTABLE_FFP2_201902_18.sqlite" 
#>  [37] "FCTABLE_Gmax_201902_00.sqlite"  "FCTABLE_Gmax_201902_06.sqlite" 
#>  [39] "FCTABLE_Gmax_201902_12.sqlite"  "FCTABLE_Gmax_201902_18.sqlite" 
#>  [41] "FCTABLE_N75_201902_00.sqlite"   "FCTABLE_N75_201902_06.sqlite"  
#>  [43] "FCTABLE_N75_201902_12.sqlite"   "FCTABLE_N75_201902_18.sqlite"  
#>  [45] "FCTABLE_Pcp_201902_00.sqlite"   "FCTABLE_Pcp_201902_06.sqlite"  
#>  [47] "FCTABLE_Pcp_201902_12.sqlite"   "FCTABLE_Pcp_201902_18.sqlite"  
#>  [49] "FCTABLE_Pmsl_201902_00.sqlite"  "FCTABLE_Pmsl_201902_06.sqlite" 
#>  [51] "FCTABLE_Pmsl_201902_12.sqlite"  "FCTABLE_Pmsl_201902_18.sqlite" 
#>  [53] "FCTABLE_Ps_201902_00.sqlite"    "FCTABLE_Ps_201902_06.sqlite"   
#>  [55] "FCTABLE_Ps_201902_12.sqlite"    "FCTABLE_Ps_201902_18.sqlite"   
#>  [57] "FCTABLE_Q_201902_00.sqlite"     "FCTABLE_Q_201902_06.sqlite"    
#>  [59] "FCTABLE_Q_201902_12.sqlite"     "FCTABLE_Q_201902_18.sqlite"    
#>  [61] "FCTABLE_Q2m_201902_00.sqlite"   "FCTABLE_Q2m_201902_06.sqlite"  
#>  [63] "FCTABLE_Q2m_201902_12.sqlite"   "FCTABLE_Q2m_201902_18.sqlite"  
#>  [65] "FCTABLE_QQP1_201902_00.sqlite"  "FCTABLE_QQP1_201902_06.sqlite" 
#>  [67] "FCTABLE_QQP1_201902_12.sqlite"  "FCTABLE_QQP1_201902_18.sqlite" 
#>  [69] "FCTABLE_QQP2_201902_00.sqlite"  "FCTABLE_QQP2_201902_06.sqlite" 
#>  [71] "FCTABLE_QQP2_201902_12.sqlite"  "FCTABLE_QQP2_201902_18.sqlite" 
#>  [73] "FCTABLE_RH_201902_00.sqlite"    "FCTABLE_RH_201902_06.sqlite"   
#>  [75] "FCTABLE_RH_201902_12.sqlite"    "FCTABLE_RH_201902_18.sqlite"   
#>  [77] "FCTABLE_RH2m_201902_00.sqlite"  "FCTABLE_RH2m_201902_06.sqlite" 
#>  [79] "FCTABLE_RH2m_201902_12.sqlite"  "FCTABLE_RH2m_201902_18.sqlite" 
#>  [81] "FCTABLE_RHP1_201902_00.sqlite"  "FCTABLE_RHP1_201902_06.sqlite" 
#>  [83] "FCTABLE_RHP1_201902_12.sqlite"  "FCTABLE_RHP1_201902_18.sqlite" 
#>  [85] "FCTABLE_RHP2_201902_00.sqlite"  "FCTABLE_RHP2_201902_06.sqlite" 
#>  [87] "FCTABLE_RHP2_201902_12.sqlite"  "FCTABLE_RHP2_201902_18.sqlite" 
#>  [89] "FCTABLE_S_201902_00.sqlite"     "FCTABLE_S_201902_06.sqlite"    
#>  [91] "FCTABLE_S_201902_12.sqlite"     "FCTABLE_S_201902_18.sqlite"    
#>  [93] "FCTABLE_S10m_201902_00.sqlite"  "FCTABLE_S10m_201902_06.sqlite" 
#>  [95] "FCTABLE_S10m_201902_12.sqlite"  "FCTABLE_S10m_201902_18.sqlite" 
#>  [97] "FCTABLE_T_201902_00.sqlite"     "FCTABLE_T_201902_06.sqlite"    
#>  [99] "FCTABLE_T_201902_12.sqlite"     "FCTABLE_T_201902_18.sqlite"    
#> [101] "FCTABLE_T2m_201902_00.sqlite"   "FCTABLE_T2m_201902_06.sqlite"  
#> [103] "FCTABLE_T2m_201902_12.sqlite"   "FCTABLE_T2m_201902_18.sqlite"  
#> [105] "FCTABLE_Td_201902_00.sqlite"    "FCTABLE_Td_201902_06.sqlite"   
#> [107] "FCTABLE_Td_201902_12.sqlite"    "FCTABLE_Td_201902_18.sqlite"   
#> [109] "FCTABLE_Td2m_201902_00.sqlite"  "FCTABLE_Td2m_201902_06.sqlite" 
#> [111] "FCTABLE_Td2m_201902_12.sqlite"  "FCTABLE_Td2m_201902_18.sqlite" 
#> [113] "FCTABLE_TDP1_201902_00.sqlite"  "FCTABLE_TDP1_201902_06.sqlite" 
#> [115] "FCTABLE_TDP1_201902_12.sqlite"  "FCTABLE_TDP1_201902_18.sqlite" 
#> [117] "FCTABLE_TDP2_201902_00.sqlite"  "FCTABLE_TDP2_201902_06.sqlite" 
#> [119] "FCTABLE_TDP2_201902_12.sqlite"  "FCTABLE_TDP2_201902_18.sqlite" 
#> [121] "FCTABLE_Tmax_201902_00.sqlite"  "FCTABLE_Tmax_201902_06.sqlite" 
#> [123] "FCTABLE_Tmax_201902_12.sqlite"  "FCTABLE_Tmax_201902_18.sqlite" 
#> [125] "FCTABLE_Tmin_201902_00.sqlite"  "FCTABLE_Tmin_201902_06.sqlite" 
#> [127] "FCTABLE_Tmin_201902_12.sqlite"  "FCTABLE_Tmin_201902_18.sqlite" 
#> [129] "FCTABLE_TNP1_201902_00.sqlite"  "FCTABLE_TNP1_201902_06.sqlite" 
#> [131] "FCTABLE_TNP1_201902_12.sqlite"  "FCTABLE_TNP1_201902_18.sqlite" 
#> [133] "FCTABLE_TNP2_201902_00.sqlite"  "FCTABLE_TNP2_201902_06.sqlite" 
#> [135] "FCTABLE_TNP2_201902_12.sqlite"  "FCTABLE_TNP2_201902_18.sqlite" 
#> [137] "FCTABLE_TTP1_201902_00.sqlite"  "FCTABLE_TTP1_201902_06.sqlite" 
#> [139] "FCTABLE_TTP1_201902_12.sqlite"  "FCTABLE_TTP1_201902_18.sqlite" 
#> [141] "FCTABLE_TTP2_201902_00.sqlite"  "FCTABLE_TTP2_201902_06.sqlite" 
#> [143] "FCTABLE_TTP2_201902_12.sqlite"  "FCTABLE_TTP2_201902_18.sqlite" 
#> [145] "FCTABLE_TXP1_201902_00.sqlite"  "FCTABLE_TXP1_201902_06.sqlite" 
#> [147] "FCTABLE_TXP1_201902_12.sqlite"  "FCTABLE_TXP1_201902_18.sqlite" 
#> [149] "FCTABLE_TXP2_201902_00.sqlite"  "FCTABLE_TXP2_201902_06.sqlite" 
#> [151] "FCTABLE_TXP2_201902_12.sqlite"  "FCTABLE_TXP2_201902_18.sqlite" 
#> [153] "FCTABLE_vis_201902_00.sqlite"   "FCTABLE_vis_201902_06.sqlite"  
#> [155] "FCTABLE_vis_201902_12.sqlite"   "FCTABLE_vis_201902_18.sqlite"  
#> [157] "FCTABLE_Z_201902_00.sqlite"     "FCTABLE_Z_201902_06.sqlite"    
#> [159] "FCTABLE_Z_201902_12.sqlite"     "FCTABLE_Z_201902_18.sqlite"

With the default template, you get a directory for each forecast model and under that a directory for each year and then month. Each file contains the data for one parameter for one forecast cycle for each month.

You can then read in those data again using read_point_forecast(). Here you have to tell the function whether you are reading ensemble (“eps”) or deterministic (“det”) data.

read_point_forecast(
  dttm       = seq_dttm(2019021700, 2019021718, "6h"),
  fcst_model = "MEPS_prod", 
  fcst_type  = "eps",
  parameter  = "T2m",
  lead_time  = seq(0, 12, 3),
  file_path  = file.path(tempdir(), "FCTABLE")
)
#> ::ensemble point forecast:: # A tibble: 22,980 × 13
#>    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
#>    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
#>  1 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1001 T2m      
#>  2 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1010 T2m      
#>  3 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1014 T2m      
#>  4 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1015 T2m      
#>  5 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1018 T2m      
#>  6 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1023 T2m      
#>  7 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1025 T2m      
#>  8 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1026 T2m      
#>  9 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1027 T2m      
#> 10 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1033 T2m      
#> # ℹ 22,970 more rows
#> # ℹ 7 more variables: MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr001 <dbl>,
#> #   MEPS_prod_mbr002 <dbl>, MEPS_prod_mbr003 <dbl>, fcst_cycle <chr>,
#> #   model_elevation <dbl>, units <chr>

By default, all members that are found are read in, but you can also select which members you want with the members argument:

read_point_forecast(
  dttm       = seq_dttm(2019021700, 2019021718, "6h"),
  fcst_model = "MEPS_prod", 
  fcst_type  = "eps",
  parameter  = "T2m",
  lead_time  = seq(0, 12, 3),
  members    = c(0, 2),
  file_path  = file.path(tempdir(), "FCTABLE")
)
#> ::ensemble point forecast:: # A tibble: 22,980 × 11
#>    fcst_model fcst_dttm           valid_dttm          lead_time   SID parameter
#>    <chr>      <dttm>              <dttm>                  <int> <int> <chr>    
#>  1 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1001 T2m      
#>  2 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1010 T2m      
#>  3 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1014 T2m      
#>  4 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1015 T2m      
#>  5 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1018 T2m      
#>  6 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1023 T2m      
#>  7 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1025 T2m      
#>  8 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1026 T2m      
#>  9 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1027 T2m      
#> 10 MEPS_prod  2019-02-17 00:00:00 2019-02-17 00:00:00         0  1033 T2m      
#> # ℹ 22,970 more rows
#> # ℹ 5 more variables: MEPS_prod_mbr000 <dbl>, MEPS_prod_mbr002 <dbl>,
#> #   fcst_cycle <chr>, model_elevation <dbl>, units <chr>