---
title: "Benchmarking"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Benchmarking}
%\VignetteEncoding{UTF-8}
%\VignetteEngine{knitr::rmarkdown}
editor_options:
chunk_output_type: console
---
```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```
# Timings *hydrorecipes* vs *recipes*
Timings for the *hydrorecipes* package are prefaced with an "h". The first few
comparisons include the **R6** interface in *hydrorecipes* to check if there is
a loss of speed compared to the standard API. Most users are likely to use the
standard API so the remaining benchmarks only present that. Typical speed
improvements are between 2-10x and memory consumption is typically half of the
*recipes* package.
```{r}
#| echo: false
#| warning: false
#| message: false
library(collapse)
library(gslnls)
library(data.table)
library(hydrorecipes)
library(bench)
library(tibble)
library(RcppRoll) # for recipes::step_window
library(splines2)
```
## creating a recipe
```{r}
#| echo: true
#| warning: false
relative <- TRUE
n <- c(1e2, 1e4, 5e6)
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
bench::mark(
hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat),
hrec2 = recipe(formula = formula, data = dat),
rec = recipes::recipe(formula = formula, data = dat),
check = FALSE,
relative = relative
)
}
)
results
```
## add a step
```{r}
#| echo: true
#| warning: false
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
bench::mark(
hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
add_step(hydrorecipes:::StepCenter$new(x)),
hrec2 = recipe(formula = formula, data = dat) |>
step_center(x),
rec = {recipes::recipe(formula = formula, data = dat) |>
recipes::step_center(x)},
check = FALSE,
relative = relative
)
}
)
results
```
## step_center prep
```{r}
#| echo: true
#| warning: false
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
add_step(hydrorecipes:::StepCenter$new(x))
hrec2 = recipe(formula = formula, data = dat) |>
step_center(x)
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_center(x)
bench::mark(
hrec1$prep(),
hrec2 |> prep(),
rec |> recipes::prep(),
check = FALSE,
min_iterations = 1L,
relative = relative
)
}
)
results
```
## step_center prep and bake
```{r}
#| echo: true
#| warning: false
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
hrec1 = hydrorecipes:::Recipe$new(formula = formula, data = dat)$
add_step(hydrorecipes:::StepCenter$new(x))
hrec2 = recipe(formula = formula, data = dat) |>
step_center(x)
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_center(x)
bench::mark(
hrec1$prep()$bake(),
hrec2 |> prep() |> bake(),
rec |> recipes::prep() |> recipes::bake(new_data = NULL),
check = FALSE,
min_iterations = 1L,
relative = relative
)
}
)
results
```
## step_center
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = (recipe(formula = formula, data = dat) |>
step_center(x) |>
plate())[["x"]],
rec = (recipes::recipe(formula = formula, data = dat) |>
recipes::step_center(x) |>
recipes::prep() |>
recipes::bake(new_data = NULL))[["x"]],
check = TRUE,
min_iterations = 1L,
relative = relative
)
}
)
results
```
## step_scale
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = (recipe(formula = formula, data = dat) |>
step_scale(x, fun = fsd, n_sd = 2L) |>
plate())[["x"]],
rec = (recipes::recipe(formula = formula, data = dat) |>
recipes::step_scale(x, factor = 2L) |>
recipes::prep() |>
recipes::bake(new_data = NULL))[["x"]],
check = TRUE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_intercept
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = rnorm(rows))
bench::mark(
hrec = (recipe(formula = formula, data = dat) |>
step_intercept() |>
plate("tbl"))[["intercept"]],
rec = (recipes::recipe(formula = formula, data = dat) |>
recipes::step_intercept() |>
recipes::prep() |>
recipes::bake(new_data = NULL))[["intercept"]],
check = TRUE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_normalize
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = rnorm(rows),
z = rnorm(rows))
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_normalize(c(x, z, y)) |>
plate("tbl"))[, c("x", "z", "y")],
hrec2 = (recipe(formula = formula, data = dat) |>
step_center(c(x, z, y)) |>
step_scale(c(x, z, y)) |>
plate("tbl"))[, c("x", "z", "y")],
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_normalize(x, y, z) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
relative = relative,
min_iterations = 1L,
check = TRUE
)
}
)
results
```
## step_drop_columns
```{r}
#| echo: true
#| warning: false
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = rnorm(rows),
z = rnorm(rows))
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_drop_columns(z) |>
plate("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_rm(z) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
check = TRUE,
relative = relative
)
}
)
results
```
## step_subset_na_omit
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = c(1e6, 1e7),
{
dat <- tibble(x = rnorm(rows),
z = rnorm(rows),
y = rnorm(rows))
dat[1:5, "x"] <- NA_real_
dat[100:150, "z"] <- NA_real_
dat[10000:15000, "y"] <- NA_real_
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_subset_na_omit(terms = x) |>
prep() |>
bake())$get_result("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_naomit(x) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
relative = FALSE,
min_iterations = 1L,
check = TRUE
)
}
)
results
```
## step_subset_rows
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = c(1e6, 1e7),
{
dat <- tibble(x = rnorm(rows),
z = rnorm(rows),
y = rnorm(rows))
sub <- sample(1:rows, size = 5e5)
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_subset_rows(row_numbers = sub) |>
prep() |>
bake())$get_result("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_slice(sub) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
base = dat[sub, ],
relative = FALSE,
min_iterations = 1L,
check = TRUE
)
}
)
results
```
## step_subset_sample
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = c(1e6, 1e7),
{
dat <- data.frame(x = rnorm(rows),
z = rnorm(rows),
y = rnorm(rows))
bench::mark(
h <- {hrec1 = recipe(formula = formula, data = dat) |>
step_subset_sample(size = 10000L) |>
prep() |>
bake()
h = nrow(hrec1$get_result("tbl"))},
rec = nrow(recipes::recipe(formula = formula, data = dat) |>
recipes::step_sample(size = 10000 / rows) |>
recipes::prep() |>
recipes::bake(new_data = NULL)),
relative = FALSE,
min_iterations = 1L,
check = TRUE
)
}
)
results
```
## step_cross_correlation
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = rnorm(rows))
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_cross_correlation(c(x, z, y), lag_max = 1000) |>
plate("tbl"),
min_iterations = 1L,
)
}
)
results
x <- rnorm(5e5)
y <- rnorm(5e5)
lag_max <- 1000
results <- bench::mark(fft_ccf <- hydrorecipes:::convolve_correlation(x, y, lag_max),
ccf_base <- as.numeric(ccf(x, y, lag.max = lag_max, plot = FALSE)$acf),
min_iterations = 1L,
check = TRUE
)
results
```
## step_lag
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = as.numeric(1:rows),
z = rnorm(rows))
bench::mark(
hrec1 = unname(recipe(formula = formula, data = dat) |>
step_lead_lag(x, lag = 1:30) |>
plate("tbl")),
rec = unname(recipes::recipe(formula = formula, data = dat) |>
recipes::step_lag(x, lag = 1:30) |>
recipes::prep() |>
recipes::bake(new_data = NULL)),
check = TRUE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_distributed_lag
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = c(5e5, 5e6, 1e7),
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_distributed_lag(x, knots = log_lags(5, 86401)) |>
prep() |> bake(),
check = FALSE,
relative = FALSE,
min_iterations = 1L
)
}
)
results
```
## step_harmonic
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_harmonic(x,
frequency = c(1.0, 2.0, 3.0),
cycle_size = 0.1,
starting_value = 0.0) |>
plate("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_harmonic(x,
frequency = c(1.0, 2.0, 3.0),
cycle_size = 0.1,
starting_val = 0.0,
keep_original_cols = TRUE) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
# sin and cos terms order is different
check = FALSE,
relative = relative,
min_iterations = 1L
)
}
)
results
# rows <- 1e6
# dat <- data.frame(x = rnorm(rows),
# y = 1:rows,
# z = rnorm(rows))
# bench::mark(
#
# {hrec = recipe(formula = formula, data = dat) |>
# step_harmonic(x,
# frequency = c(1.0, 2.0, 3.0),
# cycle_size = 0.1,
# starting_value = 0.0,
# varying = "cycle_size") |>
# step_harmonic(x,
# frequency = c(1.0, 2.0, 3.0),
# cycle_size = 0.1,
# starting_value = 0.0) |>
# step_intercept() |>
# step_center(x) |>
# prep() |>
# bake()},
#
# {hrec$steps[[2]]$update_step("cycle_size", 0.2)
# hrec$bake()
# },
# check = FALSE
# )
```
## step_pca
```{r}
#| echo: true
#| warning: false
set.seed(1)
formula <- as.formula(x~a + b + c + d + e + f + g + h + i + j + k + l)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
a = rnorm(rows),
b = rnorm(rows),
c = rnorm(rows),
d = rnorm(rows),
e = rnorm(rows),
f = rnorm(rows),
g = rnorm(rows),
h = rnorm(rows),
i = rnorm(rows),
j = rnorm(rows),
k = rnorm(rows),
l = rnorm(rows)
)
bench::mark(
hrec1 = recipe(formula = formula, data = dat)|>
step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 10L) |>
plate(),
hrec2 = recipe(formula = formula, data = dat)|>
step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l), n_comp = 5L) |>
plate(),
hrec3 = recipe(formula = formula, data = dat)|>
step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
n_comp = 10L,
center = FALSE,
scale = FALSE) |>
plate(),
hrec4 = recipe(formula = formula, data = dat)|>
step_pca(c(a,b,c,d,e,f,g,h,i,j,k,l),
n_comp = 5L,
center = FALSE,
scale = FALSE) |>
plate(),
rec1 = recipes::recipe(formula = formula, data = dat) |>
recipes::step_pca(recipes::all_predictors(),
num_comp = 10L,
options = list(center = TRUE, scale. = TRUE))|>
recipes::prep() |>
recipes::bake(new_data = NULL),
rec2 = recipes::recipe(formula = formula, data = dat) |>
recipes::step_pca(recipes::all_predictors(),
num_comp = 5L,
options = list(center = TRUE, scale. = TRUE)) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
rec3 = recipes::recipe(formula = formula, data = dat) |>
recipes::step_pca(recipes::all_predictors(),
num_comp = 10L) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
rec4 = recipes::recipe(formula = formula, data = dat) |>
recipes::step_pca(recipes::all_predictors(),
num_comp = 5L) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
check = FALSE,
relative = relative,
min_iterations = 1L
)
}
)
print(results, n = 100)
```
## step_dummy
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = qF(sample(1:10, rows, replace = TRUE)),
z = rnorm(rows))
bench::mark(
hrec = unname(recipe(formula = formula, data = dat) |>
step_dummy(y) |>
plate("tbl"))[,3:11],
rec = unname(recipes::recipe(formula = formula, data = dat) |>
recipes::step_dummy(y, keep_original_cols = TRUE) |>
recipes::prep() |>
recipes::bake(new_data = NULL))[,3:11],
check = TRUE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_find_interval
- no direct comparison so compare to step_cut
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_find_interval(x, vec = c(-0.1, 0, 0.1)) |>
plate("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_cut(x, breaks = c(-0.1, 0, 0.1)) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
check = FALSE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_varying
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rep(1, rows),
y = 1:rows,
z = rnorm(rows))
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_varying(c(x, y, z)) |>
plate("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_zv(x, y, z) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
check = TRUE,
relative = relative,
min_iterations = 1L
)
}
)
results
```
## step_kernel_filter
*step_kernel_filter* uses an Fast Fourier Transform (FFT) based convolution instead of an explicit sliding window. This should be much faster for large datasets and particularly when the kernel size is also large.
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = c(2e4, 2e5),
{
dat <- data.frame(x = rep(1, rows),
y = 1:rows,
z = cumsum(rnorm(rows)))
bench::mark(
hrec = unname((recipe(formula = formula, data = dat) |>
step_kernel_filter(z, kernel = list(rep(1, 5001L)/5001L), align = "center") |>
plate("tbl"))[10000, "kernel_filter_z"]),
{rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_window(z, size = 5001L, statistic = "mean") |>
recipes::prep() |>
recipes::bake(new_data = NULL)
unname(rec[10000, "z"])},
min_iterations = 1L,
relative = relative,
check = TRUE
)
}
)
results
```
## step_convolve_gamma
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x+z)
results <- bench::press(
rows = c(2e4, 2e6),
{
dat <- data.frame(x = rep(1, rows),
y = 1:rows,
z = cumsum(rnorm(rows)))
bench::mark(
hrec = (recipe(formula = formula, data = dat) |>
step_convolve_gamma(z, amplitude = 1, k = 1, theta = 1) |>
plate("tbl")),
min_iterations = 1,
relative = FALSE,
check = TRUE
)
}
)
results
```
## step_compare_columns
## multiple steps
*step_harmonic* dominates these results.
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = 1:rows)
bench::mark(
hrec = recipe(formula = formula, data = dat) |>
step_lead_lag(x, lag = 1:20) |>
step_harmonic(x,
frequency = c(1.0, 2.0, 3.0),
cycle_size = 0.1,
starting_value = 0.0) |>
step_center(x) |>
plate("tbl"),
rec = recipes::recipe(formula = formula, data = dat) |>
recipes::step_lag(x, lag = 1:20, keep_original_cols = TRUE) |>
recipes::step_harmonic(x,
frequency = c(1.0, 2.0, 3.0),
cycle_size = 0.1,
starting_val = 0.0,
keep_original_cols = TRUE) |>
recipes::step_center(x) |>
recipes::prep() |>
recipes::bake(new_data = NULL),
check = FALSE,
relative = relative,
min_iterations = 1
)
}
)
results
```
## step_spline_b
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
n <- c(100, 1e4, 5e6)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
bench::mark(
hrec = unname(recipe(formula = formula, data = dat) |>
step_spline_b(x, df = 13) |>
plate("tbl")),
rec = unname(recipes::recipe(formula = formula, data = dat) |>
recipes::step_spline_b(x, deg_free = 13, keep_original_cols = TRUE)|>
recipes::prep() |>
recipes::bake(new_data = NULL)),
check = TRUE,
relative = relative,
min_iterations = 2
)
}
)
results
```
## step_spline_n
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
bench::mark(
hrec = unname(recipe(formula = formula, data = dat) |>
step_spline_n(x, df = 11L) |>
plate("tbl")),
rec = unname(recipes::recipe(formula = formula, data = dat) |>
recipes::step_spline_natural(x, deg_free = 11L, keep_original_cols = TRUE)|>
recipes::prep() |>
recipes::bake(new_data = NULL)),
check = TRUE,
relative = relative,
min_iterations = 2L
)
}
)
results
```
## step_add_noise
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = rep(0.01, rows))
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_add_noise(y) |>
plate("dt"))
}
)
results
```
## step_aquifer_grf & step_aquifer_theis
The Theis solution is a subset of the grf solution.
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = rep(0.01, rows))
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_aquifer_grf(time = x, flow_rate = y) |>
plate("dt"),
hrec2 = recipe(formula = formula, data = dat) |>
step_aquifer_theis(time = x, flow_rate = y) |>
plate("dt"),
check = TRUE)
}
)
results
```
## step_aquifer_theis_aniso
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = rep(0.01, rows))
bench::mark(
hrec1 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_theis_aniso(time = x,
flow_rate = y,
distance_x = 0,
distance_y = 100,
hydraulic_conductivity_major = 1e-4,
hydraulic_conductivity_minor = 1e-4) |>
plate("dt")),
hrec2 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_theis(time = x, flow_rate = y,) |>
plate("dt")),
check = TRUE)
}
)
results
```
## step_aquifer_leaky
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = rep(0.01, rows))
bench::mark(
hrec1 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_leaky(time = x,
flow_rate = y,
leakage = 100000000) |>
plate("dt")),
hrec2 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_theis(time = x,
flow_rate = y) |>
plate("dt")),
check = TRUE)
}
)
results
```
## step_aquifer_patch
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = c(1e4, 1e5, 1e6),
{
dat <- data.frame(x = as.numeric(1:rows),
y = rep(0.01, rows))
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_aquifer_grf(time = x, flow_rate = y) |>
plate("dt")),
hrec3 = (recipe(formula = formula, data = dat) |>
step_aquifer_patch(time = x,
flow_rate = 0.01,
thickness = 1.0,
radius = 100.0,
radius_patch = 200.0,
specific_storage_inner = 1e-6,
specific_storage_outer = 1e-6,
hydraulic_conductivity_inner = 1e-4,
hydraulic_conductivity_outer = 1e-4,
n_stehfest = 8L
) |>
plate("dt")),
check = FALSE,
relative = relative)
}
)
results
```
## step_aquifer_wellbore_storage
- currently this is slow for long series.
```{r}
#| echo: true
#| warning: false
results <- bench::press(
rows = c(1e3, 1e4, 1e5),
{
dat <- data.frame(x = as.numeric(1:rows),
y = as.numeric(1:rows))
bench::mark(
hrec1 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_wellbore_storage(time = x,
flow_rate = 0.01,
hydraulic_conductivity = 1e-4,
specific_storage = 1e-6,
radius = 100,
radius_casing = 1e-15,
radius_well = 1e-15, n_terms = 18) |>
plate("dt")),
hrec2 = unname(recipe(formula = formula, data = dat) |>
step_aquifer_theis(time = x,
flow_rate = y) |>
plate("dt")),
check = FALSE
)
}
)
results
```
## step_vadose_weeks
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = as.numeric(1:rows),
y = as.numeric(1:rows))
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_vadose_weeks(time = x,
air_diffusivity = 0.8,
thickness = 5,
precision = 1e-12) |>
plate("dt")),
check = FALSE,
min_iterations = 2
)
}
)
results
```
## step_transport_ogata_banks
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(expand.grid(as.numeric(1:rows),
as.numeric(1:10)))
names(dat) <- c('x', 'y')
bench::mark(
hrec1 = (recipe(formula = formula, data = dat) |>
step_transport_ogata_banks(time = x,
distance = y) |>
plate("dt")),
check = FALSE,
min_iterations = 2
)
}
)
results
```
## step_transport_fractures_solute
```{r}
#| echo: true
#| warning: false
formula <- as.formula(~time+z+x)
dat <- setDT(expand.grid(10^(3:8),
seq(0.0, 10, 1),
c(0.0)))
names(dat) <- c("time", "z", "x")
results <-
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_transport_fractures_solute(time = time,
distance_fracture = z,
distance_matrix = x) |>
plate("dt"),
check = FALSE,
min_iterations = 2
)
results
```
## step_transport_fractures_heat
```{r}
#| echo: true
#| warning: false
formula <- as.formula(~time+z+x)
dat <- setDT(expand.grid(10^(3:8),
seq(0.0, 100, 1),
c(0.0, 0.05)))
names(dat) <- c("time", "z", "x")
results <-
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_transport_fractures_heat(time = time,
distance_fracture = z,
distance_matrix = x) |>
plate("dt"),
check = FALSE,
min_iterations = 2
)
results
```
## step_fft_pgram, step_fft_welch
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x + z)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows), y = rnorm(rows), z = rnorm(rows),
q = rnorm(rows), r = rnorm(rows), s = rnorm(rows))
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_fft_pgram(c(x, y),
3,
TRUE,
TRUE,
FALSE,
0.1,
time_step = 1) |>
prep() |>
bake(),
hrec2 = recipe(formula = formula, data = dat) |>
step_fft_pgram(c(x, y),
3,
TRUE,
TRUE,
TRUE,
0.1,
time_step = 1) |>
prep() |>
bake(),
hrec3 = recipe(formula = formula, data = dat) |>
step_fft_welch(c(x, y),
length_subset = nrow(dat) / 10,
overlap = 0.60,
window = window_nuttall(nrow(dat) / 10),
time_step = 1) |>
prep() |>
bake(),
check = FALSE,
min_iterations = 1
)
}
)
results
```
## step_fft_transfer_welch and step_fft_transfer_pgram, step_fft_transfer_experimental
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = c(1e5, 1e6, 1e7),
{
dat <- data.frame(x = rnorm(rows), y = rnorm(rows))
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_fft_transfer_pgram(c(x, y),
3,
TRUE,
TRUE,
0.1,
time_step = 1) |>
prep() |>
bake(),
hrec2 = recipe(formula = formula, data = dat) |>
step_fft_transfer_welch(c(x, y),
length_subset = nrow(dat) / 10,
overlap = 0.60,
window = window_nuttall(nrow(dat) / 10),
time_step = 1) |>
prep() |>
bake(),
hrec3 <- recipe(formula = formula, data = dat) |>
step_fft_transfer_experimental(c(x, y),
spans = 3,
taper = 0.1,
n_groups = 300,
time_step = 1) |>
prep() |>
bake(),
check = FALSE,
min_iterations = 1
)
}
)
results
```
## step_ols
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~.)
results <- bench::press(
rows = c(1e5, 1e6, 1e7),
{
dat <- data.frame(
y = rnorm(rows),
x = rnorm(rows),
z = rnorm(rows),
a = rnorm(rows),
b = rnorm(rows),
d = rnorm(rows),
e = rnorm(rows),
f = rnorm(rows),
g = rnorm(rows))
m <- qM(dat)
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_ols(formula = as.formula(y~.),
do_response = FALSE) |>
prep() |>
bake(),
hrec2 = recipe(formula = formula, data = dat) |>
step_ols(formula = as.formula(y~.),
do_response = TRUE) |>
prep() |>
bake(),
lm = lm(y~. - 1, dat),
lm.fit(x = m[, c(2:ncol(dat))], y = m[, 1]),
check = FALSE,
relative = FALSE
)
}
)
results
# formula <- as.formula(y~x+z)
#
#
# results <- bench::press(
# rows = n,
# {
# dat <- data.frame(x = rnorm(rows),
# y = rnorm(rows),
# z = rnorm(rows))
# bench::mark(
# hrec = recipe(formula = formula, data = dat) |>
# step_intercept() |>
# step_nls(formula = as.formula(y~.)) |>
# prep() |>
# bake(),
# check = FALSE,
# relative = FALSE
# )
# }
# )
```
## step_nls
```{r}
#| echo: true
#| warning: false
n0 <- 5e5
n <- 2e4
n2 <- 1e4
b <- cumsum(rnorm(n0))
b <- b - mean(b)
max_t <- 720 * ceiling(2.554)
a <- hydrorecipes:::convolve_overlap_save(x = b,
y = hydrorecipes:::gamma_3(0:max_t, 0.816, 9.221, 2.554),
0)
max_t <- 720 * ceiling(2.554)
dat <- data.frame(a = a, b = b)
formula <- formula(a~b)
# for gsl_nls
f <- function(z, x) {
max_t <- 720 * ceiling(z[3])
hydrorecipes:::convolve_overlap_save(x = x,
y = hydrorecipes:::gamma_3(0:max_t, z[1], z[2], z[3]),
align = 0)[-(1:7200)]
}
results <- bench::mark(
gsl_fun <- unname(round(coef(gsl_nls(
fn = f, ## model function
y = a[-(1:7200)], ## response vector
x = b,
start = c(A = 0.5, n = 2.0, a = 2.0), ## starting values
lower = c(A = 0.01, n = 1.0, a = 1.0),
upper = c(A = 1.0, n = 10.0, a = 10.0),
control = gsl_nls_control(xtol = 1e-8),
trace = FALSE,
algorithm = "lm" ## algorithm
)), 3)),
h_1 = {h = recipe(formula = formula, data = dat) |>
step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0,
varying = list(name = c("amplitude","k", "theta"),
start = c(0.5, 2.0, 2.0),
lower = c(0.01, 1.0, 1.0),
upper = c(1.0, 10.0, 10.0))) |>
step_nls(formula = formula(a~b), n_subset = 1L,
trace = FALSE,
algorithm = "lm",
control = gsl_nls_control(xtol = 1e-8))
h$prep()$bake()
unname(round(coef(h$steps[[3]]$fit), 3))},
h_10 = {h = recipe(formula = formula, data = dat) |>
step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0,
varying = list(name = c("amplitude","k", "theta"),
start = c(0.5, 2.0, 2.0),
lower = c(0.01, 1.0, 1.0),
upper = c(1.0, 10.0, 10.0))) |>
step_nls(formula = formula(a~b), n_subset = 10L,
trace = FALSE,
algorithm = "lm",
control = gsl_nls_control(xtol = 1e-8))
h$prep()$bake()
unname(round(coef(h$steps[[3]]$fit), 3))},
h_100 = {h = recipe(formula = formula, data = dat) |>
step_convolve_gamma(b, amplitude = 0.5, k = 2.0, theta = 2.0,
varying = list(name = c("amplitude","k", "theta"),
start = c(0.5, 2.0, 2.0),
lower = c(0.01, 1.0, 1.0),
upper = c(1.0, 10.0, 10.0))) |>
step_nls(formula = formula(a~b), n_subset = 100L,
trace = FALSE,
algorithm = "lm",
control = gsl_nls_control(xtol = 1e-8))
h$prep()$bake()
unname(round(coef(h$steps[[3]]$fit), 3))},
check = TRUE
)
results
```
## step_ols_gap_fill
```{r}
#| echo: true
#| warning: false
set.seed(123)
n <- 100000
frm <- formula(x ~ y + z)
x <- cumsum(rnorm(n))
dat <- data.table(x = x, y = x, z = as.numeric(1:n))
dat[, x := x + c(rep(20, n/2), rep(0, n/2))]
dat[, x := x + 3.0 * sin(z * 1/n)]
tmp <- copy(dat$x)
# Set value to NA. These values will be estimated.
dat[60000:70000, x := NA_real_]
dat <- unclass(dat)
bench::mark(
{h = recipe(formula = frm, data = dat) |>
step_find_interval(z, vec = c(0, n/2, n)) |>
step_intercept() |>
step_spline_b(z, df = 4) |>
step_drop_columns(z)
hrec = recipe(formula = frm, data = dat) |>
step_ols_gap_fill(c(x, y, z), recipe = h) |>
prep() |>
bake()},
check = FALSE
)
```
# check
## step_check_spacing
```{r}
#| echo: true
#| warning: false
formula <- as.formula(y~x)
results <- bench::press(
rows = n,
{
dat <- data.frame(x = rnorm(rows),
y = 1:rows)
dat[9:50, "x"] <- NA
dat[9L, "y"] <- NA
bench::mark(
hrec1 = recipe(formula = formula, data = dat) |>
step_check_spacing(y) |>
step_check_na(y) |>
prep() |>
bake(),
hrec2 =recipe(formula = formula, data = dat) |>
step_check_spacing(x) |>
step_check_na(x) |>
prep() |>
bake(),
check = FALSE,
relative = FALSE,
min_iterations = 2
)
}
)
results
```
```{r session}
sessionInfo()
```