---
title: "Barometric pressure steps"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Barometric pressure steps}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup, results = "hide", include = FALSE}
library(hydrorecipes)
```

```{r data}
# minute data
data(kennel_2020)
```

### Time domain

#### Single value

- Clark's 1967 method.  Because Clark's method is based on differences, the spacing between samples can affect the results. The following example uses differences of 1 minute, 1 hour, and 1 day for the Clark method.
- Least Squares method. The least squares method can be based on differenced or non-differenced data.  In the following example the non-differenced data is used in addition to the 1 minute, 1 hour, and 1 day differences.

If the values vary based on the interval used in the difference calculation, it may suggest that the system does not behave like an ideal confined system or observational noise is high relative to true changes.

```{r static_time}
static_time <- recipe(wl~., kennel_2020) |>
  step_baro_clark(wl,
                  baro,
                  lag_space = 1,
                  inverse = FALSE) |> # 1 minutes (every minute differences)
  step_baro_clark(wl,
                  baro, 
                  lag_space = 60,
                  inverse = FALSE) |> # 60 minutes (hourly differences)
  step_baro_clark(wl, 
                  baro, 
                  lag_space = 1440,
                  inverse = FALSE) |> # 1440 minutes (daily differences)
  step_baro_least_squares(wl, baro, 
                          inverse = FALSE) |> 
  step_baro_least_squares(wl, baro, 
                          lag_space = 1L,
                          inverse = FALSE,
                          differences = TRUE) |> # 1 minutes (every minute differences)
  step_baro_least_squares(wl, baro, 
                          lag_space = 60L,
                          inverse = FALSE,
                          differences = TRUE) |> # 60 minutes (hourly differences)
  step_baro_least_squares(wl, baro, 
                          lag_space = 1440L,
                          inverse = FALSE,
                          differences = TRUE) |> # 1440 minutes (daily differences)
  prep() |>
  bake()

# The barometric efficiency is stored in the step (warning: this may change)
static_time$get_step_data("barometric_efficiency",
                          additional_columns = c("step_name", 
                                                 "lag_space", 
                                                 "differences"),
                          type = "dt")

```

#### Response functions

Standard lags

```{r lag}

formula <- as.formula(wl~.)

lag_standard <- recipe(formula = formula, data = kennel_2020) |>
  step_lead_lag(baro, lag = 0:1440) |> 
  step_spline_b(datetime, df = 11, intercept = TRUE) |> 
  step_drop_columns(c(baro, datetime, et)) |>
  step_ols(formula = formula) |>
  prep() |>
  bake()
resp_l <- lag_standard$get_response_data()

```

Distributed lags

```{r distlag, fig.alt = "Barometric response functions with standard leading and lagging method compared to the distributed lag method.  Both methods are generally the same but the distributed lag method is smoother."}
lag_dl <- recipe(wl~., kennel_2020) |>
  step_distributed_lag(baro, knots = log_lags(15, 1440), intercept = TRUE) |> 
  step_spline_b(datetime, df = 11, intercept = TRUE) |> 
  step_drop_columns(c(baro, datetime, et)) |>
  step_ols(wl~.) |> 
  prep() |>
  bake()
resp_dl <- lag_dl$get_response_data()
resp_dl <- lag_dl$get_step_data("response_data", 
                                type = "dt")

plot(value~x, resp_l[resp_l$term == "lead_lag" & resp_l$variable == "cumulative", ],
     type = "l", ylim = c(0, 1),
     xlab = "Lag in minutes",
     ylab = "Cumulative Response"
     )
points(value~x, resp_dl[resp_dl$term == "distributed_lag_interpolated" & resp_dl$variable == "cumulative", ],
       type = "l", col = "red")

```

### Frequency domain

#### Single value

These methods are based on the 2 cpd frequency which may not be indicative of the static barometric efficiency, in cases with a frequency dependent response (wellbore storage or non-confined conditions).

```{r ac}
harmonic <- recipe(wl~., kennel_2020) |>
  step_baro_harmonic(datetime, wl, baro, et) |> 
  prep() |>
  bake()

harmonic$get_step_data("barometric_efficiency")

```


#### Transfer function

Three methods:

- pgram based
  - assumes smooth periodogram
- Welch's 
  - loss of low frequency information
  - optimal values dependent on the subset length
- Experimental 
  - assumes smooth periodogram
  - smoothing across multiple frequencies
  - many higher frequecies discarded
  - should be faster
  - least squares fit of multiple frequencies

```{r tf, fig.alt = "Transfer functions."}

# this formula determines the variables to use for the transfer functions
formula_tf <- as.formula(wl ~ baro + et)

pg = recipe(wl~., data = kennel_2020) |>
  step_fft_transfer_welch(formula = formula_tf, 
                          length_subset = 40000,
                          window = hydrorecipes:::window_rectangle(40000),
                          time_step = 60) |>
  step_fft_transfer_pgram(formula = formula_tf, 
                          spans = c(3, 3), 
                          time_step = 60) |>
  step_fft_transfer_experimental(formula = formula_tf, 
                                 n_groups = 150,
                                 spans = c(3, 3), 
                                 time_step = 60) |>
  prep("df") |>
  bake()

tf <- pg$get_transfer_data(type = "dt")

# plot the barometric transfer functions for the different methods
plot(Mod(value)~frequency, 
     data = tf[grepl("fft_transfer_experimental", id) & variable == "wl_baro_et_1"], 
     type = "l", 
     log = "x", 
     xlim = c(0.01, 200),
     ylim = c(0, 1),
     col = "#00900080")
points(Mod(value)~frequency, 
       data = tf[grepl("fft_transfer_welch", id) & variable == "wl_baro_et_1"], 
       type = "l",
       col = "#90000050")
points(Mod(value)~frequency, 
       tf[grepl("fft_transfer_pgram", id) & variable == "wl_baro_et_1"],
       type = "l", 
       col = "#00009050")


# plot the amplitude
plot(Mod(value)~frequency, 
     data = tf[grepl("fft_transfer_experimental", id) & variable == "wl_baro_et_1"], 
     type = "l", 
     log = "x", 
     xlim = c(0.01, 200),
     ylim = c(0, 1),
     col = "#00900080")

# plot the phase
plot(Arg(value)~frequency, 
     data = tf[grepl("fft_transfer_experimental", id) & variable == "wl_baro_et_1"], 
     type = "l", 
     log = "x", 
     xlim = c(0.01, 200),
     col = "#00900080")

```


```{r frftobrf}

# tf <- pg$get_transfer_data(type = "dt")
# tmp <-  tf[grepl("fft_transfer_experimental", id) & variable == "wl_baro_et_1"]
# tmp[, frequency_log := log10(frequency)]
# tmp <- tmp[-c(1, .N)]
# fit_r <- lm(Re(value)~splines2::bSpline(frequency_log, df = 10), tmp)
# fit_i <- lm(Im(value)~splines2::bSpline(frequency_log, df = 10), tmp)
# plot(Re(value)~frequency_log, tmp)
# points(fit_r$fitted.values~frequency_log, tmp, type = "l", col = "red")
# plot(fit_r$fitted.values~frequency, tmp, type = "l", col = "red")
# 
# plot(Im(value)~frequency_log, tmp)
# points(fit_i$fitted.values~frequency_log, tmp, type = "l", col = "red")
# 
# plot(Im(value)~frequency, tmp)
# points(fit_i$fitted.values~frequency, tmp, type = "l", col = "red")
# 
# brf_from_frf <- function(x) {
# 
#   n <- floor(length(x) / 2) + 1
# 
#   imp <- fft(x, inverse = TRUE) / length(x)
#   imp <- Mod(imp) * sign(Re(imp))
# 
#   cumsum(rev(imp)[1:n] + (imp)[1:n])
# 
# }
# 
# 
# x <- tmp$frequency
# y <- tmp$value
# frequency_to_time_domain <- function(x, y) {
# 
#   dc1 <- y[1] # DC values
#   dc2 <- y[length(y)] # DC values
#   x_n <- x[-c(1, length(x))] # DC indices
#   y_n <- y[-c(1, length(y))] # DC values
#   
#   # knots <- hydrorecipes:::log_lags(20, max(x_n))
#   # knots <- knots[-c(1, length(knots))]
#   # len <- min(knots):max(knots)
#   len <- seq(min(x_n), max(x_n), length.out = 10000)
# # this is used to smooth the complex response
#   sp_in  <- splines2::bSpline(x_n, df = 10)
#   sp_out <- splines2::bSpline(len, df = 10)
# 
# 
#     # out <- matrix(NA_real_,
#     #               nrow = length(len),
#     #               ncol = 1)
#     # out[1] <- len
# 
# # loop through each transfer function - smooth the response - estimate brf
#     # for (i in 1:ncol(y_n)) {
# 
# # smooth the real and imaginary components separately
#   re <- Re(y_n)
#   im <- Im(y_n)
#   
#   fit_r <- RcppEigen::fastLm(X = sp_in, y = re)$coefficients
#   fit_i <- RcppEigen::fastLm(X = sp_in, y = im)$coefficients
#   
#   re <- as.vector(sp_out %*% fit_r)
#   im <- as.vector(sp_out %*% fit_i)
# 
# # estimate the brf from the smoothed uniform spaced frf
#         out <- brf_from_frf(complex(real = re, imaginary = im))
#     # }
# 
#         plot(out, type = "l", log = "x")
# 
#     out
# }
# tmp <- tf[grepl("fft_transfer_experimental", id) & variable == "wl_baro_et_1"]
# frequency_to_time_domain(x = tmp$frequency, 
#                          y = tmp$value)

```

### References


```{r session}
sessionInfo()
```