# Build a reproducible benchmark tibble.
#
# Arguments:
#   n_group      Number of distinct group labels to cycle through.
#   n_row        Number of rows in the result.
#   n_col_value  Number of numeric value columns.
#   .seed        Seed passed to set.seed() before drawing the values.
#
# Returns a tibble whose first column `col_group` holds the group labels as
# character, followed by `col_value_1`, ..., `col_value_<n_col_value>` holding
# uniform draws from [0, 100] rounded to whole numbers.
.gen_data <- \(n_group, n_row, n_col_value, .seed = 1) {
  # Labels "1", "2", ..., n_group, recycled (or truncated) to n_row entries.
  group_labels <- as.character(rep_len(seq_len(n_group), n_row))

  # Seed the global RNG so every call with the same arguments yields the
  # same values.
  set.seed(.seed)
  drawn_values <- round(runif(n_row * n_col_value, min = 0, max = 100))
  value_matrix <- matrix(drawn_values, ncol = n_col_value)

  # The matrix has no column names; the repair function supplies them.
  value_tbl <- tibble::as_tibble(
    value_matrix,
    .name_repair = \(x) paste0("col_value_", seq_len(n_col_value))
  )

  dplyr::mutate(value_tbl, col_group = group_labels, .before = 1)
}
# Sum `col_value_1` within each `col_group` using dplyr, then sort the
# result by `col_group`.
#
# `.data` must contain the columns `col_group` and `col_value_1`.
# Returns one row per group with columns `col_group` and `value`.
.use_dplyr <- function(.data) {
  group_totals <- dplyr::summarise(
    .data,
    value = sum(col_value_1, na.rm = TRUE),
    .by = col_group
  )
  dplyr::arrange(group_totals, col_group)
}
# Sum `col_value_1` within each `col_group` using duckplyr, then sort the
# result by `col_group`.
#
# `.data` must contain the columns `col_group` and `col_value_1`.
# Returns a materialised tibble with one row per group and the columns
# `col_group` and `value`.
#
# duckplyr frames are evaluated lazily: the query only runs once the result
# is accessed. The benchmark never inspects the returned object, so without
# the final collect() only the construction of the query would be timed,
# not the aggregation itself. collect() forces the computation to happen
# inside this function.
.use_duckplyr <- function(.data) {
  .data |>
    duckplyr::as_duckdb_tibble() |>
    dplyr::summarise(
      value = sum(col_value_1, na.rm = TRUE),
      .by = col_group
    ) |>
    dplyr::arrange(col_group) |>
    dplyr::collect()
}
# Number of value columns in the generated data; only `col_value_1` is
# aggregated by the benchmarked functions.
n_col_value <- 1

# Time both implementations for every combination of row count and group
# count. The data set is generated once per combination, outside the timed
# expressions.
res_sum <- bench::press(
  n_row = c(1e5, 1e6),
  n_group = c(1e4, 1e5),
  {
    dat <- .gen_data(n_group, n_row, n_col_value)
    bench::mark(
      dplyr = .use_dplyr(dat),
      duckplyr = .use_duckplyr(dat),
      min_iterations = 5,
      # Result equality between the two expressions is not checked.
      check = FALSE
    )
  }
)

# Violin plot of the timing distributions.
ggplot2::autoplot(res_sum, type = "violin")