## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 3.5, fig.align = "center", warning = FALSE, message = FALSE ) ## ----libraries---------------------------------------------------------------- library(betaARMA) library(forecast) library(ggplot2) library(gridExtra) library(zoo) library(scales) library(moments) ## ----config_plot, echo=FALSE-------------------------------------------------- # Shared ggplot configuration sample_size <- length(brasilia_ts) end_time_series <- time(brasilia_ts)[sample_size] + 1 ggplot_size_font <- 9 ggplot_theme <- theme_minimal() + theme( legend.position = "bottom", title = element_text(size = ggplot_size_font), axis.text = element_text(size = ggplot_size_font), axis.title = element_text(size = ggplot_size_font), legend.text = element_text(size = ggplot_size_font), legend.title = element_text(size = ggplot_size_font) ) ggplot_scale_y <- scale_y_continuous( breaks = seq(0.0, 1.00, 0.10) ) ggplot_scale_x <- scale_x_continuous( breaks = seq(1999, end_time_series, 2), limits = c(1999, end_time_series) ) ## ----subsample---------------------------------------------------------------- y_subsample_ts <- window( brasilia_ts, start = c(2006, 02), end = c(2019, 07) ) y_train <- window( y_subsample_ts, end = c(2018, 07) ) y_test <- window( y_subsample_ts, start = c(2018, 08) ) ## ----ggplot_subsample, echo=FALSE--------------------------------------------- len_y_ts <- length(y_train) start_subsample <- time(y_train)[1] end_subsample <- time(y_train)[len_y_ts] ggplot_subsample <- ggplot(brasilia_df, aes(time, y)) + geom_point(size = 0.5) + geom_line(aes(group = 1)) + geom_vline(xintercept = start_subsample, linetype = "dashed", linewidth = 0.5, color = "red") + geom_vline(xintercept = end_subsample, linetype = "dashed", linewidth = 0.5, color = "red") + labs(x = "Year", y = "Relative humidity (proportion)") + ggplot_scale_x + ggplot_scale_y + ggplot_theme ## ----print_ggplot_subsample, echo=FALSE, fig.cap="Relative humidity in Brasília. Dashed red lines delimit the training period used in the instability analysis."---- print(ggplot_subsample) ## ----descriptive_subsample, echo=FALSE---------------------------------------- descriptive_sub_df <- data.frame( Min = min(y_subsample_ts), Max = max(y_subsample_ts), Median = median(y_subsample_ts), Mean = mean(y_subsample_ts), SD = sd(y_subsample_ts), Skewness = moments::skewness(y_subsample_ts), Excess_kurtosis = moments::kurtosis(y_subsample_ts) ) descriptive_sub_df <- round(descriptive_sub_df, 2) ## ----print_descriptive_subsample, echo=FALSE---------------------------------- knitr::kable( descriptive_sub_df, caption = "Descriptive statistics of the monthly relative humidity in Brasília — subsample (February 2006 to July 2019)." ) ## ----regressors--------------------------------------------------------------- freq <- frequency(y_train) n_test <- length(y_test) trend_index_train <- seq_along(y_train) trend_index_test <- (max(trend_index_train) + 1): (max(trend_index_train) + n_test) hs_train <- sin(2 * pi * trend_index_train / freq) hc_train <- cos(2 * pi * trend_index_train / freq) hs_test <- sin(2 * pi * trend_index_test / freq) hc_test <- cos(2 * pi * trend_index_test / freq) X_train <- cbind(hs = hs_train, hc = hc_train) X_test <- cbind(hs = hs_test, hc = hc_test) ## ----fit_cmle----------------------------------------------------------------- ar_lags <- c(10, 18) ma_lags <- c(1, 11, 13) fit_cmle <- barma( y = y_train, ar = ar_lags, ma = ma_lags, penalty = FALSE, xreg = X_train ) ## ----summary_cmle------------------------------------------------------------- summary(fit_cmle) ## ----fit_pcmle---------------------------------------------------------------- fit_pcmle <- barma( y = y_train, ar = ar_lags, ma = ma_lags, penalty = TRUE, xreg = X_train ) ## ----summary_pcmle------------------------------------------------------------ summary(fit_pcmle) ## ----diagnostics_pcmle, fig.height=7.0, fig.cap="Diagnostic plots for the PCMLE fit: AR=(10,18), MA=(1,11,13)."---- plot(fit_pcmle, which = "all") ## ----fit_cmle_reduced--------------------------------------------------------- ar_lags_reduced <- c(10, 18) ma_lags_reduced <- c(1, 13) fit_cmle_reduced <- barma( y = y_train, ar = ar_lags_reduced, ma = ma_lags_reduced, penalty = FALSE, xreg = X_train ) ## ----summary_cmle_reduced----------------------------------------------------- summary(fit_cmle_reduced) ## ----diagnostics_all, fig.height=7.0, fig.cap="Diagnostic plots for the reduced CMLE model AR=(10,18), MA=(1,13)."---- plot(fit_cmle_reduced, which = "all") ## ----diagnostics_qq, fig.height=4.0, fig.width=4.5, fig.cap="Normal Q-Q plot of quantile residuals for the reduced CMLE model."---- plot(fit_cmle_reduced, which = "qq") ## ----forecast_setup----------------------------------------------------------- # Define the translation dictionary month_map <- c( "Jan" = "Jan", "Fev" = "Feb", "Mar" = "Mar", "Abr" = "Apr", "Mai" = "May", "Jun" = "Jun", "Jul" = "Jul", "Ago" = "Aug", "Set" = "Sep", "Out" = "Oct", "Nov" = "Nov", "Dez" = "Dec" ) dates <- as.Date(zoo::as.yearmon(time(y_test))) pt_months <- tools::toTitleCase(format(dates, "%b")) months <- unname(month_map[pt_months]) forecasts_df <- data.frame( observed = as.numeric(y_test), forecast = forecast(fit_cmle_reduced, xreg = X_test, h = n_test)$mean, month = months ) forecasts_df$time <- seq_len(n_test) ## ----ggplot_forecast, echo=FALSE---------------------------------------------- ggplot_forecast <- ggplot(forecasts_df, aes(x = time)) + geom_line(aes(y = forecast, colour = "Forecast"), linewidth = 0.8) + geom_point(aes(y = observed, shape = "Observed"), color = "#00BFC4", size = 3) + scale_colour_manual(name = " ", values = c("Forecast" = "#C77CFF")) + scale_shape_manual(name = " ", values = c("Observed" = 16)) + scale_x_continuous(breaks = seq_len(n_test), limits = c(1, n_test), labels = forecasts_df$month) + scale_y_continuous(breaks = seq(0.3, 1.0, 0.1), limits = c(0.3, 1.0), labels = number_format(accuracy = 0.1)) + labs(x = "Time (months)", y = "Relative humidity (proportion)") + ggplot_theme ## ----print_forecast, echo=FALSE, fig.cap="Out-of-sample forecasts from the reduced CMLE model against the observed test data (teal dots)."---- print(ggplot_forecast) ## ----forecast_accuracy-------------------------------------------------------- mae <- function(obs, pred) mean(abs(obs - pred)) rmse <- function(obs, pred) sqrt(mean((obs - pred)^2)) obs <- forecasts_df$observed accuracy_table <- data.frame( Model = "betaARMA CMLE (reduced)", MAE = round(mae(obs, forecasts_df$forecast) * 100, 2), RMSE = round(rmse(obs, forecasts_df$forecast) * 100, 2) ) knitr::kable( accuracy_table, caption = paste0( "MAE and RMSE ($\\times 100$) over the ", n_test, "-month forecast horizon." ) ) ## ----session_info, echo=FALSE------------------------------------------------- cat("=================================================================\n") cat("Session Information\n") cat("=================================================================\n") cat("This report was generated at:", format(Sys.time(), "%B %d, %Y at %I:%M %p"), "\n\n") print(sessionInfo())