---
title: "How-to in the Tidyverse"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{How-to in the Tidyverse}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup, eval=TRUE, echo=FALSE, warning=FALSE, message=FALSE}
library(FFdownload)
library(dplyr)
library(ggplot2)
library(tidyr)
# Time-stamped working directory inside the session's temp dir.
outd <- paste0(tempdir(), "/", format(Sys.time(), "%F_%H-%M"))
# NOTE: there is no path separator here, so the result file is written next
# to `outd` with the time stamp as a file-name prefix, not inside `outd`.
outfile <- paste0(outd, "FFData_tbl.RData")
```

```{r setup2, eval=FALSE, echo=TRUE}
library(FFdownload)
library(tidyverse)
# Time-stamped download directory below "data/".
outd <- paste0("data/", format(Sys.time(), "%F_%H-%M"))
# No path separator: the time stamp becomes a prefix of the file name.
outfile <- paste0(outd, "FFData_tbl.RData")
```

This vignette covers the tidyverse workflow. For the step-by-step xts workflow (with separate download and processing stages), see `vignette("FFD-xts-how-to")`.

---

## Quick start: `FFget()` (new in v1.2.0)

`FFget()` is the simplest way to get a single dataset into your session. It returns a tibble directly — no intermediate `.RData` file, no `load()` call. Missing-value sentinels (`-99`, `-999`, `-99.99`) are converted to `NA` by default.

```{r tbl_ffget, eval=FALSE}
# Monthly FF 3-factor data as a tibble, NAs handled automatically
ff3 <- FFget("F-F_Research_Data_Factors", subtable = "Temp2")
head(ff3)
#> # A tibble: 6 × 5
#>   date     Mkt.RF   SMB   HML    RF
#>
#> 1 Jul 1926   2.89 -2.55 -2.39  0.22
#> ...
```

Use `frequency = NULL` to get all frequencies at once, or `subtable = NULL` to get all sub-tables within a frequency:

```{r tbl_ffget_all, eval=FALSE}
# All sub-tables for the monthly frequency
ff3_monthly <- FFget("F-F_Research_Data_Factors", subtable = NULL)
names(ff3_monthly)  # e.g. "Temp2"

# Annual data as xts
ff3_ann_xts <- FFget(
  "F-F_Research_Data_Factors",
  frequency = "annual",
  format = "xts"
)
```

---

## Dataset discovery: `FFlist()` and `FFmatch()`

Before downloading, browse all available datasets and check that your search strings match the intended files:

```{r tbl_fflist, eval=FALSE}
fl <- FFlist()
nrow(fl)  # 100+ non-daily datasets
fl |> filter(grepl("5_Factors|Momentum", name))
```

```{r tbl_ffmatch, eval=FALSE}
FFmatch(c("Research_Data_Factors", "Momentum_Factor"))
#> # A tibble: 2 × 4
#>   requested             matched                   edit_distance similarity
#>
#> 1 Research_Data_Factors F-F_Research_Data_Factors             3       0.87
#> 2 Momentum_Factor       F-F_Momentum_Factor                   4       0.78
```

---

## Bulk download with `FFdownload()` (classic API)

For downloading multiple datasets in one call and/or saving a dated snapshot for reproducible research, use `FFdownload()` directly with `format = "tibble"`. For a detailed explanation of the download / process separation, see `vignette("FFD-xts-how-to")`. Here we download and process in one step:

```{r tbl_all}
inputlist <- c("F-F_Research_Data_Factors_CSV", "F-F_Momentum_Factor_CSV")
FFdownload(
  exclude_daily = TRUE,
  tempd = outd,
  download = TRUE,
  download_only = FALSE,
  inputlist = inputlist,
  output_file = outfile,
  format = "tibble"
)
```

### New parameters

Replace French's missing-value codes with `NA` during processing:

```{r tbl_na_values, eval=FALSE}
FFdownload(
  exclude_daily = TRUE,
  tempd = outd,
  download = TRUE,
  download_only = FALSE,
  inputlist = inputlist,
  output_file = outfile,
  format = "tibble",
  na_values = c(-99, -999, -99.99)
)
```

Return the data list directly in addition to saving the file:

```{r tbl_return_data, eval=FALSE}
FFdata <- FFdownload(
  exclude_daily = TRUE,
  tempd = outd,
  download = TRUE,
  download_only = FALSE,
  inputlist = inputlist,
  output_file = outfile,
  format = "tibble",
  return_data = TRUE
)
```

---

## Working with the result

```{r tbl_load}
load(outfile)
ls.str(FFdata)
```

Verify that the sub-tables are tibbles:

```{r tbl_check}
str(FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2)
```

Merge the two datasets (the `date` column is a `yearmon` object and serves as the join key):

```{r tbl_merge}
FFfour <- FFdata$`x_F-F_Research_Data_Factors`$monthly$Temp2 %>%
  left_join(FFdata$`x_F-F_Momentum_Factor`$monthly$Temp2, by = "date")
FFfour %>% head()
```

Plot cumulative wealth indices using `pivot_longer()` and `ggplot2`:

```{r FFFourPic, out.width="100%", fig.width=8, fig.height=4}
# First observation of the plotted sample; every index is normalised to 1 here.
start_date <- as.Date("1960-01-01")

FFfour %>%
  pivot_longer(Mkt.RF:Mom, names_to = "FFVar", values_to = "FFret") %>%
  # Returns are quoted in percent; convert the yearmon key to a Date for ggplot.
  mutate(FFret = FFret / 100, date = as.Date(date)) %>%
  filter(date >= start_date, FFVar != "RF") %>%
  group_by(FFVar) %>%
  arrange(FFVar, date) %>%
  # Wealth index: zero return on the base date so each series starts at 1,
  # then compound. The result is strictly positive, as the log axis requires.
  mutate(
    FFret = if_else(date == start_date, 0, FFret),
    FFretv = cumprod(1 + FFret)
  ) %>%
  ungroup() %>%
  ggplot(aes(x = date, y = FFretv, col = FFVar)) +
  geom_line(lwd = 1.2) +
  scale_y_log10() +
  labs(
    title = "FF4 Factors",
    subtitle = "Cumulative wealth plots",
    y = "cum. returns"
  ) +
  scale_colour_viridis_d("FFvar") +
  theme_bw() +
  theme(legend.position = "bottom")
```