## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE) library(ggplot2) library(sf) library(dplyr) library(tidyr) library(tidycensuskr) library(biscale) library(cowplot) library(grid) library(ggspatial) ## ----load-data_1-------------------------------------------------------------- # Load 2020 boundaries data(adm2_sf_2020) ## ----filter-merge_1----------------------------------------------------------- # Load census population data for 2010 and 2020 df_2010_pop <- anycensus(year = 2010, codes = c("Gyeongsangnam-do", "Busan", "Ulsan"), type = "population") df_2020_pop <- anycensus(year = 2020, codes = c("Gyeongsangnam-do", "Busan", "Ulsan"), type = "population") # Merge with spatial data and compute population change sf_target <- adm2_sf_2020 |> inner_join(df_2010_pop, by = "adm2_code") |> inner_join(df_2020_pop, by = "adm2_code") |> mutate(change = `all households_total_prs.y` - `all households_total_prs.x`) ## ----choropleth_1, fig.width=7, fig.height=5---------------------------------- # Choropleth map for population change map <- ggplot(sf_target) + geom_sf(aes(fill = change), color = "gray80", size = 0.1) + labs(title = "Population change between 2010 and 2020") + scale_fill_gradient2( low = "#2C7BB6", mid = "white", high = "#D7191C", midpoint = 0, name = "Change" ) + theme_void() + annotation_scale(location = "tr", width_hint = 0.25, text_cex = 0.7, line_width = 0.7) # National boundary (union of all districts) sf_korea_boundary <- adm2_sf_2020 |> summarise(geometry = st_union(geometry)) # Target region boundary (union of selected provinces/cities) sf_target_boundary <- sf_target |> summarise(geometry = st_union(geometry)) # Inset map: whole Korea + highlighted target region korea_inset <- ggplot() + geom_sf(data = sf_korea_boundary, fill = "grey90", color = "grey90") + geom_sf(data = sf_target_boundary, fill = "grey30", color = "grey30") + theme_void() # Combine main map and inset cowplot::ggdraw() + cowplot::draw_plot(map) + cowplot::draw_plot(korea_inset, x = 0.7, y = 0.05, width = 0.25, height = 0.25) + draw_grob(grid::rectGrob(gp = gpar(col = "black", fill = NA, lwd = 0.6)), x = 0.7, y = 0.05, width = 0.25, height = 0.25) ## ----filter-merge_2----------------------------------------------------------- # Load census data df_2020_pop <- anycensus(year = 2020, type = "population") df_2020_tax <- anycensus(year = 2020, type = "tax") # Merge population with boundaries adm2_sf_2020_pop <- adm2_sf_2020 |> left_join(df_2020_pop, by = "adm2_code") |> mutate( adm2_code_chr = as.character(adm2_code), adm2_prefix4 = substr(adm2_code_chr, 1, 4), last_digit = substr(adm2_code_chr, 5, 5) ) # Aggregate smaller units (adm2_code ending not with 0) into 4-digit groups sf_union_needed <- adm2_sf_2020_pop |> filter(last_digit != "0") |> group_by(adm2_prefix4) |> summarise( across(where(is.numeric), ~ sum(.x, na.rm = TRUE)), geometry = st_union(geometry), .groups = "drop" ) |> mutate(adm2_code = as.numeric(paste0(adm2_prefix4, "0"))) # Combine aggregated units with existing "0"-ending districts adm2_sf_2020_unioned <- adm2_sf_2020_pop |> filter(last_digit == "0") |> bind_rows(sf_union_needed) # Join with tax data sf_final <- adm2_sf_2020_unioned |> left_join(df_2020_tax, by = "adm2_code") ## ----bivariate_2, fig.width=7, fig.height=5----------------------------------- # Create 3x3 bivariate classes (population vs tax) bi_data <- bi_class( sf_final, x = `all households_total_prs`, y = income_general_mkr, style = "quantile", dim = 3 ) # Bivariate legend legend <- bi_legend( pal = "DkCyan", dim = 3, xlab = "Low to High population", ylab = "Low to High tax", size = 7 ) # Mapping cowplot::ggdraw() + cowplot::draw_plot( ggplot() + geom_sf(data = bi_data, aes(fill = bi_class), color = NA) + bi_scale_fill(pal = "DkCyan", dim = 3, guide = "none") + bi_theme() + labs(title = "Population vs Tax (2020)") + theme(plot.title = element_text(size = 10)) ) + cowplot::draw_plot(legend, x = 0.7, y = 0.1, width = 0.3, height = 0.3) ## ----data_load_3, include=TRUE------------------------------------------------ # Load population data for the Seoul Metropolitan Area (SMA) df_sma <- anycensus( year = 2020, codes = c("Seoul", "Gyeonggi-do", "Incheon"), type = "population" ) # Calculate sex ratio (males per 100 females) df_sma <- df_sma |> mutate(sex_ratio = `all households_male_prs` / `all households_female_prs` * 100) # Extract overall distribution (all SMA combined) df_all <- df_sma |> select(sex_ratio) ## ----histogram_3, include=TRUE, fig.width=7, fig.height=3--------------------- ggplot() + # Background: overall distribution across all SMA geom_histogram( data = df_all, aes(x = sex_ratio, y = after_stat(density)), bins = 20, fill = "grey80", color = NA, alpha = 1 ) + # Regional distributions geom_histogram( data = df_sma, aes(x = sex_ratio, y = after_stat(density), fill = adm1), bins = 20, alpha = 0.7, color = NA, position = "identity" ) + facet_wrap(~ adm1, ncol = 3, scales = "free_y") + scale_fill_manual(values = c( "Seoul" = "darkorange", "Incheon" = "purple", "Gyeonggi-do" = "cyan4" )) + labs( title = "Sex ratio distribution by region (SMA, 2020)", x = "Sex ratio (male per 100 females)", y = "Density" ) + theme_bw() ## ----choropleth_3, fig.width=7, fig.height=3.5-------------------------------- # Merge SMA population data with boundaries adm2_sf_2020_sma <- adm2_sf_2020 |> inner_join(df_sma, by = "adm2_code") # Choropleth map for sex ratio ggplot(adm2_sf_2020_sma) + geom_sf(aes(fill = sex_ratio), color = "gray", size = 0.01) + scale_fill_gradient2( low = "#2C7BB6", mid = "white", high = "#D7191C", midpoint = 100, # 100 = equal male/female name = "Sex ratio\n(males per 100 females)" ) + labs(title = "Sex ratio in the Seoul Metropolitan Area (2020)") + theme_void() ## ----pca_demo_1, fig.width=7.5, fig.height=7---------------------------------- library(tidycensuskr) library(dplyr) library(ggplot2) library(janitor) # Load data sf_2020 <- data(adm2_sf_2020) # housing df_hou <- anycensus(year = 2020, type = "housing", level = "adm2") df_hou <- df_hou |> dplyr::group_by(adm1_code, adm2_code, year, type) |> dplyr::mutate(dplyr::across( dplyr::everything(), ~ ifelse(is.na(.), .[which(!is.na(.))], .) )) |> dplyr::ungroup() |> dplyr::distinct() # population df_pop <- anycensus(year = 2020, type = "population", level = "adm2") # mortality df_mort <- anycensus(year = 2020, type = "mortality", level = "adm2") # social security df_ss <- anycensus(year = 2020, type = "social security", level = "adm2") # Combine data frames df_wide <- Reduce( function(x, y) left_join( x, y, by = c("adm1", "adm1_code", "adm2", "adm2_code", "year") ), list( df_hou, df_pop, df_mort, df_ss ) ) |> dplyr::select(-dplyr::starts_with("type")) # reorganize the variables by basic local governments df_wide_re <- df_wide |> dplyr::mutate(adm2_code_ = paste0(substr(adm2_code, 1, 4), "0")) |> dplyr::group_by(adm2_code_) |> dplyr::summarize( dplyr::across( dplyr::matches("households|income|housing|grdp|security"), ~ sum(.x, na.rm = TRUE) ), dplyr::across( dplyr::matches("fertility|causes"), ~ mean(.x, na.rm = TRUE) ), adm2 = dplyr::first(adm2) ) |> dplyr::ungroup() |> dplyr::transmute( adm2_code_ = adm2_code_, adm2 = adm2, persons_per_housing = `all households_total_prs` / `housing types_total_cnt`, sex_ratio = 100 * `all households_male_prs` / `all households_female_prs`, mortality_rate = `all causes_total_p1p`, fertility_rate = fertility_total_brt, security_rate = 100 * (`basic living security_female_prs` + `basic living security_male_prs`) / `all households_total_prs` ) ## ----pca_demo_2, fig.width=7.5, fig.height=7, dpi=200------------------------- # Run PCA prc_df <- df_wide_re |> dplyr::select(3:7) |> as.data.frame() |> prcomp(scale = TRUE) # Rotation by variables prc_df$rotation |> as.data.frame() |> round(3) ## ----pca_demo_3, fig.width=7.5, fig.height=7---------------------------------- # Proper labeling for biplot for BLGs adm2labels <- paste0(df_wide_re$adm2, " (", df_wide_re$adm2_code_, ")") rownames(prc_df$x) <- adm2labels # Biplot with PC1 and PC2 biplot(prc_df, choices = c(1, 2), cex = 0.5, arrow.len = 0.2)