## ----include = FALSE----------------------------------------------------------
# Global knitr chunk options used when rendering the vignette.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 8,
  fig.height = 6,
  warning = FALSE,
  message = FALSE
)

## ----install, eval = FALSE----------------------------------------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
#   install.packages("BiocManager")
# BiocManager::install("BiocBuildReporter", dependencies = TRUE)

## ----library, results='hide', warning=FALSE, message=FALSE--------------------
library(BiocBuildReporter)

## ----setup--------------------------------------------------------------------
library(BiocBuildReporter)
library(dplyr)
library(ggplot2)
library(tidyr)

## ----get_all_tables-----------------------------------------------------------
# Download all available tables
# This will cache the tables for quick subsequent access
get_all_bbs_tables()

## ----get_individual_table-----------------------------------------------------
# Get the build summary table
build_summary <- get_bbs_table("build_summary")

# Get the info table
info <- get_bbs_table("info")

# Get the propagation status table
propagation_status <- get_bbs_table("propagation_status")

## ----read_remote--------------------------------------------------------------
# Request the info table with useLocal = FALSE
# (presumably reads the remote copy rather than the local cache)
info <- get_bbs_table("info", useLocal = FALSE)

## ----updateTables-------------------------------------------------------------
# Request the info table with useLocal = TRUE and updateLocal = TRUE
# (presumably refreshes the locally cached copy before reading it)
info <- get_bbs_table("info", useLocal = TRUE, updateLocal = TRUE)

## ----package_release_info-----------------------------------------------------
# Get release information for BiocFileCache
bfc_releases <- get_package_release_info("BiocFileCache")
bfc_releases

## ----get_package_build_results------------------------------------------------
# Get build results for BiocFileCache on branch RELEASE_3_22
get_package_build_results("BiocFileCache", branch = "RELEASE_3_22")

## ----package_error_count------------------------------------------------------
# Get error counts for BiocFileCache
bfc_errors <- package_error_count("BiocFileCache")
bfc_errors

# Filter to a specific branch
bfc_errors_release <- package_error_count(
  "BiocFileCache",
  branch = "RELEASE_3_22"
)
bfc_errors_release

# Filter to a specific builder
bfc_errors_builder <- package_error_count(
  "BiocFileCache",
  builder = "nebbiolo2",
  branch = "RELEASE_3_22"
)
bfc_errors_builder

## ----filter_devel_errors------------------------------------------------------
# Get devel errors
dev_errors <- package_error_count("BiocFileCache", branch = "devel")

# Filter to current devel version
# NOTE(review): if `version` is stored as character, max() compares
# lexicographically (e.g. "1.9.0" > "1.10.0"); confirm the column type or
# compare via package_version().
dev_errors |>
  filter(version == max(version))

## ----package_failure_over_time------------------------------------------------
# Get failure events for BiocFileCache on nebbiolo1 and
# group events in a 24 hour period
package_failures_over_time("BiocFileCache", "nebbiolo1", 24)

## ----package_growth-----------------------------------------------------------
# Get info table
info <- get_bbs_table("info")

# Count unique packages by branch
package_counts <- info |>
  group_by(git_branch) |>
  summarise(
    n_packages = n_distinct(Package),
    .groups = "drop"
  ) |>
  arrange(desc(n_packages))

# Display the counts
package_counts

# Visualize package counts by branch
ggplot(
  package_counts,
  aes(x = reorder(git_branch, n_packages), y = n_packages)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Number of Packages by Bioconductor Branch",
    x = "Branch",
    y = "Number of Packages"
  ) +
  theme_minimal()

## ----build_status-------------------------------------------------------------
# Get build summary table
build_summary <- get_bbs_table("build_summary")

# Count build statuses
status_counts <- build_summary |>
  count(status) |>
  arrange(desc(n))

status_counts

# Visualize status distribution
ggplot(status_counts, aes(x = reorder(status, n), y = n)) +
  geom_col(aes(fill = status)) +
  scale_fill_manual(values = c(
    "OK" = "green3",
    "WARNING" = "orange",
    "ERROR" = "red",
    "TIMEOUT" = "darkred"
  )) +
  coord_flip() +
  labs(
    title = "Distribution of Build Statuses",
    x = "Status",
    y = "Count"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

## ----platform_analysis--------------------------------------------------------
# Analyze build status by platform (node)
platform_status <- build_summary |>
  group_by(node, status) |>
  summarise(count = n(), .groups = "drop") |>
  group_by(node) |>
  mutate(
    total = sum(count),
    percentage = count / total * 100
  ) |>
  ungroup()

# Show error rates by platform
# (ERROR and TIMEOUT rows are combined into one rate per node)
error_rates <- platform_status |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  group_by(node) |>
  summarise(
    error_count = sum(count),
    total = first(total),
    error_rate = sum(percentage),
    .groups = "drop"
  ) |>
  arrange(desc(error_rate))

head(error_rates, 10)

## ----stage_analysis-----------------------------------------------------------
# Analyze failures by stage
stage_failures <- build_summary |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  count(stage, status) |>
  arrange(desc(n))

stage_failures

# Visualize
ggplot(stage_failures, aes(x = stage, y = n, fill = status)) +
  geom_col() +
  scale_fill_manual(values = c("ERROR" = "red", "TIMEOUT" = "darkred")) +
  labs(
    title = "Build Failures by Stage",
    x = "Build Stage",
    y = "Number of Failures",
    fill = "Status"
  ) +
  theme_minimal()

## ----problematic_packages-----------------------------------------------------
# Find packages with most errors
package_errors <- build_summary |>
  filter(status %in% c("ERROR", "TIMEOUT")) |>
  count(package, status) |>
  group_by(package) |>
  summarise(
    total_errors = sum(n),
    .groups = "drop"
  ) |>
  arrange(desc(total_errors))

# Top 10 packages with most errors
head(package_errors, 10)

## ----maintainer_analysis------------------------------------------------------
# Get unique packages per maintainer
maintainer_packages <- info |>
  group_by(Maintainer) |>
  summarise(
    n_packages = n_distinct(Package),
    packages = paste(unique(Package), collapse = ", "),
    .groups = "drop"
  ) |>
  arrange(desc(n_packages))

# Top maintainers by number of packages
head(maintainer_packages, 10)

# Distribution of packages per maintainer
ggplot(maintainer_packages, aes(x = n_packages)) +
  geom_histogram(binwidth = 1, fill = "steelblue", color = "white") +
  labs(
    title = "Distribution of Packages per Maintainer",
    x = "Number of Packages",
    y = "Number of Maintainers"
  ) +
  theme_minimal()

## ----temporal_analysis--------------------------------------------------------
# Analyze build patterns over time
build_summary <- build_summary |>
  mutate(
    date = as.Date(startedat),
    month = format(startedat, "%Y-%m")
  )

# Build activity by month
# (month_date anchors each "YYYY-MM" label to the first day of that month so
# it can be plotted on a date axis)
monthly_builds <- build_summary |>
  count(month) |>
  mutate(month_date = as.Date(paste0(month, "-01")))

ggplot(monthly_builds, aes(x = month_date, y = n)) +
  geom_line(color = "steelblue", linewidth = 1) +
  geom_point(color = "steelblue") +
  labs(
    title = "Build Activity Over Time",
    x = "Month",
    y = "Number of Builds"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Error rate over time
monthly_errors <- build_summary |>
  group_by(month) |>
  summarise(
    total = n(),
    errors = sum(status %in% c("ERROR", "TIMEOUT")),
    error_rate = errors / total * 100,
    .groups = "drop"
  ) |>
  mutate(month_date = as.Date(paste0(month, "-01")))

ggplot(monthly_errors, aes(x = month_date, y = error_rate)) +
  geom_line(color = "red", linewidth = 1) +
  geom_point(color = "red") +
  labs(
    title = "Build Error Rate Over Time",
    x = "Month",
    y = "Error Rate (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

## ----get_build_report---------------------------------------------------------
# Retrieves the build report for all packages on December 29, 2025
# Filtering also for RELEASE_3_22 branch and linux "nebbiolo2" build machine
get_build_report("2025-12-29", branch = "RELEASE_3_22", builder = "nebbiolo2")

## ----get_failing_packages-----------------------------------------------------
# Returns all failing packages for RELEASE_3_22 branch
# for build machine nebbiolo2
get_failing_packages("RELEASE_3_22", "nebbiolo2")

## ----sessionInfo--------------------------------------------------------------
sessionInfo()