## ----echo=FALSE, out.width = "800px"------------------------------------------
knitr::include_graphics("../man/figures/new_SE_usage-01.png")

## ----echo=FALSE, include=FALSE------------------------------------------------
library(knitr)
# knitr::opts_chunk$set(cache = TRUE, warning = FALSE,
#                       message = FALSE, cache.lazy = FALSE)

library(dplyr)
library(tidyr)
library(tibble)
library(magrittr)
library(ggplot2)
library(ggrepel)
library(tidybulk)
library(tidySummarizedExperiment)

my_theme = 	
	theme_bw() +
	theme(
		panel.border = element_blank(),
		axis.line = element_line(),
		panel.grid.major = element_line(size = 0.2),
		panel.grid.minor = element_line(size = 0.1),
		text = element_text(size=12),
		legend.position="bottom",
		aspect.ratio=1,
		strip.background = element_blank(),
		axis.title.x  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10)),
		axis.title.y  = element_text(margin = margin(t = 10, r = 10, b = 10, l = 10))
	)

data(se_mini)
tibble_counts = tidybulk::se_mini |> tidybulk() |> as_tibble()


## ----eval=FALSE---------------------------------------------------------------
# BiocManager::install("tidybulk")

## ----eval=FALSE---------------------------------------------------------------
# devtools::install_github("stemangiola/tidybulk")

## -----------------------------------------------------------------------------
se_mini

## -----------------------------------------------------------------------------
class(se_mini)

## ----eval=FALSE---------------------------------------------------------------
# se_mini |>	get_bibliography()

## ----aggregate, message=FALSE, warning=FALSE, results='hide', class.source='yellow'----
rowData(se_mini)$gene_name = rownames(se_mini)
se_mini.aggr = se_mini |> aggregate_duplicates(.transcript = gene_name)

## ----aggregate long, eval=FALSE-----------------------------------------------
# temp = data.frame(
# 	symbol = dge_list$genes$symbol,
# 	dge_list$counts
# )
# dge_list.nr <- by(temp,	temp$symbol,
# 	function(df)
# 		if(length(df[1,1])>0)
# 			matrixStats:::colSums(as.matrix(df[,-1]))
# )
# dge_list.nr <- do.call("rbind", dge_list.nr)
# colnames(dge_list.nr) <- colnames(dge_list)

## ----normalise----------------------------------------------------------------
se_mini.norm = se_mini.aggr |> identify_abundant(factor_of_interest = condition) |> scale_abundance()

## ----normalise long, eval=FALSE-----------------------------------------------
# library(edgeR)
# 
# dgList <- DGEList(count_m=x,group=group)
# keep <- filterByExpr(dgList)
# dgList <- dgList[keep,,keep.lib.sizes=FALSE]
# [...]
# dgList <- calcNormFactors(dgList, method="TMM")
# norm_counts.table <- cpm(dgList)

## ----include=FALSE------------------------------------------------------------
se_mini.norm |> select(`count`, count_scaled, .abundant, everything())

## ----plot_normalise-----------------------------------------------------------
se_mini.norm |>
	ggplot(aes(count_scaled + 1, group=.sample, color=`Cell.type`)) +
	geom_density() +
	scale_x_log10() +
	my_theme

## ----filter variable----------------------------------------------------------
se_mini.norm.variable = se_mini.norm |> keep_variable()

## ----filter variable long, eval=FALSE-----------------------------------------
# library(edgeR)
# 
# x = norm_counts.table
# 
# s <- rowMeans((x-rowMeans(x))^2)
# o <- order(s,decreasing=TRUE)
# x <- x[o[1L:top],,drop=FALSE]
# 
# norm_counts.table = norm_counts.table[rownames(x)]
# 
# norm_counts.table$cell_type = tibble_counts[
# 	match(
# 		tibble_counts$sample,
# 		rownames(norm_counts.table)
# 	),
# 	"Cell.type"
# ]

## ----mds----------------------------------------------------------------------
se_mini.norm.MDS =
  se_mini.norm |>
  reduce_dimensions(method="MDS", .dims = 3)


## ----eval = FALSE-------------------------------------------------------------
# library(limma)
# 
# count_m_log = log(count_m + 1)
# cmds = limma::plotMDS(ndim = .dims, plot = FALSE)
# 
# cmds = cmds %$%	
# 	cmdscale.out |>
# 	setNames(sprintf("Dim%s", 1:6))
# 
# cmds$cell_type = tibble_counts[
# 	match(tibble_counts$sample, rownames(cmds)),
# 	"Cell.type"
# ]

## ----plot_mds, eval=FALSE-----------------------------------------------------
# se_mini.norm.MDS |> pivot_sample()  |> select(contains("Dim"), everything())
# 
# se_mini.norm.MDS |>
# 	pivot_sample() |>
#   GGally::ggpairs(columns = 9:11, ggplot2::aes(colour=`Cell.type`))
# 
# 

## ----pca, message=FALSE, warning=FALSE, results='hide'------------------------
se_mini.norm.PCA =
  se_mini.norm |>
  reduce_dimensions(method="PCA", .dims = 3)

## ----eval=FALSE---------------------------------------------------------------
# count_m_log = log(count_m + 1)
# pc = count_m_log |> prcomp(scale = TRUE)
# variance = pc$sdev^2
# variance = (variance / sum(variance))[1:6]
# pc$cell_type = counts[
# 	match(counts$sample, rownames(pc)),
# 	"Cell.type"
# ]

## ----plot_pca, eval=FALSE-----------------------------------------------------
# 
# se_mini.norm.PCA |> pivot_sample() |> select(contains("PC"), everything())
# 
# se_mini.norm.PCA |>
# 	 pivot_sample() |>
#   GGally::ggpairs(columns = 11:13, ggplot2::aes(colour=`Cell.type`))

## ----tsne, message=FALSE, warning=FALSE, results='hide'-----------------------
se_mini.norm.tSNE =
	breast_tcga_mini_SE |>
	identify_abundant() |>
	reduce_dimensions(
		method = "tSNE",
		perplexity=10,
		pca_scale =TRUE
	)

## ----eval=FALSE---------------------------------------------------------------
# count_m_log = log(count_m + 1)
# 
# tsne = Rtsne::Rtsne(
# 	t(count_m_log),
# 	perplexity=10,
# 		pca_scale =TRUE
# )$Y
# tsne$cell_type = tibble_counts[
# 	match(tibble_counts$sample, rownames(tsne)),
# 	"Cell.type"
# ]

## -----------------------------------------------------------------------------
se_mini.norm.tSNE |>
	pivot_sample() |>
	select(contains("tSNE"), everything()) 

se_mini.norm.tSNE |>
	pivot_sample() |>
	ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + my_theme

## ----rotate-------------------------------------------------------------------
se_mini.norm.MDS.rotated =
  se_mini.norm.MDS |>
	rotate_dimensions(`Dim1`, `Dim2`, rotation_degrees = 45, action="get")

## ----eval=FALSE---------------------------------------------------------------
# rotation = function(m, d) {
# 	r = d * pi / 180
# 	((bind_rows(
# 		c(`1` = cos(r), `2` = -sin(r)),
# 		c(`1` = sin(r), `2` = cos(r))
# 	) |> as_matrix()) %*% m)
# }
# mds_r = pca |> rotation(rotation_degrees)
# mds_r$cell_type = counts[
# 	match(counts$sample, rownames(mds_r)),
# 	"Cell.type"
# ]

## ----plot_rotate_1------------------------------------------------------------
se_mini.norm.MDS.rotated |>
	ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type` )) +
  geom_point() +
  my_theme

## ----plot_rotate_2------------------------------------------------------------
se_mini.norm.MDS.rotated |>
	pivot_sample() |>
	ggplot(aes(x=`Dim1_rotated_45`, y=`Dim2_rotated_45`, color=`Cell.type` )) +
  geom_point() +
  my_theme

## ----de, message=FALSE, warning=FALSE, results='hide'-------------------------
se_mini.de =
	se_mini |>
	test_differential_abundance( ~ condition, action="get")
se_mini.de

## ----eval=FALSE---------------------------------------------------------------
# library(edgeR)
# 
# dgList <- DGEList(counts=counts_m,group=group)
# keep <- filterByExpr(dgList)
# dgList <- dgList[keep,,keep.lib.sizes=FALSE]
# dgList <- calcNormFactors(dgList)
# design <- model.matrix(~group)
# dgList <- estimateDisp(dgList,design)
# fit <- glmQLFit(dgList,design)
# qlf <- glmQLFTest(fit,coef=2)
# topTags(qlf, n=Inf)

## ----de contrast, message=FALSE, warning=FALSE, results='hide', eval=FALSE----
# se_mini.de =
# 	se_mini |>
# 	identify_abundant(factor_of_interest = condition) |>
# 	test_differential_abundance(
# 		~ 0 + condition,
# 		.contrasts = c( "conditionTRUE - conditionFALSE"),
# 		action="get"
# 	)

## ----adjust, message=FALSE, warning=FALSE, results='hide'---------------------
se_mini.norm.adj =
	se_mini.norm 	|> adjust_abundance(	.factor_unwanted = time, .factor_of_interest = condition, method="combat")



## ----eval=FALSE---------------------------------------------------------------
# library(sva)
# 
# count_m_log = log(count_m + 1)
# 
# design =
# 		model.matrix(
# 			object = ~ factor_of_interest + batch,
# 			data = annotation
# 		)
# 
# count_m_log.sva =
# 	ComBat(
# 			batch =	design[,2],
# 			mod = design,
# 			...
# 		)
# 
# count_m_log.sva = ceiling(exp(count_m_log.sva) -1)
# count_m_log.sva$cell_type = counts[
# 	match(counts$sample, rownames(count_m_log.sva)),
# 	"Cell.type"
# ]
# 

## ----cibersort----------------------------------------------------------------
se_mini.cibersort =
	se_mini |>
	deconvolve_cellularity(action="get", cores=1, prefix = "cibersort__") 


## ----eval=FALSE---------------------------------------------------------------
# 
# source(‘CIBERSORT.R’)
# count_m |> write.table("mixture_file.txt")
# results <- CIBERSORT(
# 	"sig_matrix_file.txt",
# 	"mixture_file.txt",
# 	perm=100, QN=TRUE
# )
# results$cell_type = tibble_counts[
# 	match(tibble_counts$sample, rownames(results)),
# 	"Cell.type"
# ]
# 

## ----plot_cibersort, eval=FALSE-----------------------------------------------
# se_mini.cibersort |>
# 	pivot_longer(
# 		names_to= "Cell_type_inferred",
# 		values_to = "proportion",
# 		names_prefix ="cibersort__",
# 		cols=contains("cibersort__")
# 	) |>
#   ggplot(aes(x=Cell_type_inferred, y=proportion, fill=`Cell.type`)) +
#   geom_boxplot() +
#   facet_wrap(~`Cell.type`) +
#   my_theme +
#   theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), aspect.ratio=1/5)

## ----DC, eval=FALSE-----------------------------------------------------------
# 
# 	se_mini |>
# 	test_differential_cellularity(. ~ condition )
# 

## ----DC_censored, eval=FALSE--------------------------------------------------
# 
# 	se_mini |>
# 	test_differential_cellularity(survival::Surv(time, dead) ~ .)
# 

## ----cluster------------------------------------------------------------------
se_mini.norm.cluster = se_mini.norm.MDS |>
  cluster_elements(method="kmeans",	centers = 2, action="get" )

## ----eval=FALSE---------------------------------------------------------------
# count_m_log = log(count_m + 1)
# 
# k = kmeans(count_m_log, iter.max = 1000, ...)
# cluster = k$cluster
# 
# cluster$cell_type = tibble_counts[
# 	match(tibble_counts$sample, rownames(cluster)),
# 	c("Cell.type", "Dim1", "Dim2")
# ]
# 

## ----plot_cluster-------------------------------------------------------------
 se_mini.norm.cluster |>
	ggplot(aes(x=`Dim1`, y=`Dim2`, color=`cluster_kmeans`)) +
  geom_point() +
  my_theme

## ----SNN, eval=FALSE, cache=TRUE, message=FALSE, warning=FALSE, results='hide'----
# se_mini.norm.SNN =
# 	se_mini.norm.tSNE |>
# 	cluster_elements(method = "SNN")

## ----eval=FALSE---------------------------------------------------------------
# library(Seurat)
# 
# snn = CreateSeuratObject(count_m)
# snn = ScaleData(
# 	snn, display.progress = TRUE,
# 	num.cores=4, do.par = TRUE
# )
# snn = FindVariableFeatures(snn, selection.method = "vst")
# snn = FindVariableFeatures(snn, selection.method = "vst")
# snn = RunPCA(snn, npcs = 30)
# snn = FindNeighbors(snn)
# snn = FindClusters(snn, method = "igraph", ...)
# snn = snn[["seurat_clusters"]]
# 
# snn$cell_type = tibble_counts[
# 	match(tibble_counts$sample, rownames(snn)),
# 	c("Cell.type", "Dim1", "Dim2")
# ]
# 

## ----SNN_plot, eval=FALSE-----------------------------------------------------
# se_mini.norm.SNN |>
# 	pivot_sample() |>
# 	select(contains("tSNE"), everything())
# 
# se_mini.norm.SNN |>
# 	pivot_sample() |>
# 	gather(source, Call, c("cluster_SNN", "Call")) |>
# 	distinct() |>
# 	ggplot(aes(x = `tSNE1`, y = `tSNE2`, color=Call)) + geom_point() + facet_grid(~source) + my_theme
# 
# 
# # Do differential transcription between clusters
# se_mini.norm.SNN |>
# 	mutate(factor_of_interest = `cluster_SNN` == 3) |>
# 	test_differential_abundance(
#     ~ factor_of_interest,
#     action="get"
#    )

## ----drop---------------------------------------------------------------------
se_mini.norm.non_redundant =
	se_mini.norm.MDS |>
  remove_redundancy(	method = "correlation" )

## ----eval=FALSE---------------------------------------------------------------
# library(widyr)
# 
# .data.correlated =
# 	pairwise_cor(
# 		counts,
# 		sample,
# 		transcript,
# 		rc,
# 		sort = TRUE,
# 		diag = FALSE,
# 		upper = FALSE
# 	) |>
# 	filter(correlation > correlation_threshold) |>
# 	distinct(item1) |>
# 	rename(!!.element := item1)
# 
# # Return non redudant data frame
# counts |> anti_join(.data.correlated) |>
# 	spread(sample, rc, - transcript) |>
# 	left_join(annotation)
# 
# 
# 

## ----plot_drop----------------------------------------------------------------
se_mini.norm.non_redundant |>
	pivot_sample() |>
	ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) +
  geom_point() +
  my_theme


## ----drop2--------------------------------------------------------------------
se_mini.norm.non_redundant =
	se_mini.norm.MDS |>
  remove_redundancy(
  	method = "reduced_dimensions",
  	Dim_a_column = `Dim1`,
  	Dim_b_column = `Dim2`
  )

## ----plot_drop2---------------------------------------------------------------
se_mini.norm.non_redundant |>
	pivot_sample() |>
	ggplot(aes(x=`Dim1`, y=`Dim2`, color=`Cell.type`)) +
  geom_point() +
  my_theme


## ----eval=FALSE---------------------------------------------------------------
# counts = tidybulk_SAM_BAM(
# 	file_names,
# 	genome = "hg38",
# 	isPairedEnd = TRUE,
# 	requireBothEndsMapped = TRUE,
# 	checkFragLength = FALSE,
# 	useMetaFeatures = TRUE
# )

## ----ensembl------------------------------------------------------------------
counts_ensembl |> ensembl_to_symbol(ens)

## ----description--------------------------------------------------------------
se_mini |> 
	describe_transcript() |> 
	select(feature, description, everything())

## -----------------------------------------------------------------------------
sessionInfo()