## Example script for the gaucho package, laid out as knitr-style chunks.
## Each "## ----label, eval=...----" header starts one chunk; chunks marked
## eval=FALSE are kept commented out (prefix "# ") and are shown for reference
## only, because they are long-running or write files.

## ----load_library_fake, eval=FALSE---------------------------------------
# ## Install
# ## NOTE(review): biocLite() has been superseded by BiocManager::install()
# ## on current Bioconductor releases - confirm which installer applies.
# source("http://bioconductor.org/biocLite.R")
# biocLite("gaucho")
# ## Load
# library(gaucho)

## ----demontrate_proportions, eval=TRUE-----------------------------------
## First sample is represented by first block of three digits
## First sample is 100% clone 1, 0% clone 2, 0% clone 3
c(5, 0, 0) / sum(c(5, 0, 0))
## Second sample is represented by second block of three digits
## Second sample is 30% clone 1, 70% clone 2, 0% clone 3
c(3, 7, 0) / sum(c(3, 7, 0))
## Third sample is represented by third block of three digits
## Third sample is 0% clone 1, 20% clone 2, 80% clone 3
c(0, 1, 4) / sum(c(0, 1, 4))

## ----emit_results, eval=FALSE--------------------------------------------
# ## Write results to files
# gauchoReport(gaucho_input_dataframe, gaucho_output_object)

## ----gaucho_simple_data_show, eval=TRUE----------------------------------
## Load library
library(gaucho)

## Load simple data set
gaucho_simple_data <- read.table(
  file.path(
    system.file("extdata", package = "gaucho"),
    "gaucho_simple_data.txt"
  ),
  header = TRUE,
  row.names = 1
)

## There are three columns (time points T0, T1 and T2)
## and three rows (mutations M1, M2, M3)
gaucho_simple_data

## ----gaucho_simple_data_execute, eval=FALSE------------------------------
# ## Execute gaucho() function on the simple data set
# simpleDataSolution <- gaucho(
#   gaucho_simple_data,
#   number_of_clones = 3,
#   nroot = 1,
#   iterations = 1000
# )

## ----gaucho_simple_data_report, eval=FALSE-------------------------------
# ## Produce plots for the phylogeny, heatmap and proportions in turn
# gauchoReport(gaucho_simple_data, simpleDataSolution, outType = "phylogeny")
# gauchoReport(gaucho_simple_data, simpleDataSolution, outType = "heatmap")
# gauchoReport(gaucho_simple_data, simpleDataSolution, outType = "proportion")
#
# ## Create output files representing the solution(s) in the current
# ## working directory
# gauchoReport(gaucho_simple_data, simpleDataSolution, outType = "complete")
#
# ## In case you want to know the current working directory, it can be
# ## reported using this function:
# getwd()

## ----gaucho_hidden_data_load, eval=TRUE----------------------------------
## Load library
library(gaucho)

## Load hidden data set
gaucho_hidden_data <- read.table(
  file.path(
    system.file("extdata", package = "gaucho"),
    "gaucho_hidden_data.txt"
  ),
  header = TRUE,
  row.names = 1
)

## There are three columns (time points T0, T1 and T2)
## and five rows (mutations M1, M2, M3, M4 and M5)
gaucho_hidden_data

## ----gaucho_hidden_data_results, eval=FALSE------------------------------
# ## NOTE(review): hiddenDataSolution is not created anywhere in the visible
# ## script; it has to come from a gaucho() run on gaucho_hidden_data -
# ## confirm the intended parameters before running this chunk.
#
# ## Produce plots for the phylogeny, heatmap and proportions in turn
# gauchoReport(gaucho_hidden_data, hiddenDataSolution, outType = "phylogeny")
# gauchoReport(gaucho_hidden_data, hiddenDataSolution, outType = "heatmap")
# gauchoReport(gaucho_hidden_data, hiddenDataSolution, outType = "proportion")
#
# ## Create output files representing the solution(s) in the current
# ## working directory
# gauchoReport(gaucho_hidden_data, hiddenDataSolution, outType = "complete")

## ----gaucho_synthetic_data, eval=FALSE-----------------------------------
# ## Load library
# library(gaucho)
#
# ## Load synthetic data set and the same data with added jitter
# gaucho_synth_data <- read.table(
#   file.path(
#     system.file("extdata", package = "gaucho"),
#     "gaucho_synth_data.txt"
#   ),
#   header = TRUE,
#   row.names = 1
# )
# gaucho_synth_data_jittered <- read.table(
#   file.path(
#     system.file("extdata", package = "gaucho"),
#     "gaucho_synth_data_jittered.txt"
#   ),
#   header = TRUE,
#   row.names = 1
# )
#
# ## Execute gaucho() function on the synthetic data set - we know that
# ## there are 6 clones
# s <- gaucho(gaucho_synth_data, number_of_clones = 6, iterations = 3000)
#
# ## Access solution slot in returned object to show highest scoring
# ## solution(s)
# ## The optimum solution's score is -2.22E-15, which is a rounding error
# ## from zero
# s@solution
#
# ## Create output files representing the solution(s)
# ## in the current working directory
# gauchoReport(gaucho_synth_data, s)

## ----gaucho_yeast_data, eval=FALSE---------------------------------------
# ## Load library
# library(gaucho)
#
# ## Load yeast data set
# BYB1_G07_pruned <- read.table(
#   file.path(
#     system.file("extdata", package = "gaucho"),
#     "BYB1_G07_pruned.txt"
#   ),
#   header = TRUE,
#   row.names = 1
# )
#
# ## Execute gaucho() function on the yeast data set
# ## The paper claims that there are 6 clones with multiple roots
# yDataSolution <- gaucho(
#   BYB1_G07_pruned,
#   number_of_clones = 6,
#   contamination = 1,
#   iterations = 3000
# )
#
# ## Access solution slot in returned object to show highest scoring
# ## solution(s)
# yDataSolution@solution
#
# ## Create output files representing the solution(s) in the current
# ## working directory
# gauchoReport(BYB1_G07_pruned, yDataSolution)

## ----gaucho_find_clones, eval=FALSE--------------------------------------
# ## Load library
# library(gaucho)
#
# ## Load synthetic data set
# gaucho_synth_data <- read.table(
#   file.path(
#     system.file("extdata", package = "gaucho"),
#     "gaucho_synth_data.txt"
#   ),
#   header = TRUE,
#   row.names = 1
# )
#
# ## We know that there are 6 clones in this data set.
# ## Let's loop from number_of_clones=4 to number_of_clones=8 to
# ## illustrate this
# ## Also, assume we know nothing about phylogeny, so leave nroot as the
# ## default value
#
# ## Assign parameters and create an empty matrix to hold the results:
# ## one row per repeat, one column per candidate number of clones
# clonerange <- 4:8
# n <- 5
# results <- matrix(NA, nrow = n, ncol = length(clonerange))
# colnames(results) <- paste0("clone", clonerange)
#
# ## Execute gaucho n times for each number of clones
# ## (the loop variable is not called "c" so that base::c is not masked)
# for (col_idx in seq_along(clonerange)) {
#   nclones <- clonerange[col_idx]
#   for (thisn in seq_len(n)) {
#     message("Iteration ", thisn, " using ", nclones, " clones")
#     s <- gaucho(
#       gaucho_synth_data,
#       number_of_clones = nclones,
#       iterations = 1000
#     )
#     ## Assign best fitness value to an element in the matrix
#     results[thisn, col_idx] <- s@fitnessValue
#   }
# }
#
# ## Plot the results
# boxplot(results)

## ----gaucho_multi_runs, eval=FALSE---------------------------------------
# ## Load library
# library(gaucho)
#
# ## Load synthetic data set
# gaucho_synth_data <- read.table(
#   file.path(
#     system.file("extdata", package = "gaucho"),
#     "gaucho_synth_data.txt"
#   ),
#   header = TRUE,
#   row.names = 1
# )
#
# ## Execute 20 runs
# n <- 20
#
# ## Create a pre-sized list to hold all results
# gauchoResults <- vector("list", n)
#
# ## Execute gaucho n times, storing each resulting object in the list
# ## ("[[" is required to store a whole object as one list element)
# for (thisn in seq_len(n)) {
#   message("Iteration ", thisn)
#   syntheticDataSolution <- gaucho(
#     gaucho_synth_data,
#     number_of_clones = 6,
#     iterations = 10000
#   )
#   gauchoResults[[thisn]] <- syntheticDataSolution
# }
#
# ## Find highest fitness value of all runs
# fitnessValues <- vapply(
#   gauchoResults,
#   function(x) x@fitnessValue,
#   numeric(1)
# )
# bestScore <- max(fitnessValues)
#
# ## Which run was it?
# which(fitnessValues == bestScore)

## ----sessionInfo, eval=TRUE----------------------------------------------
sessionInfo()