### R code from vignette source 'FunChIP.Rnw' ################################################### ### code chunk number 1: style ################################################### BiocStyle::latex() ################################################### ### code chunk number 2: options ################################################### options(width=90) ################################################### ### code chunk number 3: preliminaries ################################################### library(FunChIP) ################################################### ### code chunk number 4: fragment_length ################################################### # load the GRanges object associated # to the ChIP-Seq experiment on the # transcription factor c-Myc in murine cells data(GR100) # name of the .bam file (the # .bam.bai index file must also be present) bamf <- system.file("extdata", "test.bam", package="FunChIP", mustWork=TRUE) # compute d d <- compute_fragments_length(GR, bamf, min.d = 0, max.d = 300) d ################################################### ### code chunk number 5: pileup_peak ################################################### # associate to each peak # of the GRanges object the correspondent # coverage function peaks <- pileup_peak(GR, bamf, d = d) peaks ################################################### ### code chunk number 6: figureGCV ################################################### # the method smooth_peak # removes the background and defines the spline # approximation from the previously computed peaks # with lambda estimated from the # GCV on derivatives. The method spans a non-uniform # grid for lambda from 10^-4 to 10^12. # ( the grid is uniform for log10(lambda) ) peaks.smooth <- smooth_peak(peaks, lambda = 10^(-4:12), subsample.data = 50, GCV.derivatives = TRUE, plot.GCV = TRUE, rescale = FALSE ) ################################################### ### code chunk number 7: smooth ################################################### # the automatic choice is lambda = 10^6 peaks.smooth <- smooth_peak(peaks, lambda = 10^6, plot.GCV = FALSE) head(peaks.smooth) # mantaining this choice of lambda smooth_peak # can also define the scaled approximation # of the spline peaks.smooth.scaled <- smooth_peak(peaks, lambda = 10^6, plot.GCV = FALSE, rescale = TRUE) head(peaks.smooth.scaled) ################################################### ### code chunk number 8: summit ################################################### # peaks.summit identifies the maximum point # of the smoothed peaks peaks.summit <- summit_peak(peaks.smooth) head(peaks.summit) # peaks.summit can identify also the maximum # point of the scaled approximation peaks.summit.scaled <- summit_peak(peaks.smooth.scaled, rescale = TRUE) head(peaks.summit.scaled) ################################################### ### code chunk number 9: alignment ################################################### # representation of two peaks par (mfrow = c(1,2)) plot_peak(peaks.summit, index = c(6,7), col=c('red',2)) aligned.peaks <- cluster_peak(peaks.summit[c(6,7)], parallel = FALSE , n.clust = 1, seeds = 1, shift.peak = TRUE, weight = 1, alpha = 1, p = 2, t.max = 2, plot.graph.k = FALSE, verbose = FALSE) aligned.peaks # shift coefficients aligned.peaks$coef_shift plot_peak(aligned.peaks, col = 'forestgreen', shift = TRUE, k = 1, cluster.peak = TRUE, line.plot = 'spline') ################################################### ### code chunk number 10: weight ################################################### # compute the weight from the first 10 peaks dist_matrix <- distance_peak(peaks.summit) # dist matrix contains the two matrices d_0(i,j) # and d_1(i,j), used to compute w names(dist_matrix) ratio_norm <- dist_matrix$dist_matrix_d0 / dist_matrix$dist_matrix_d1 ratio_norm_upper_tri <- ratio_norm[upper.tri(ratio_norm)] summary(ratio_norm_upper_tri) # suggestion: use the median as weight w <- median(ratio_norm_upper_tri) w ################################################### ### code chunk number 11: figurek ################################################### # classification of the smooth peaks in different # numbers of clusters, from 1 ( no distinction, only shift ) # to 6. # here the analysis is run on the spline approximation # without scaling peaks.cluster <- cluster_peak(peaks.summit, parallel = FALSE , seeds=1:6, n.clust = 1:6, shift = NULL, weight = 1, alpha = 1, p = 2, t.max = 2, plot.graph.k = TRUE, verbose = FALSE) head(peaks.cluster) ################################################### ### code chunk number 12: figurekScaled ################################################### # here the analysis is run on the spline approximation # with scaling peaks.cluster.scaled <- cluster_peak(peaks.summit.scaled, parallel = FALSE , seeds=1:6, n.clust = 1:6, shift = NULL, weight = 1, alpha = 1, p = 2, t.max = 2, plot.graph.k = TRUE, verbose = FALSE, rescale = TRUE) head(peaks.cluster.scaled) ################################################### ### code chunk number 13: choose_k ################################################### # select the results for k = 3 with alignment peaks.classified.short <- choose_k(peaks.cluster, k = 3, shift = TRUE, cleaning = TRUE) head(peaks.classified.short) peaks.classified.extended <- choose_k(peaks.cluster, k = 3, shift = TRUE, cleaning = FALSE) # and for the scaled version for k =2 and alignment peaks.classified.scaled.short <- choose_k(peaks.cluster.scaled, k = 2, shift = TRUE, cleaning = TRUE) head(peaks.classified.scaled.short) peaks.classified.scaled.extended <- choose_k(peaks.cluster.scaled, k = 2, shift = TRUE, cleaning = FALSE) ################################################### ### code chunk number 14: plot1 ################################################### # plot of the first 10 peaks (raw data) plot_peak(peaks, index = 1:10, line.plot = 'count') ################################################### ### code chunk number 15: plot2 ################################################### # plot of the smoothed approximation of the first 10 peaks plot_peak(peaks.smooth, index = 1:10, line.plot = 'spline') ################################################### ### code chunk number 16: plot3 ################################################### # plot of the smoothed approximation of the first 10 peaks, # centering peaks around their summits plot_peak(peaks.summit, index = 1:10, line.plot = 'spline') ################################################### ### code chunk number 17: plot2bis ################################################### # plot of the smoothed approximation of the first 10 peaks; # the scaled functions are plotted. plot_peak(peaks.smooth.scaled, index = 1:10, line.plot = 'spline', rescale = TRUE) ################################################### ### code chunk number 18: plot3bis ################################################### # plot of the scaled approximation of the first 10 peaks, # centering peaks around their summits plot_peak(peaks.summit.scaled, index = 1:10, line.plot = 'spline', rescale = TRUE) ################################################### ### code chunk number 19: plotBOTH ################################################### # plot of a peak comparing its raw structure and # its spline-smoothed version. plot_peak(peaks.summit, index = 3, line.plot = 'both', col = 'darkblue') ################################################### ### code chunk number 20: plot4 ################################################### # plot of the results of the kmean alignment. # Peaks are plotted in three different panels # according to the clustering results. plot_peak(peaks.cluster, index = 1:100, line.plot = 'spline', shift = TRUE, k = 3, cluster.peak = TRUE, col = topo.colors(3)) ################################################### ### code chunk number 21: plot4bis ################################################### # plot of the results of the kmean alignment. # Scaled peaks are plotted in three different panels # according to the clustering results. plot_peak(peaks.cluster.scaled, index = 1:100, line.plot = 'spline', shift = TRUE, k = 2, cluster.peak = TRUE, rescale = TRUE, col = heat.colors(2))