## ----setup, include=TRUE------------------------------------------------------
# Standard setup chunk
knitr::opts_chunk$set(echo = TRUE, collapse = TRUE)

# Load libraries required for the vignette to build
library(PMScanR)
library(ggseqlogo)
library(seqinr)
library(plotly)

## ----installation-loading, eval=FALSE-----------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
#
# BiocManager::install("PMScanR")
#
# library(PMScanR)

## ----quickStart, eval=FALSE---------------------------------------------------
# fasta_file <- system.file("extdata", "hemoglobins.fasta", package = "PMScanR")
#
# runPsScan(in_file = fasta_file, out_file = "results.gff", out_format = "gff")
#
# gff_data <- as.data.frame(rtracklayer::import.gff("results.gff"))
# motif_matrix <- gff2matrix(gff_data)
#
# matrix2OP(motif_matrix)

## ----run-shiny-app, eval=FALSE------------------------------------------------
# # This command launches the interactive Shiny app
# runPMScanRShiny()

## ----set-working-directory----------------------------------------------------
# Setting working directory is user-specific, e.g.:
# setwd("/path/to/your/working/directory")

## ----load-example-files-------------------------------------------------------
# Resolve the paths of the example files under the package's "extdata"
# directory. `mustWork = TRUE` makes a missing file fail here with a clear
# error instead of silently yielding "" and breaking a later reader call.

# 1. Load example FASTA file (Input for runPsScan)
fasta_file <- system.file(
  "extdata", "hemoglobins.fasta",
  package = "PMScanR", mustWork = TRUE
)

# 2. Load example GFF output
gff_file <- system.file(
  "extdata", "out_Hb_gff.txt",
  package = "PMScanR", mustWork = TRUE
)

# 3. Load example PSA output
psa_file <- system.file(
  "extdata", "out_Hb_psa.txt",
  package = "PMScanR", mustWork = TRUE
)

# 4. Load example PROSITE text output (Scan format)
prosite_txt_file <- system.file(
  "extdata", "out_Hb_PROSITE.txt",
  package = "PMScanR", mustWork = TRUE
)

## ----runPsScan, eval=FALSE----------------------------------------------------
# # This command is not evaluated in the vignette as it requires an external
# # dependency (Perl) and can be time-consuming during the first run.
#
# # Example: Generate GFF output
# runPsScan(in_file = fasta_file, out_format = "gff", out_file = "results.gff")

## ----read-gff-----------------------------------------------------------------
# Import the GFF file and flatten the imported object into a plain data frame
gff_data <- as.data.frame(rtracklayer::import.gff(gff_file))
# The data frame now contains all necessary columns (including Sequence)
head(gff_data)

## ----read-psa-----------------------------------------------------------------
psa_data <- readPsa(psa_file)
head(psa_data)

## ----read-prosite-------------------------------------------------------------
prosite_data <- readProsite(prosite_txt_file)
head(prosite_data)

## ----convert-gff-to-matrix----------------------------------------------------
# We can use the data loaded from Option A, B or C.
# Here we use 'gff_data' as an example.
motif_matrix <- gff2matrix(gff_data)
# Display the first few rows of the resulting matrix
head(motif_matrix)

## ----generate-occurrence_Plot, fig.show='hold'--------------------------------
# Generate a standard occurrence plot from the motif_matrix
occurrencePlot <- matrix2OP(input = motif_matrix)
occurrencePlot

## ----generate-square-Occurrence-Plot, fig.show='hold'-------------------------
# Generate a square occurrence plot from the motif_matrix
squareOccurrencePlot <- matrix2SquareOP(input = motif_matrix)
squareOccurrencePlot

## ----generate-pie-chart, eval=TRUE--------------------------------------------
# Build the pie chart from the GFF data frame and render it explicitly
pie_chart <- freqPie(gff_data)
print(pie_chart)

## ----extract-motifs-from-psa, fig.show='hold'---------------------------------
# This reads the PROSITE analysis output file from disk and extracts motifs.
# The format is detected automatically, but can also be specified explicitly
# (e.g., format = "gff").
# Extract motifs from the PSA example file (`psa_file` is set earlier in the
# script)
protein_motifs <- extractProteinMotifs(psa_file)
# Check the PROSITE IDs (keys) found in the file
head(names(protein_motifs))
# Generate sequence logo for the first motif found in the list
ggseqlogo::ggseqlogo(protein_motifs[[1]], seq_type = "aa")

## ----extract-segments-from-fasta, fig.show='hold'-----------------------------
# Read the FASTA file into a list of sequences
sequences <- seqinr::read.fasta(file = fasta_file, seqtype = "AA")
# Extract segments from position 10 to 20 from all sequences
segments <- extractSegments(sequences, from = 10, to = 20)
# Generate the sequence logo from the extracted segments;
# the segments are flattened with unlist() before plotting
ggseqlogo::ggseqlogo(unlist(segments), seq_type = "aa")

## ----session-info-------------------------------------------------------------
sessionInfo()