## ----setup, include=TRUE------------------------------------------------------
# Chunk defaults for the whole vignette: display each chunk's source (echo)
# and fold source and output into one block where possible (collapse).
knitr::opts_chunk$set(
  echo = TRUE,
  collapse = TRUE
)
# Load libraries required for the vignette to build
library(PMScanR)
library(ggseqlogo)
library(seqinr)
library(plotly)

## ----installation-loading, eval=FALSE-----------------------------------------
# if (!require("BiocManager", quietly = TRUE))
#     install.packages("BiocManager")
# 
# BiocManager::install("PMScanR")
# 
# library(PMScanR)

## ----quickStart, eval=FALSE---------------------------------------------------
# fasta_file <- system.file("extdata", "hemoglobins.fasta", package = "PMScanR")
# 
# runPsScan(in_file = fasta_file, out_file = "results.gff", out_format = "gff")
# 
# gff_data <- as.data.frame(rtracklayer::import.gff("results.gff"))
# motif_matrix <- gff2matrix(gff_data)
# 
# matrix2OP(motif_matrix)

## ----run-shiny-app, eval=FALSE------------------------------------------------
# # This command launches the interactive Shiny app
# runPMScanRShiny()

## ----set-working-directory----------------------------------------------------
# Setting the working directory is user-specific, e.g.:
# setwd("/path/to/your/working/directory")

## ----load-example-files-------------------------------------------------------
# Resolve the example files shipped in the package's "extdata" directory.
# Each path is stored in its own variable for use in the chunks below.

# 1. Example FASTA file (input for runPsScan)
fasta_file <- system.file(
  "extdata", "hemoglobins.fasta",
  package = "PMScanR"
)

# 2. Example GFF output
gff_file <- system.file(
  "extdata", "out_Hb_gff.txt",
  package = "PMScanR"
)

# 3. Example PSA output
psa_file <- system.file(
  "extdata", "out_Hb_psa.txt",
  package = "PMScanR"
)

# 4. Example PROSITE text output (Scan format)
prosite_txt_file <- system.file(
  "extdata", "out_Hb_PROSITE.txt",
  package = "PMScanR"
)

## ----runPsScan, eval=FALSE----------------------------------------------------
# # This command is not evaluated in the vignette as it requires an external
# # dependency (Perl) and can be time-consuming during the first run.
# 
# # Example: Generate GFF output
# runPsScan(in_file = fasta_file, out_format = 'gff', out_file = "results.gff")

## ----read-gff-----------------------------------------------------------------
# Import the GFF file first, then flatten the imported object into a plain
# data frame, which is what gff2matrix() and freqPie() receive below.
gff_ranges <- rtracklayer::import.gff(gff_file)
gff_data <- as.data.frame(gff_ranges)

# The data frame now contains all necessary columns (including Sequence);
# preview its first rows.
head(gff_data)

## ----read-psa-----------------------------------------------------------------
# Read the example PSA output file (psa_file) into psa_data.
psa_data <- readPsa(psa_file)
# Preview the first entries of the parsed result.
head(psa_data)

## ----read-prosite-------------------------------------------------------------
# Read the example PROSITE text output (prosite_txt_file) into prosite_data.
prosite_data <- readProsite(prosite_txt_file)
# Preview the first entries of the parsed result.
head(prosite_data)

## ----convert-gff-to-matrix----------------------------------------------------
# Build the motif matrix from previously loaded scan results.
# Per the vignette, the data from Option A, B or C can be used (presumably
# gff_data, psa_data or prosite_data); here we use 'gff_data' as an example.
motif_matrix <- gff2matrix(gff_data)

# Display the first few rows of the resulting matrix
head(motif_matrix)

## ----generate-occurrence_Plot, fig.show='hold'--------------------------------
# Generate a standard occurrence plot from the motif_matrix and keep the
# returned plot object.
occurrencePlot <- matrix2OP(input = motif_matrix)
# Evaluating the object at top level auto-prints it, which displays the plot.
occurrencePlot

## ----generate-square-Occurrence-Plot, fig.show='hold'-------------------------
# Generate a square occurrence plot from the same motif_matrix and keep the
# returned plot object.
squareOccurrencePlot <- matrix2SquareOP(input = motif_matrix)
# Evaluating the object at top level auto-prints it, which displays the plot.
squareOccurrencePlot

## ----generate-pie-chart, eval=TRUE--------------------------------------------
# Build a pie chart from the GFF-derived data frame (presumably showing how
# often each motif occurs -- confirm against the freqPie documentation).
pie_chart <- freqPie(gff_data)
# Print the chart object explicitly so that it is rendered in the output.
print(pie_chart)

## ----extract-motifs-from-psa, fig.show='hold'---------------------------------
# Read the PROSITE analysis output file from disk and extract its motifs.
# The file format is detected automatically; it can also be given
# explicitly (e.g., format = "gff").
protein_motifs <- extractProteinMotifs(psa_file)

# Inspect the PROSITE IDs (keys) found in the file
motif_ids <- names(protein_motifs)
head(motif_ids)

# Draw a sequence logo for the first motif found in the list
first_motif <- protein_motifs[[1]]
ggseqlogo::ggseqlogo(first_motif, seq_type = "aa")


## ----extract-segments-from-fasta, fig.show='hold'-----------------------------
# Load the FASTA file as a list of amino-acid sequences
sequences <- seqinr::read.fasta(
  file = fasta_file,
  seqtype = "AA"
)

# Take the segment from position 10 to 20 of every sequence
segments <- extractSegments(
  sequences,
  from = 10,
  to = 20
)

# Flatten the extracted segments and draw their sequence logo
segment_vector <- unlist(segments)
ggseqlogo::ggseqlogo(segment_vector, seq_type = "aa")

## ----session-info-------------------------------------------------------------
# Print the R version, platform and package details used for this run, so
# the vignette output can be reproduced.
sessionInfo()