---
title: "mirTarRnaSeq"
output:
  BiocStyle::pdf_document
vignette: >
  %\VignetteIndexEntry{mirTarRnaSeq}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
   \usepackage[utf8]{inputenc}
---
```{r style, echo=FALSE, results='asis'}
BiocStyle::markdown()
```

```{r setup, include = FALSE}
knitr::opts_chunk$set(
  dpi=300,
  warning = FALSE,
  collapse = TRUE,
  error = FALSE,
  comment = "#>",
  echo=TRUE
)
library(mirTarRnaSeq)
library(knitr)
library(rmarkdown)
library(mirTarRnaSeq)
library(dplyr)
library(ggplot2)
library(reshape2)
library(tidyr)
library(viridis)

doctype <- opts_knit$get("rmarkdown.pandoc.to")
```

# Introduction

mirTarRnaSeq is a package for miRNA and mRNA interaction analysis through regression and correlation approaches supporting various modeling approaches (gaussian, poisson, negative binomial, zero inflated poisson or negative binomial models for the data). miRNA and mRNA seq data are analysed from the same experiment (condition or time point data) and mRNA targets of the miRNAs from same experiment will be identified using statistical approaches.

The example data set for the first approach is 25 matching miRNA and mRNA EBV positive samples from TCGA identified as high EBV load based on Movassagh et al, Scientific Reports, 2019 paper. We attempt to identify the EBV miRNA targets on EBV genome (part1).

The second example set set is the simulated mouse fibroblast differntiated to muscle cells in three time points. Here, we try to identify mRNA targets of miRNA experssed at various time points (parts 2 and 3).

## Data upload

### mirTarRnaSeq accepts data in dataframe or table formats

* For the first approach ([Part1](#part1-uploading-data-into-the-application)) we use a table of  expressed mRNA genes in EBV from TCGA stomach cancer samples with high levels of EBV miRNA expression. 
* Next we use a list of normalized (tpm) EBV miRNA experession data from the same samples.The user has the option to use count data and model accordingly or use tzTrans() function for zscore normalization and then model.
* For the second part (Part2, Part3) of the experiment we are using two tables of differentially expressed mRNA and miRNA sequencing fold change results from mouse time point specific differntiation experiments.

# Part1 - miRNA mRNA regressions across sample cohorts

## Uploading data into the application. The example data can be found in the test folder under package.

```{r eval=TRUE, echo=TRUE}
DiffExp<-read.table("test/EBV_mRNA.txt", as.is=TRUE, header=TRUE, row.names=1)
miRNAExp<-read.table("test/EBV_miRNA.txt", as.is=TRUE, header=TRUE, row.names=1)
```

## Get miRanda file

We currently support miRanda runs (potential miRNA target parsing by score,interaction energy, and interaction or miRNA length ) on seven species ("Human", "Mouse", "Drosophila", "C.elegans", "Epstein_Barr" (EBV), "Cytomegalovirus" (CMV) and "Kaposi_Sarcoma" (KSHV)). We also support the viral miRNAs targeting human genes for EBV "Epstein_Barr_Human", CMV "CMV_Human" and KSHV "KSHV_Human". 
1) Here we first import the relevant miRanda file .
2) We only keep targets which are also targets of EBV miRNAs based on our EBV miRanda file.

```{r eval=TRUE, echo=TRUE}
miRanda <- getInputSpecies("Epstein_Barr", threshold = 140)
DiffExpmRNASub <- miRanComp(DiffExp, miRanda)
```

## Select miRNA

```{r eval=TRUE, echo=TRUE}
miRNA_select<-c("ebv-mir-bart9-5p")
```

## Combine the mRNA and miRNA file and define boundaries

```{r eval=TRUE, echo=TRUE}
Combine <- combiner(DiffExp, miRNAExp, miRNA_select)
geneVariant <- geneVari(Combine, miRNA_select)
```

## Run a one to one miRNA/mRNA gaussian regression model 

```{r eval=TRUE, echo=TRUE}
j <- runModel(`LMP-1` ~ `ebv-mir-bart9-5p`, 
              Combine, model = glm_poisson(), 
              scale = 100)
# Association between between mRNA and miRNA
print(modelTermPvalues(j)) 
```

## Running Gausian model over all individual miRNA mRNA models

```{r gauss, eval=TRUE, echo=TRUE,out.width="90%",out.height="90%", fig.align="center"}
blaGaus <- runModels(Combine,
                     geneVariant, miRNA_select,
                     family = glm_gaussian(), 
                     scale = 100)
par(oma=c(2,2,2,2))
par(mfrow=c(2,3),mar=c(4,3,3,2))
plot(blaGaus[["all_models"]][["BHLF1"]])
plot(modelData(blaGaus[["all_models"]][["BHLF1"]]))
#To test AIC model performance
G <- do.call(rbind.data.frame, blaGaus[["AICvalues"]]) 
names(G) <- c("AIC_G")
#Low values seems like a reasonable model
plot(density(G$AIC_G)) 
GM <- melt(G)
```

## Running poisson model over all individual miRNA mRNA models

```{r poisson, eval=TRUE, echo=TRUE, out.width="90%",out.height="90%", fig.align="center"}
blaPois <- runModels(Combine, 
                     geneVariant, miRNA_select, 
                     family = glm_poisson(), 
                     scale = 100)
par(oma=c(2,2,2,2))
par(mfrow=c(2,3),mar=c(4,3,3,2))
plot(blaPois[["all_models"]][["LMP-2A"]])
plot(modelData(blaPois[["all_models"]][["LMP-2A"]]))
P <- do.call(rbind.data.frame, blaPois[["AICvalues"]])
names(P) <- c("AIC_Po")
PM <- melt(P)
```

## Running negative binomial model over all individual miRNA mRNA models

```{r negbin, eval=TRUE, echo=TRUE, out.width="70%",out.height="90%", fig.align="center"}
blaNB <- runModels(Combine, 
                   geneVariant, miRNA_select, 
                   family = glm_nb(), scale = 100)
par(mar=c(4,3,3,2))
plot(modelData(blaNB[["all_models"]][["BALF1"]]))
B <- do.call(rbind.data.frame, blaNB[["AICvalues"]])
names(B) <- c("AIC_NB")
BM <- melt(B)
```

## Running zero inflated negative binomial model over all individual miRNA mRNA models

```{r zeroinfnegbin, eval=TRUE, echo=TRUE, out.width="70%",out.height="90%", fig.align="center"}
blazeroinflNB <- runModels(Combine, geneVariant,
                            miRNA_select,
                            family = glm_zeroinfl(dist = "negbin"),
                            scale = 100)
# To test AIC model performance
ZNB <- do.call(rbind.data.frame, blazeroinflNB[["AICvalues"]])
names(ZNB) <- c("AIC_ZNB")
par(mar=c(4,3,3,2))
plot(density(ZNB$AIC_ZNB))
ZNBM<-melt(ZNB)
```

## Running zero inflated poisson binomial model over all individual miRNA mRNA models

```{r zeroinfpoisson, message=FALSE, echo=TRUE, cache=FALSE, results=TRUE, out.width="70%",out.height="90%", fig.align="center"}
blazeroinfl <- runModels(Combine, geneVariant,
                         miRNA_select, 
                         family = glm_zeroinfl(), 
                         scale = 100)
# To test AIC model performance
Zp <- do.call(rbind.data.frame, blazeroinfl[["AICvalues"]])
names(Zp) <- c("AIC_Zp")
par(mar=c(4,3,3,2))
plot(density(Zp$AIC_Zp))
ZpM <- melt(Zp)
```

## Including Plots for all models to decide which to use 

```{r plots, eval=TRUE, echo=TRUE, fig.width = 5, fig.height=5, out.width="80%", dpi=300, fig.align="center"}
bindM <- rbind(PM, BM, GM, ZpM, ZNBM)
p2 <- ggplot(data = bindM, aes(x = value, group = variable, 
                               fill = variable)) +
  geom_density(adjust = 1.5, alpha = .3) +
  xlim(-400, 2000)+
  ggtitle("Plot of of AIC for ebv-mir-bart9-5p regressed all mRNAs ")+
  ylab("Density")+ xlab ("AIC Value")
p2
```

## The user can decide to use runModels() with glm_multi() 
where all available models will be run, the AICs will be compared and the best model will be chosen based on the miRNA-mRNA mocel AIC score. In the example bellow we are using the mode= "multi" option for combination of 2 miRNAs (multivariate model) for interaction model the user can choose the mode= "inter" option.

```{r message=FALSE,echo=TRUE, cache=FALSE, results=TRUE, warning=FALSE, comment=TRUE, warning=FALSE}
miRNA_select<-c("ebv-mir-bart9-5p","ebv-mir-bart6-3p")
Combine <- combiner(DiffExp, miRNAExp, miRNA_select)
geneVariant <- geneVari(Combine, miRNA_select)
MultiModel <- runModels(Combine, geneVariant,
                        miRNA_select, family = glm_multi(), 
                        mode="multi", scale = 10)
#print the name of the models used for the analysis
print(table(unlist(lapply(MultiModel$all_models, modelModelName)))) 
```

## The user can decide to use runModels() with specific models
where all available models will be run, the AICs will be compared and the best model will be chosen based on the miRNA-mRNA mocel AIC score. In the example bellow we are using the mode= "inter" option for combination of 2 miRNAs (multivariate model) for interaction model the user can choose the mode= "multi" option.

```{r message=FALSE,echo=TRUE, cache=FALSE, results=TRUE, warning=FALSE, comment=TRUE, warning=FALSE}
miRNA_select<-c("ebv-mir-bart9-5p","ebv-mir-bart6-3p")
Combine <- combiner(DiffExp, miRNAExp, miRNA_select)
geneVariant <- geneVari(Combine, miRNA_select)
InterModel <- runModels(Combine,
                        geneVariant, miRNA_select,
                        family = glm_multi(
                          models=list(glm_gaussian,
                          glm_poisson())),mode="inter", scale = 10)
#print the name of the models used for the analysis
print(table(unlist(lapply(InterModel$all_models, modelModelName))))
```

## Running all miRNA  and mRNA combinations 
Note for "inter" and "multi" mode options we only support cobinations of 2.

```{r message=FALSE,echo=TRUE, cache=FALSE, results=TRUE, warning=FALSE, comment=TRUE}
vMiRNA<-rownames(miRNAExp)
All_miRNAs_run<-runAllMirnaModels(mirnas =vMiRNA[1:5] ,
                                  DiffExpmRNA = DiffExpmRNASub, 
                                  DiffExpmiRNA = miRNAExp,
                                  miranda_data = miRanda,prob=0.75,
                                  cutoff=0.05,fdr_cutoff = 0.1, method = "fdr",
                                  family = glm_multi(), scale = 2, mode="multi")

hasgenes <- lapply(All_miRNAs_run, function(x) nrow(x$SigFDRGenes)) > 0
All_miRNAs_run <- All_miRNAs_run[hasgenes]
print(table(unlist(lapply(All_miRNAs_run$all_models, modelModelName))))
print(table(unlist(lapply
                   (All_miRNAs_run[[1]][["FDRModel"]][["all_models"]],
                     modelModelName))))
print(
  table(
    unlist(
      lapply(
        All_miRNAs_run[["ebv-mir-bart1-5p and ebv-mir-bart11-3p"]][["FDRModel"]][["all_models"]],
        modelModelName))))
```

# Part2 - Identify miRNA mRNA correlations across 3 or more time points

Load files from test directory or you can load individually and feed them in separately in a list: list[(mRNA1,mRNA2,mRNA)]
```{r eval=TRUE, echo=TRUE}
files <- local({
  filenames <- list.files(path="test", pattern="^.*\\.txt$", full.names=TRUE)
  files <- lapply(filenames, read.table, as.is=TRUE, header=TRUE, sep="\t")
  names(files) <- gsub("^.*/(.*)\\.txt$", "\\1", filenames)
  return(files)
})
```

## Get mRNAs

```{r eval=TRUE, echo=TRUE}
mrna_files <- files[grep("^mRNA", names(files))]
```

## Get mRNAs with particular fold change

```{r eval=TRUE, echo=TRUE}
mrna_files <- files[grep("^mRNA", names(files))]
mrna <- one2OneRnaMiRNA(mrna_files, pthreshold = 0.05)$foldchanges
```

## Get all miRNAs

```{r eval=TRUE, echo=TRUE}
mirna_files <- files[grep("^miRNA", names(files))]
mirna <- one2OneRnaMiRNA(mirna_files)$foldchanges
```

## Get mRNA miRNA correlation

```{r eval=TRUE, echo=TRUE}
corr_0 <- corMirnaRna(mrna, mirna,method="pearson")
```

## Make a background distribution correlation

```{r eval=TRUE, echo=TRUE, results=FALSE}
outs <- sampCorRnaMirna(mrna, mirna,method="pearson",
                        Shrounds = 100, Srounds = 1000)
```

## Plot density plots 
Density plot for background and corrs in our data. Note grey is the background distribution and red is the actual data.
```{r eval=TRUE, echo=TRUE, fig.width = 3, fig.height=3, out.width="80%", dpi=300, fig.align="center"}
#Draw density plot
mirRnaDensityCor(corr_0, outs)
```

## Get correlations below threshold

```{r eval=TRUE, echo=TRUE}
#Identify significant correlation
sig_corrs <- threshSig(corr_0, outs,pvalue = 0.05)
```

## Get mouse miRanda data

```{r eval=TRUE, echo=TRUE}
#Import concordant miRanda file
miRanda <- getInputSpecies("Mouse", threshold = 150)
```

## mRNA miRNA correlation heatmap 

Correlation heatmap for cor equal or less than -0.7.
Note upperbound for heatmap should be always less than the correlation threshold.
```{r eval=TRUE, echo=TRUE, fig.width = 8, fig.height=5, out.width="90%", dpi=300, fig.align="center"}
#Extract your target correlations based on miRanda and correlation threshold.
newcorr <- corMirnaRnaMiranda(mrna, mirna, -0.7, miRanda)
mirRnaHeatmap(newcorr,upper_bound = -0.6)
```

## get intersection of miRanda 

Get miRanda intersection and significant miRNA and mRNA interactions and the plot it.
```{r eval=TRUE, echo=TRUE, fig.width = 5, fig.height=4, out.width="80%", dpi=300, fig.align="center"}
#Make final results file for significant 
#correlations intersecting with miRanda file
results <- miRandaIntersect(sig_corrs, outs, mrna, mirna, miRanda)
#Draw correlation heatmap
p<- mirRnaHeatmap(results$corr,upper_bound =-0.99)
p
```

# Part3 - Identify significant miRNA mRNA relationships for 2 time points

## Import data
```{r eval=TRUE, echo=TRUE}
files <- local({
  filenames <- list.files(path="test", pattern="^.*\\.txt$", full.names=TRUE)
  files <- lapply(filenames, read.table, as.is=TRUE, header=TRUE, sep="\t")
  names(files) <- gsub("^.*/(.*)\\.txt$", "\\1", filenames)
  return(files)
})
```

## Only look for time point difference 0-5
```{r eval=TRUE, echo=TRUE}
mirna_files <- files[grep("^miRNA0_5", names(files))]
mrna_files <- files[grep("^mRNA0_5", names(files))]
```

## Get fold changes above thereshold
```{r eval=TRUE, echo=TRUE}
# Parse Fold Change Files for P value and Fold Change.
mrna <- one2OneRnaMiRNA(mrna_files, pthreshold = 0.05)$foldchanges
mirna <- one2OneRnaMiRNA(mirna_files)$foldchanges
```

## Estimate miRNA mRNA differences based on Fold Change
```{r eval=TRUE, echo=TRUE}
# Estimate the miRNA mRNA FC differences for your dataset
inter0 <- twoTimePoint(mrna, mirna)
```

## Make  background distribution
```{r eval=TRUE, echo=TRUE, message=FALSE, results=FALSE}
#Make a background distribution for your miRNA mRNA FC differences
outs <- twoTimePointSamp(mrna, mirna,Shrounds = 10 )
```

## miRanda data import
```{r eval=TRUE, echo=TRUE}
#Import concordant miRanda file
miRanda <- getInputSpecies("Mouse", threshold = 140)
```

## Identify relationships below threshold
```{r eval=TRUE, echo=TRUE}
#Identify miRNA mRNA relationships bellow a P value threshold, default is 0.05
sig_InterR <- threshSigInter(inter0, outs)
```

## miRanda intersection with results
```{r eval=TRUE, echo=TRUE}
#Intersect the mirRanda file with your output results
results <- mirandaIntersectInter(sig_InterR, outs, mrna, mirna, miRanda)
```

## Make dataframe and plots
```{r eval=TRUE, echo=TRUE, fig.width = 4, fig.height=4, out.width="90%", dpi=300, fig.align="center"}
#Create a results file for heatmap
final_results <- finInterResult(results)
#Draw plots of miRNA mRNA fold changes for your results file
par(mar=c(4,4,2,1))
drawInterPlots(mrna,mirna,final_results)
```

## mRNA miRNA heatmap of miRNA mRNA FC differences 
Heatmap for p value significant miRNA mRNA fold change differences 
when compared to backgound
```{r eval=TRUE, echo=TRUE, fig.width = 3, fig.height=2, out.width="80%", dpi=300, fig.align="center"}
CorRes<-results$corrs
#Draw heatmap for miRNA mRNA significant differences
#Note: you do not have to use the upper_bound function unless you want  
#investigate a particular range for miRNA mRNA differences/relationships
mirRnaHeatmapDiff(CorRes,upper_bound = 9.9)
```