### R code from vignette source 'vignettes/ChemmineR/inst/doc/ChemmineR.Rnw'

###################################################
### code chunk number 1: style
###################################################
BiocStyle::latex()


###################################################
### code chunk number 2: ChemmineR.Rnw:41-43
###################################################
options(width=95)
unlink("test.db")


###################################################
### code chunk number 3: ChemmineR.Rnw:84-86 (eval = FALSE)
###################################################
## source("http://bioconductor.org/biocLite.R") # Sources the biocLite.R installation script.
## biocLite("ChemmineR") # Installs the package.


###################################################
### code chunk number 4: ChemmineR.Rnw:91-92
###################################################
library("ChemmineR") # Loads the package


###################################################
### code chunk number 5: ChemmineR.Rnw:94-96 (eval = FALSE)
###################################################
## library(help="ChemmineR") # Lists all functions and classes 
## vignette("ChemmineR") # Opens this PDF manual from R


###################################################
### code chunk number 6: ChemmineR.Rnw:103-108
###################################################
data(sdfsample)
sdfset <- sdfsample
sdfset # Returns summary of SDFset
sdfset[1:4] # Subsetting of object 
sdfset[[1]] # Returns summarized content of one SDF


###################################################
### code chunk number 7: ChemmineR.Rnw:109-111 (eval = FALSE)
###################################################
## view(sdfset[1:4]) # Returns summarized content of many SDFs, not printed here
## as(sdfset[1:4], "list") # Returns complete content of many SDFs, not printed here


###################################################
### code chunk number 8: ChemmineR.Rnw:115-117 (eval = FALSE)
###################################################
## sdfset <- read.SDFset("http://faculty.ucr.edu/~tgirke/Documents/
##                        R_BioCond/Samples/sdfsample.sdf")


###################################################
### code chunk number 9: ChemmineR.Rnw:121-122 (eval = FALSE)
###################################################
## header(sdfset[1:4]) # Not printed here


###################################################
### code chunk number 10: ChemmineR.Rnw:123-124
###################################################
header(sdfset[[1]])


###################################################
### code chunk number 11: ChemmineR.Rnw:125-126 (eval = FALSE)
###################################################
## atomblock(sdfset[1:4]) # Not printed here


###################################################
### code chunk number 12: ChemmineR.Rnw:127-128
###################################################
atomblock(sdfset[[1]])[1:4,]


###################################################
### code chunk number 13: ChemmineR.Rnw:129-130 (eval = FALSE)
###################################################
## bondblock(sdfset[1:4]) # Not printed here


###################################################
### code chunk number 14: ChemmineR.Rnw:131-132
###################################################
bondblock(sdfset[[1]])[1:4,] 


###################################################
### code chunk number 15: ChemmineR.Rnw:133-134 (eval = FALSE)
###################################################
## datablock(sdfset[1:4]) # Not printed here


###################################################
### code chunk number 16: ChemmineR.Rnw:135-136
###################################################
datablock(sdfset[[1]])[1:4]


###################################################
### code chunk number 17: ChemmineR.Rnw:140-144
###################################################
cid(sdfset)[1:4] # Returns IDs from SDFset object
sdfid(sdfset)[1:4] # Returns IDs from SD file header block
unique_ids <- makeUnique(sdfid(sdfset))
cid(sdfset) <- unique_ids


###################################################
### code chunk number 18: ChemmineR.Rnw:148-154
###################################################
blockmatrix <- datablock2ma(datablocklist=datablock(sdfset))
   # Converts data block to matrix   
numchar <- splitNumChar(blockmatrix=blockmatrix) 
   # Splits to numeric and character matrix
numchar[[1]][1:2,1:2] # Slice of numeric matrix
numchar[[2]][1:2,10:11] # Slice of character matrix


###################################################
### code chunk number 19: ChemmineR.Rnw:158-160
###################################################
propma <- data.frame(MF=MF(sdfset), MW=MW(sdfset), atomcountMA(sdfset))
propma[1:4, ]


###################################################
### code chunk number 20: ChemmineR.Rnw:164-166
###################################################
datablock(sdfset) <- propma
datablock(sdfset[1])


###################################################
### code chunk number 21: ChemmineR.Rnw:170-173 (eval = FALSE)
###################################################
## grepSDFset("650001", sdfset, field="datablock", mode="subset") 
##    # Returns summary view of matches. Not printed here.
## .


###################################################
### code chunk number 22: ChemmineR.Rnw:175-176
###################################################
grepSDFset("650001", sdfset, field="datablock", mode="index") 


###################################################
### code chunk number 23: ChemmineR.Rnw:180-181 (eval = FALSE)
###################################################
## write.SDF(sdfset[1:4], file="sub.sdf", sig=TRUE)


###################################################
### code chunk number 24: plotstruct
###################################################
plot(sdfset[1:4], print=FALSE) # Plots structures to R graphics device


###################################################
### code chunk number 25: ChemmineR.Rnw:189-190 (eval = FALSE)
###################################################
## sdf.visualize(sdfset[1:4]) # Compound viewing in web browser


###################################################
### code chunk number 26: ChemmineR.Rnw:200-203 (eval = FALSE)
###################################################
## apset <- sdf2ap(sdfset) 
##    # Generate atom pair descriptor database for searching
## .


###################################################
### code chunk number 27: ChemmineR.Rnw:205-211
###################################################
data(apset)
   # Load sample apset data provided by library.
cmp.search(apset, apset[1], type=3, cutoff = 0.3, quiet=TRUE) 
   # Search apset database with single compound.
cmp.cluster(db=apset, cutoff = c(0.65, 0.5), quiet=TRUE)[1:4,] 
   # Binning clustering using variable similarity cutoffs.


###################################################
### code chunk number 28: ChemmineR.Rnw:328-330 (eval = FALSE)
###################################################
## sdfset <- read.SDFset("http://faculty.ucr.edu/~tgirke/Documents/
##                        R_BioCond/Samples/sdfsample.sdf")


###################################################
### code chunk number 29: ChemmineR.Rnw:332-336
###################################################
data(sdfsample) # Loads the same SDFset provided by the library
sdfset <- sdfsample 
valid <- validSDF(sdfset) # Identifies invalid SDFs in SDFset objects
sdfset <- sdfset[valid] # Removes invalid SDFs, if there are any


###################################################
### code chunk number 30: ChemmineR.Rnw:340-342 (eval = FALSE)
###################################################
## sdfstr <- read.SDFstr("http://faculty.ucr.edu/~tgirke/Documents/
##                        R_BioCond/Samples/sdfsample.sdf")


###################################################
### code chunk number 31: ChemmineR.Rnw:345-348
###################################################
sdfstr <- as(sdfset, "SDFstr")
sdfstr
as(sdfstr, "SDFset") 


###################################################
### code chunk number 32: ChemmineR.Rnw:355-358 (eval = FALSE)
###################################################
## data(smisample); smiset <- smisample
## write.SMI(smiset[1:4], file="sub.smi")
## smiset <- read.SMIset("sub.smi")


###################################################
### code chunk number 33: ChemmineR.Rnw:362-366
###################################################
data(smisample) # Loads the same SMIset provided by the library
smiset <- smisample 
smiset
view(smiset[1:2])


###################################################
### code chunk number 34: ChemmineR.Rnw:370-372
###################################################
cid(smiset[1:4])
(smi <- as.character(smiset[1:2]))


###################################################
### code chunk number 35: ChemmineR.Rnw:376-377
###################################################
as(smi, "SMIset") 


###################################################
### code chunk number 36: ChemmineR.Rnw:384-385 (eval = FALSE)
###################################################
## write.SDF(sdfset[1:4], file="sub.sdf")


###################################################
### code chunk number 37: ChemmineR.Rnw:389-390 (eval = FALSE)
###################################################
## write.SDF(sdfset[1:4], file="sub.sdf", sig=TRUE, cid=TRUE, db=NULL)


###################################################
### code chunk number 38: ChemmineR.Rnw:394-397 (eval = FALSE)
###################################################
## props <- data.frame(MF=MF(sdfset), MW=MW(sdfset), atomcountMA(sdfset))
## datablock(sdfset) <- props
## write.SDF(sdfset[1:4], file="sub.sdf", sig=TRUE, cid=TRUE)


###################################################
### code chunk number 39: ChemmineR.Rnw:401-405 (eval = FALSE)
###################################################
## sdf2str(sdf=sdfset[[1]], sig=TRUE, cid=TRUE) 
##    # Uses default components
## sdf2str(sdf=sdfset[[1]], head=letters[1:4], db=NULL) 
##    # Uses custom components for header and data block


###################################################
### code chunk number 40: ChemmineR.Rnw:409-412 (eval = FALSE)
###################################################
## write.SDF(sdfset[1:4], file="sub.sdf", sig=TRUE, cid=TRUE, db=NULL)
## write.SDF(sdfstr[1:4], file="sub.sdf")
## cat(unlist(as(sdfstr[1:4], "list")), file="sub.sdf", sep="\n")


###################################################
### code chunk number 41: ChemmineR.Rnw:417-420 (eval = FALSE)
###################################################
## data(smisample); smiset <- smisample # Sample data set
## write.SMI(smiset[1:4], file="sub.smi", cid=TRUE)
## write.SMI(smiset[1:4], file="sub.smi", cid=FALSE)


###################################################
### code chunk number 42: sdf2smiles (eval = FALSE)
###################################################
## data(sdfsample); sdfset <- sdfsample[1]
## smiles <- sdf2smiles(sdfset)
## smiles


###################################################
### code chunk number 43: smiles2sdf (eval = FALSE)
###################################################
## sdf <- smiles2sdf("CC(=O)OC1=CC=CC=C1C(=O)O\tname")
## view(sdf)


###################################################
### code chunk number 44: ChemmineR.Rnw:455-456 (eval = FALSE)
###################################################
## sdfStr <- convertFormat("SMI","SDF","CC(=O)OC1=CC=CC=C1C(=O)O\ttest_name")


###################################################
### code chunk number 45: ChemmineR.Rnw:464-465 (eval = FALSE)
###################################################
## convertFormatFile("SMI","SDF","test.smiles","test.sdf")


###################################################
### code chunk number 46: ChemmineR.Rnw:476-477 (eval = FALSE)
###################################################
## write.SDF(sdfset, "test.sdf")


###################################################
### code chunk number 47: ChemmineR.Rnw:481-482 (eval = FALSE)
###################################################
## sdfstr <- read.SDFstr("test.sdf")


###################################################
### code chunk number 48: ChemmineR.Rnw:486-488 (eval = FALSE)
###################################################
## write.SDFsplit(x=sdfstr, filetag="myfile", nmol=10)
##    # 'nmol' defines the number of molecules to write to each file


###################################################
### code chunk number 49: ChemmineR.Rnw:492-493 (eval = FALSE)
###################################################
## write.SDFsplit(x=sdfset, filetag="myfile", nmol=10)


###################################################
### code chunk number 50: ChemmineR.Rnw:500-501 (eval = FALSE)
###################################################
## write.SDF(sdfset, "test.sdf")


###################################################
### code chunk number 51: ChemmineR.Rnw:505-515 (eval = FALSE)
###################################################
## desc <- function(sdfset) {
##         cbind(SDFID=sdfid(sdfset),
##               # datablock2ma(datablocklist=datablock(sdfset)),
##               MW=MW(sdfset),
##               groups(sdfset),
##               APFP=desc2fp(x=sdf2ap(sdfset), descnames=1024, type="character"),
##               AP=sdf2ap(sdfset, type="character"),
##               rings(sdfset, type="count", upper=6, arom=TRUE)
##         )
## }


###################################################
### code chunk number 52: ChemmineR.Rnw:519-521 (eval = FALSE)
###################################################
## sdfStream(input="test.sdf", output="matrix.xls", fct=desc, Nlines=1000)
##    # 'Nlines': number of lines to read from input SD File at a time


###################################################
### code chunk number 53: ChemmineR.Rnw:525-527 (eval = FALSE)
###################################################
## sdfStream(input="test.sdf", output="matrix2.xls", append=FALSE, fct=desc, 
##           Nlines=1000, startline=950) 


###################################################
### code chunk number 54: ChemmineR.Rnw:531-536 (eval = FALSE)
###################################################
## indexDF <- read.delim("matrix.xls", row.names=1)[,1:4]
## indexDFsub <- indexDF[indexDF$MW < 400, ] 
##    # Selects molecules with MW < 400
## sdfset <- read.SDFindex(file="test.sdf", index=indexDFsub, type="SDFset")
##    # Collects results in 'SDFset' container


###################################################
### code chunk number 55: ChemmineR.Rnw:540-541 (eval = FALSE)
###################################################
## read.SDFindex(file="test.sdf", index=indexDFsub, type="file", outfile="sub.sdf")


###################################################
### code chunk number 56: ChemmineR.Rnw:545-547 (eval = FALSE)
###################################################
## apset <- read.AP(x="matrix.xls", type="ap", colid="AP") 
## apfp <- read.AP(x="matrix.xls", type="fp", colid="APFP")


###################################################
### code chunk number 57: ChemmineR.Rnw:551-554 (eval = FALSE)
###################################################
## apset <- read.AP(x=sdf2ap(sdfset[1:20], type="character"), type="ap")
## fpchar <- desc2fp(sdf2ap(sdfset[1:20]), descnames=1024, type="character")
## fpset <- as(fpchar, "FPset")


###################################################
### code chunk number 58: ChemmineR.Rnw:611-625
###################################################

data(sdfsample)

#create and initialize a new SQLite database
conn <- initDb("test.db")

# load data and compute 3 features: molecular weight, with the MW function, 
# and counts for RINGS and AROMATIC, as computed by rings, which 
# returns a data frame itself.
ids<-loadSdf(conn,sdfsample,
      function(sdfset) 
       data.frame(MW = MW(sdfset),  
                  rings(sdfset,type="count",upper=6, arom=TRUE))
      )


###################################################
### code chunk number 59: ChemmineR.Rnw:647-652
###################################################
lightIds <- findCompounds(conn,"MW",c("MW < 300"))
MW(getCompounds(conn,lightIds)) 

#names of matching compounds:
getCompoundNames(conn,lightIds)


###################################################
### code chunk number 60: ChemmineR.Rnw:659-665 (eval = FALSE)
###################################################
## view(sdfset[1:4]) # Summary view of several molecules
## length(sdfset) # Returns number of molecules
## sdfset[[1]] # Returns single molecule from SDFset as SDF object
## sdfset[[1]][[2]] # Returns atom block from first compound as matrix
## sdfset[[1]][[2]][1:4,] 
## c(sdfset[1:4], sdfset[5:8]) # Concatenation of several SDFsets


###################################################
### code chunk number 61: ChemmineR.Rnw:669-672 (eval = FALSE)
###################################################
## grepSDFset("650001", sdfset, field="datablock", mode="subset") 
##    # To return index, set mode="index")
## .


###################################################
### code chunk number 62: ChemmineR.Rnw:676-683 (eval = FALSE)
###################################################
## sdfid(sdfset[1:4])
##    # Retrieves CMP IDs from Molecule Name field in header block.
## cid(sdfset[1:4])
##    # Retrieves CMP IDs from ID slot in SDFset.
## unique_ids <- makeUnique(sdfid(sdfset)) 
##    # Creates unique IDs by appending a counter to duplicates.
## cid(sdfset) <- unique_ids # Assigns uniquified IDs to ID slot


###################################################
### code chunk number 63: ChemmineR.Rnw:687-691 (eval = FALSE)
###################################################
## view(sdfset[c("650001", "650012")])
## view(sdfset[4:1])
## mylog <- cid(sdfset) %in% c("650001", "650012")
## view(sdfset[mylog])


###################################################
### code chunk number 64: ChemmineR.Rnw:695-705 (eval = FALSE)
###################################################
## atomblock(sdf); sdf[[2]]; sdf[["atomblock"]] 
##    # All three methods return the same component
## header(sdfset[1:4])
## atomblock(sdfset[1:4])
## bondblock(sdfset[1:4])
## datablock(sdfset[1:4])
## header(sdfset[[1]])
## atomblock(sdfset[[1]])
## bondblock(sdfset[[1]])
## datablock(sdfset[[1]])


###################################################
### code chunk number 65: ChemmineR.Rnw:709-712 (eval = FALSE)
###################################################
## sdfset[[1]][[2]][1,1] <- 999
## atomblock(sdfset)[1] <- atomblock(sdfset)[2]
## datablock(sdfset)[1] <- datablock(sdfset)[2]


###################################################
### code chunk number 66: ChemmineR.Rnw:716-718 (eval = FALSE)
###################################################
## datablock(sdfset) <- as.matrix(iris[1:100,])
## view(sdfset[1:4])


###################################################
### code chunk number 67: ChemmineR.Rnw:722-725 (eval = FALSE)
###################################################
## as(sdfstr[1:2], "list") 
## as(sdfstr[[1]], "SDF")
## as(sdfstr[1:2], "SDFset")


###################################################
### code chunk number 68: ChemmineR.Rnw:729-734 (eval = FALSE)
###################################################
## sdfcomplist <- as(sdf, "list")
## sdfcomplist <- as(sdfset[1:4], "list"); as(sdfcomplist[[1]], "SDF")
## sdflist <- as(sdfset[1:4], "SDF"); as(sdflist, "SDFset")
## as(sdfset[[1]], "SDFstr")
## as(sdfset[[1]], "SDFset")


###################################################
### code chunk number 69: ChemmineR.Rnw:738-741 (eval = FALSE)
###################################################
## as(sdfset[1:4], "SDF")
## as(sdfset[1:4], "list")
## as(sdfset[1:4], "SDFstr")


###################################################
### code chunk number 70: boxplot
###################################################
propma <- atomcountMA(sdfset, addH=FALSE)
boxplot(propma, col="blue", main="Atom Frequency")


###################################################
### code chunk number 71: ChemmineR.Rnw:752-753 (eval = FALSE)
###################################################
## boxplot(rowSums(propma), main="All Atom Frequency")   


###################################################
### code chunk number 72: ChemmineR.Rnw:757-759
###################################################
data(atomprop)
atomprop[1:4,] 


###################################################
### code chunk number 73: ChemmineR.Rnw:763-765
###################################################
MW(sdfset[1:4], addH=FALSE)
MF(sdfset[1:4], addH=FALSE)


###################################################
### code chunk number 74: ChemmineR.Rnw:769-770
###################################################
groups(sdfset[1:4], groups="fctgroup", type="countMA")


###################################################
### code chunk number 75: ChemmineR.Rnw:774-780
###################################################
propma <- data.frame(MF=MF(sdfset, addH=FALSE), MW=MW(sdfset, addH=FALSE), 
                     Ncharges=sapply(bonds(sdfset, type="charge"), length), 
                     atomcountMA(sdfset, addH=FALSE), groups(sdfset, 
		     type="countMA"), rings(sdfset, upper=6, type="count", 
		     arom=TRUE))
propma[1:4,]


###################################################
### code chunk number 76: ChemmineR.Rnw:784-788 (eval = FALSE)
###################################################
## datablock(sdfset) <- propma # Works with all SDF components
## datablock(sdfset)[1:4]
## test <- apply(propma[1:4,], 1, function(x) data.frame(col=colnames(propma), value=x))
## sdf.visualize(sdfset[1:4], extra = test)


###################################################
### code chunk number 77: ChemmineR.Rnw:792-794 (eval = FALSE)
###################################################
## datablocktag(sdfset, tag="PUBCHEM_NIST_INCHI")
## datablocktag(sdfset, tag="PUBCHEM_OPENEYE_CAN_SMILES")


###################################################
### code chunk number 78: ChemmineR.Rnw:798-805 (eval = FALSE)
###################################################
## blockmatrix <- datablock2ma(datablocklist=datablock(sdfset)) 
##    # Converts data block to matrix 
## numchar <- splitNumChar(blockmatrix=blockmatrix) 
##    # Splits matrix to numeric matrix and character matrix
## numchar[[1]][1:4,]; numchar[[2]][1:4,] 
##    # Splits matrix to numeric matrix and character matrix
## .


###################################################
### code chunk number 79: contable (eval = FALSE)
###################################################
## conMA(sdfset[1:2], exclude=c("H"))
##    # Create bond matrix for first two molecules in sdfset
## conMA(sdfset[[1]], exclude=c("H"))
##    # Return bond matrix for first molecule
## plot(sdfset[1], atomnum = TRUE, noHbonds=FALSE , no_print_atoms = "", atomcex=0.8)
##    # Plot its structure with atom numbering
## rowSums(conMA(sdfset[[1]], exclude=c("H")))
##    # Return number of non-H bonds for each atom
## .


###################################################
### code chunk number 80: ChemmineR.Rnw:825-828
###################################################
bonds(sdfset[[1]], type="bonds")[1:4,]
bonds(sdfset[1:2], type="charge")
bonds(sdfset[1:2], type="addNH")


###################################################
### code chunk number 81: ChemmineR.Rnw:837-838
###################################################
(ringatoms <- rings(sdfset[1], upper=Inf, type="all", arom=FALSE, inner=FALSE))


###################################################
### code chunk number 82: ChemmineR.Rnw:842-844
###################################################
atomindex <- as.numeric(gsub(".*_", "", unique(unlist(ringatoms))))
plot(sdfset[1], print=FALSE, colbonds=atomindex)


###################################################
### code chunk number 83: ChemmineR.Rnw:848-849
###################################################
plot(sdfset[1], print=FALSE, atomnum=TRUE, no_print_atoms="H")


###################################################
### code chunk number 84: ChemmineR.Rnw:853-854
###################################################
rings(sdfset[1], upper=Inf, type="all", arom=TRUE, inner=FALSE)  


###################################################
### code chunk number 85: ChemmineR.Rnw:858-859
###################################################
rings(sdfset[1], upper=6, type="arom", arom=TRUE, inner=FALSE)


###################################################
### code chunk number 86: ChemmineR.Rnw:863-864
###################################################
rings(sdfset[1:4], upper=Inf, type="count", arom=TRUE, inner=TRUE) 


###################################################
### code chunk number 87: plotstruct2
###################################################
data(sdfsample)
sdfset <- sdfsample
plot(sdfset[1:4], print=FALSE) # 'print=TRUE' returns SDF summaries


###################################################
### code chunk number 88: ChemmineR.Rnw:879-881 (eval = FALSE)
###################################################
## plot(sdfset[1:4], griddim=c(2,2), print_cid=letters[1:4], print=FALSE, 
##      noHbonds=FALSE)


###################################################
### code chunk number 89: plotstruct3
###################################################
plot(sdfset["CMP1"], atomnum = TRUE,  noHbonds=F , no_print_atoms = "", 
     atomcex=0.8, sub=paste("MW:", MW(sdfsample["CMP1"])), print=FALSE)


###################################################
### code chunk number 90: plotstruct4
###################################################
plot(sdfset[1], print=FALSE, colbonds=c(22,26,25,3,28,27,2,23,21,18,8,19,20,24))


###################################################
### code chunk number 91: ChemmineR.Rnw:900-901 (eval = FALSE)
###################################################
## sdf.visualize(sdfset[1:4])


###################################################
### code chunk number 92: ChemmineR.Rnw:911-912 (eval = FALSE)
###################################################
## sdf.visualize(sdfset[1:4], extra=month.name[1:4])


###################################################
### code chunk number 93: ChemmineR.Rnw:916-919 (eval = FALSE)
###################################################
## extra <- apply(propma[1:4,], 1, function(x) 
##          data.frame(Property=colnames(propma), Value=x))
## sdf.visualize(sdfset[1:4], extra=extra)


###################################################
### code chunk number 94: ChemmineR.Rnw:923-924 (eval = FALSE)
###################################################
## sdf.visualize(sdfset[1:4], extra=bondblock(sdfset[1:4]))


###################################################
### code chunk number 95: plotmcs
###################################################
library(fmcsR)
data(fmcstest) # Loads test sdfset object
test <- fmcs(fmcstest[1], fmcstest[2], au=2, bu=1) # Searches for MCS with mismatches
plotMCS(test) # Plots both query compounds with MCS in color


###################################################
### code chunk number 96: ChemmineR.Rnw:942-944
###################################################
ap <- sdf2ap(sdfset[[1]]) # For single compound
ap


###################################################
### code chunk number 97: ChemmineR.Rnw:945-946 (eval = FALSE)
###################################################
## apset <- sdf2ap(sdfset) # For many compounds. 


###################################################
### code chunk number 98: ChemmineR.Rnw:947-948
###################################################
view(apset[1:4])


###################################################
### code chunk number 99: ChemmineR.Rnw:952-955 (eval = FALSE)
###################################################
## cid(apset[1:4]) # Compound IDs
## ap(apset[1:4]) # Atom pair descriptors
## db.explain(apset[1]) # Return atom pairs in human readable format


###################################################
### code chunk number 100: ChemmineR.Rnw:959-962 (eval = FALSE)
###################################################
## apset2descdb(apset) # Returns old list-style AP database
## tmp <- as(apset, "list") # Returns list
## as(tmp, "APset") # Converts list back to APset


###################################################
### code chunk number 101: ChemmineR.Rnw:969-973 (eval = FALSE)
###################################################
## save(sdfset, file = "sdfset.rda", compress = TRUE)
## load("sdfset.rda")
## save(apset, file = "apset.rda", compress = TRUE)
## load("apset.rda")


###################################################
### code chunk number 102: ChemmineR.Rnw:978-980
###################################################
cmp.similarity(apset[1], apset[2])
cmp.similarity(apset[1], apset[1])


###################################################
### code chunk number 103: ChemmineR.Rnw:985-986
###################################################
cmp.search(apset, apset["650065"], type=3, cutoff = 0.3, quiet=TRUE) 


###################################################
### code chunk number 104: ChemmineR.Rnw:989-991
###################################################
cmp.search(apset, apset["650065"], type=1, cutoff = 0.3, quiet=TRUE) 
cmp.search(apset, apset["650065"], type=2, cutoff = 0.3, quiet=TRUE) 


###################################################
### code chunk number 105: ChemmineR.Rnw:998-999
###################################################
showClass("FPset")


###################################################
### code chunk number 106: ChemmineR.Rnw:1003-1006
###################################################
data(apset)
(fpset <- desc2fp(apset))
view(fpset[1:2])


###################################################
### code chunk number 107: ChemmineR.Rnw:1010-1017
###################################################
fpset[1:4] # behaves like a list
fpset[[1]] # returns FP object
length(fpset) # number of compounds
cid(fpset) # returns compound ids
fpset[10] <- 0 # replacement of 10th fingerprint to all zeros  
cid(fpset) <- 1:length(fpset) # replaces compound ids
c(fpset[1:4], fpset[11:14]) # concatenation of several FPset objects


###################################################
### code chunk number 108: ChemmineR.Rnw:1021-1023
###################################################
fpma <- as.matrix(fpset) # coerces FPset to matrix 
as(fpma, "FPset")


###################################################
### code chunk number 109: ChemmineR.Rnw:1027-1029
###################################################
fpchar <- as.character(fpset) # coerces FPset to character strings
as(fpchar, "FPset") # construction of FPset class from character vector


###################################################
### code chunk number 110: ChemmineR.Rnw:1033-1034
###################################################
fpSim(fpset[1], fpset, method="Tanimoto", cutoff=0.4, top=4)


###################################################
### code chunk number 111: ChemmineR.Rnw:1041-1044 (eval = FALSE)
###################################################
## data(sdfsample)
## sdfset <- sdfsample[1:10]
## apset <- sdf2ap(sdfset)


###################################################
### code chunk number 112: ChemmineR.Rnw:1048-1049 (eval = FALSE)
###################################################
## fpset <- desc2fp(apset, descnames=1024, type="FPset")


###################################################
### code chunk number 113: ChemmineR.Rnw:1053-1055 (eval = FALSE)
###################################################
## fpset1024 <- names(rev(sort(table(unlist(as(apset, "list")))))[1:1024])
## fpset <- desc2fp(apset, descnames=fpset1024, type="FPset")


###################################################
### code chunk number 114: ChemmineR.Rnw:1059-1061 (eval = FALSE)
###################################################
## fpchar <- desc2fp(x=apset, descnames=1024, type="character")
## fpchar <- as.character(fpset) 


###################################################
### code chunk number 115: ChemmineR.Rnw:1065-1067 (eval = FALSE)
###################################################
## fpma <- as.matrix(fpset) 
## fpset <- as(fpma, "FPset")


###################################################
### code chunk number 116: ChemmineR.Rnw:1071-1072 (eval = FALSE)
###################################################
## fpSim(fpset[1], fpset, method="Tanimoto")


###################################################
### code chunk number 117: ChemmineR.Rnw:1076-1077 (eval = FALSE)
###################################################
## fpSim(fpset[1], fpset, method="Tversky", cutoff=0.4, top=4, alpha=0.5, beta=1)


###################################################
### code chunk number 118: ChemmineR.Rnw:1081-1083 (eval = FALSE)
###################################################
## myfct <- function(a, b, c, d) c/(a+b+c+d)
## fpSim(fpset[1], fpset, method=myfct) 


###################################################
### code chunk number 119: ChemmineR.Rnw:1087-1090 (eval = FALSE)
###################################################
## simMAap <- sapply(cid(apfpset), function(x) fpSim(x=apfpset[x], apfpset, sorted=FALSE))
## hc <- hclust(as.dist(1-simMAap), method="single")
## plot(as.dendrogram(hc), edgePar=list(col=4, lwd=2), horiz=TRUE)


###################################################
### code chunk number 120: ChemmineR.Rnw:1097-1102
###################################################
cid(sdfset) <- sdfid(sdfset)
fpset <- fp2bit(sdfset, type=1) 
fpset <- fp2bit(sdfset, type=2)
fpset <- fp2bit(sdfset, type=3)
fpset


###################################################
### code chunk number 121: ChemmineR.Rnw:1106-1107
###################################################
fpSim(fpset[1], fpset[2])


###################################################
### code chunk number 122: ChemmineR.Rnw:1112-1113
###################################################
fpSim(fpset["650065"], fpset, method="Tanimoto", cutoff=0.6, top=6) 


###################################################
### code chunk number 123: search_result
###################################################
cid(sdfset) <- cid(apset) # Assure compound name consistency among objects.
plot(sdfset[names(cmp.search(apset, apset["650065"], type=2, cutoff=4, 
     quiet=TRUE))], print=FALSE)


###################################################
### code chunk number 124: ChemmineR.Rnw:1127-1129 (eval = FALSE)
###################################################
## similarities <- cmp.search(apset, apset[1], type=3, cutoff = 10)
## sdf.visualize(sdfset[similarities[,1]], extra=similarities[,3])


###################################################
### code chunk number 125: ChemmineR.Rnw:1140-1142
###################################################
cmp.duplicated(apset, type=1)[1:4] # Returns AP duplicates as logical vector
cmp.duplicated(apset, type=2)[1:4,] # Returns AP duplicates as data frame


###################################################
### code chunk number 126: duplicates
###################################################
plot(sdfset[c("650059","650060", "650065", "650066")], print=FALSE)


###################################################
### code chunk number 127: ChemmineR.Rnw:1151-1153
###################################################
apdups <- cmp.duplicated(apset, type=1)
sdfset[which(!apdups)]; apset[which(!apdups)]


###################################################
### code chunk number 128: ChemmineR.Rnw:1162-1166
###################################################
count <- table(datablocktag(sdfset, tag="PUBCHEM_NIST_INCHI"))
count <- table(datablocktag(sdfset, tag="PUBCHEM_OPENEYE_CAN_SMILES"))
count <- table(datablocktag(sdfset, tag="PUBCHEM_MOLECULAR_FORMULA"))
count[1:4]


###################################################
### code chunk number 129: ChemmineR.Rnw:1196-1198
###################################################
clusters <- cmp.cluster(db=apset, cutoff = c(0.7, 0.8, 0.9), quiet = TRUE)
clusters[1:12,]


###################################################
### code chunk number 130: ChemmineR.Rnw:1203-1207
###################################################
fpset <- desc2fp(apset)
clusters2 <- cmp.cluster(fpset, cutoff=c(0.5, 0.7, 0.9), method="Tanimoto", 
                         quiet=TRUE) 
clusters2[1:12,]


###################################################
### code chunk number 131: ChemmineR.Rnw:1211-1213
###################################################
clusters3 <- cmp.cluster(fpset, cutoff=c(0.5, 0.7, 0.9), method="Tversky", 
                         alpha=0.3, beta=0.7, quiet=TRUE) 


###################################################
### code chunk number 132: ChemmineR.Rnw:1217-1220
###################################################
cluster.sizestat(clusters, cluster.result=1)
cluster.sizestat(clusters, cluster.result=2)
cluster.sizestat(clusters, cluster.result=3)


###################################################
### code chunk number 133: ChemmineR.Rnw:1224-1228 (eval = FALSE)
###################################################
## clusters <- cmp.cluster(db=apset, cutoff = c(0.65, 0.5, 0.3), 
##                         save.distances="distmat.rda") 
##    # Saves distance matrix to file "distmat.rda" in current working directory.
## load("distmat.rda") # Loads distance matrix.


###################################################
### code chunk number 134: ChemmineR.Rnw:1256-1258
###################################################
data(apset)
fpset <- desc2fp(apset)


###################################################
### code chunk number 135: ChemmineR.Rnw:1262-1264
###################################################
jarvisPatrick(nearestNeighbors(apset,numNbrs=6), k=5, mode="a1a2b") #Using "APset"
jarvisPatrick(nearestNeighbors(fpset,numNbrs=6), k=5, mode="a1a2b") #Using "FPset"


###################################################
### code chunk number 136: ChemmineR.Rnw:1269-1271
###################################################
cl<-jarvisPatrick(nearestNeighbors(fpset,cutoff=0.6, method="Tanimoto"), k=2 ,mode="b")
byCluster(cl)


###################################################
### code chunk number 137: ChemmineR.Rnw:1275-1279
###################################################
nnm <- nearestNeighbors(fpset,numNbrs=6)
nnm$names[1:4]
nnm$ids[1:4,]
nnm$similarities[1:4,]


###################################################
### code chunk number 138: ChemmineR.Rnw:1283-1286
###################################################
nnm <- trimNeighbors(nnm,cutoff=0.4)
nnm$similarities[1:4,]


###################################################
### code chunk number 139: ChemmineR.Rnw:1290-1291
###################################################
jarvisPatrick(nnm, k=5,mode="b")


###################################################
### code chunk number 140: ChemmineR.Rnw:1295-1298
###################################################
nn <- matrix(c(1,2,2,1),2,2,dimnames=list(c('one','two')))
nn
byCluster(jarvisPatrick(fromNNMatrix(nn),k=1))


###################################################
### code chunk number 141: ChemmineR.Rnw:1311-1314 (eval = FALSE)
###################################################
## cluster.visualize(apset, clusters, size.cutoff=2, quiet = TRUE) 
##    # Color codes clusters with at least two members.
## cluster.visualize(apset, clusters, quiet = TRUE) # Plots all items.


###################################################
### code chunk number 142: mds_scatter
###################################################
library(scatterplot3d)
coord <- cluster.visualize(apset, clusters, size.cutoff=1, dimensions=3, quiet=TRUE)
scatterplot3d(coord)


###################################################
### code chunk number 143: ChemmineR.Rnw:1325-1332 (eval = FALSE)
###################################################
## library(rgl)
## rgl.open(); offset <- 50; par3d(windowRect=c(offset, offset, 640+offset, 640+offset))
## rm(offset); rgl.clear(); rgl.viewpoint(theta=45, phi=30, fov=60, zoom=1)
## spheres3d(coord[,1], coord[,2], coord[,3], radius=0.03, color=coord[,4], alpha=1, 
## shininess=20); aspect3d(1, 1, 1)
## axes3d(col='black'); title3d("", "", "", "", "", col='black'); bg3d("white") 
##    # To save a snapshot of the graph, one can use the command rgl.snapshot("test.png").


###################################################
### code chunk number 144: mds_scatter
###################################################
dummy <- cmp.cluster(db=apset, cutoff=0, save.distances="distmat.rda", quiet=TRUE)
load("distmat.rda")


###################################################
### code chunk number 145: hclust
###################################################
hc <- hclust(as.dist(distmat), method="single")
hc[["labels"]] <- cid(apset) # Assign correct item labels
plot(as.dendrogram(hc), edgePar=list(col=4, lwd=2), horiz=T) 


###################################################
### code chunk number 146: fp_hclust (eval = FALSE)
###################################################
## simMA <- sapply(cid(fpset), function(x) fpSim(fpset[x], fpset, sorted=FALSE)) 
## hc <- hclust(as.dist(1-simMA), method="single")  
## plot(as.dendrogram(hc), edgePar=list(col=4, lwd=2), horiz=TRUE)


###################################################
### code chunk number 147: heatmap
###################################################
library(gplots)
heatmap.2(1-distmat, Rowv=as.dendrogram(hc), Colv=as.dendrogram(hc), 
          col=colorpanel(40, "darkblue", "yellow", "white"), 
          density.info="none", trace="none")


###################################################
### code chunk number 148: getIds (eval = FALSE)
###################################################
## compounds <- getIds(c(111,123))
## compounds


###################################################
### code chunk number 149: getIds (eval = FALSE)
###################################################
## compounds <- searchString("CC(=O)OC1=CC=CC=C1C(=O)O")
## compounds


###################################################
### code chunk number 150: getIds (eval = FALSE)
###################################################
## data(sdfsample); sdfset <- sdfsample[1]
## compounds <- searchSim(sdfset)
## compounds


###################################################
### code chunk number 151: sessionInfo
###################################################
toLatex(sessionInfo())