## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    library(DeepPINCS)
    example_cpi <- example_cpi[1:500,]
    validation_split <- 0.3
    idx <- sample(seq_len(length(example_cpi[,1])))
    train_idx <- seq_len(length(example_cpi[,1])) %in%
        idx[seq_len(round(length(example_cpi[,1]) * (1 - validation_split)))]
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    net_args <- list(
        compound = "gcn_in_out",
        compound_args = list(
            gcn_units = c(128, 64),
            gcn_activation = c("relu", "relu"),
            fc_units = c(10),
            fc_activation = c("relu")),
        protein = "cnn_in_out",
        protein_args = list(
            cnn_filters = c(32),
            cnn_kernel_size = c(3),
            cnn_activation = c("relu"),
            fc_units = c(10),
            fc_activation = c("relu")),
        fc_units = c(1),
        fc_activation = c("sigmoid"),
        loss = "binary_crossentropy",
        optimizer = keras::optimizer_adam(),
        metrics = "accuracy")
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    compound_max_atoms <- 50
    protein_embedding_dim <- 16
    protein_length_seq <- 100
    gcn_cnn_cpi <- fit_cpi(
        smiles = example_cpi[train_idx, 1],
        AAseq = example_cpi[train_idx, 2], 
        outcome = example_cpi[train_idx, 3],
        compound_type = "graph",
        compound_max_atoms = compound_max_atoms,
        protein_length_seq = protein_length_seq,
        protein_embedding_dim = protein_embedding_dim,
        protein_ngram_max = 2,
        protein_ngram_min = 1,
        smiles_val = example_cpi[!train_idx, 1],
        AAseq_val = example_cpi[!train_idx, 2],
        outcome_val = example_cpi[!train_idx, 3],
        net_args = net_args,
        epochs = 20,
        batch_size = 64,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    ttgsea::plot_model(gcn_cnn_cpi$model)
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    pred <- predict_cpi(gcn_cnn_cpi,
        smiles = example_cpi[!train_idx, 1],
        AAseq = example_cpi[!train_idx, 2],
        batch_size = 32)
    pred_calss <- ifelse(pred$values > 0.5, 1, 0)
    table(pred_calss, example_cpi[!train_idx, 3])
    
    roc <- PRROC::roc.curve(scores.class0 = pred$values[example_cpi[!train_idx, 3] == 1],
        scores.class1 = pred$values[example_cpi[!train_idx,3] == 0],
        curve = TRUE)
    plot(roc)
    pr <- PRROC::pr.curve(scores.class0 = pred$values[example_cpi[!train_idx, 3] == 1],
        scores.class1 = pred$values[example_cpi[!train_idx,3] == 0],
        curve = TRUE)
    plot(pr)
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    library(DeepPINCS)
    validation_split <- 0.3
    idx <- sample(seq_len(length(example_cci[,1])))
    train_idx <- seq_len(length(example_cci[,1])) %in%
        idx[seq_len(round(length(example_cci[,1]) * (1 - validation_split)))]
    
    mlp_mlp_cci <- fit_cpi(
        smiles = example_cci[train_idx, 1:2],
        outcome = example_cci[train_idx, 3],
        compound_type = "fingerprint",
        smiles_val = example_cci[!train_idx, 1:2],
        outcome_val = example_cci[!train_idx, 3],
        net_args = list(
            compound = "mlp_in_out",
            compound_args = list(
                fc_units = c(10, 5),
                fc_activation = c("relu", "relu")),
            fc_units = c(1),
            fc_activation = c("sigmoid"),
            loss = "binary_crossentropy",
            optimizer = keras::optimizer_adam(),
            metrics = "accuracy"),
        epochs = 20, batch_size = 64,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    ttgsea::plot_model(mlp_mlp_cci$model)
    
    pred <- predict_cpi(mlp_mlp_cci,
        smiles = example_cci[!train_idx, 1:2],
        batch_size = 32)
    pred_calss <- ifelse(pred$values > 0.5, 1, 0)
    table(pred_calss, example_cci[!train_idx, 3])
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    validation_split <- 0.3
    idx <- sample(seq_len(length(example_ppi[,1])))
    train_idx <- seq_len(length(example_ppi[,1])) %in%
        idx[seq_len(round(length(example_ppi[,1]) * (1 - validation_split)))]
    
    protein_embedding_dim <- 16
    protein_length_seq <- 100
    mlp_mlp_ppi <- fit_cpi(
        AAseq = example_ppi[train_idx, 1:2],
        outcome = example_ppi[train_idx, 3],
        protein_length_seq = protein_length_seq,
        protein_embedding_dim = protein_embedding_dim,
        AAseq_val = example_ppi[!train_idx, 1:2],
        outcome_val = example_ppi[!train_idx, 3],
        net_args = list(
            protein = "mlp_in_out",
            protein_args = list(
                fc_units = c(10, 5),
                fc_activation = c("relu", "relu")),
            fc_units = c(1),
            fc_activation = c("sigmoid"),
            loss = "binary_crossentropy",
            optimizer = keras::optimizer_adam(),
            metrics = "accuracy"),
        epochs = 20, batch_size = 64,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    ttgsea::plot_model(mlp_mlp_ppi$model)
    
    pred <- predict_cpi(mlp_mlp_ppi,
        AAseq = example_ppi[!train_idx, 1:2],
        batch_size = 32)
    pred_calss <- ifelse(pred$values > 0.5, 1, 0)
    table(pred_calss, example_ppi[!train_idx,3])
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    validation_split <- 0.1
    idx <- sample(seq_len(length(example_pd[,1])))
    train_idx <- seq_len(length(example_pd[,1])) %in%
        idx[seq_len(round(length(example_pd[,1]) * (1 - validation_split)))]
    
    protein_embedding_dim <- 16
    protein_length_seq <- 30
    mlp_mlp_pd <- fit_cpi(
        AAseq = example_pd[train_idx, 1:2],
        outcome = example_pd[train_idx, 3],
        protein_length_seq = protein_length_seq,
        protein_embedding_dim = protein_embedding_dim,
        AAseq_val = example_pd[!train_idx, 1:2],
        outcome_val = example_pd[!train_idx, 3],
        net_args = list(
            protein = "mlp_in_out",
            protein_args = list(
                fc_units = c(10, 5),
                fc_activation = c("relu", "relu")),
            fc_units = c(1),
            fc_activation = c("sigmoid"),
            loss = "binary_crossentropy",
            optimizer = keras::optimizer_adam(),
            metrics = "accuracy"),
        epochs = 30, batch_size = 16,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    
    pred <- predict_cpi(mlp_mlp_pd,
        AAseq = example_pd[!train_idx, 1:2],
        batch_size = 16)
    pred_calss <- ifelse(pred$values > 0.5, 1, 0)
    table(pred_calss, example_pd[!train_idx, 3])
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    validation_split <- 0.3
    idx <- sample(seq_len(length(example_chem[,1])))
    train_idx <- seq_len(length(example_chem[,1])) %in%
        idx[seq_len(round(length(example_chem[,1]) * (1 - validation_split)))]
    
    compound_length_seq <- 50
    compound_embedding_dim <- 16
    gcn_chem <- fit_cpi(
        smiles = example_chem[train_idx, 1],
        outcome = example_chem[train_idx, 2],
        compound_type = "sequence",
        compound_length_seq = compound_length_seq,
        compound_embedding_dim = compound_embedding_dim,
        smiles_val = example_chem[!train_idx, 1],
        outcome_val = example_chem[!train_idx, 2],
        net_args = list(
            compound = "mlp_in_out",
            compound_args = list(
                fc_units = c(5),
                fc_activation = c("relu")),
            fc_units = c(1),
            fc_activation = c("sigmoid"),
            loss='binary_crossentropy',
            optimizer = keras::optimizer_adam(),
            metrics = "accuracy"),
        epochs = 20, batch_size = 16,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    ttgsea::plot_model(gcn_chem$model)
    
    pred <- predict_cpi(gcn_chem, smiles = example_chem[!train_idx, 1])
    pred_calss <- ifelse(pred$values > 0.5, 1, 0)
    table(pred_calss, smiles = example_chem[!train_idx,2])
}

## ----eval=TRUE----------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    example_prot <- example_prot[1:500,]
    example_prot[,2] <- as.numeric(factor(example_prot[,2])) - 1
    
    validation_split <- 0.3
    idx <- sample(seq_len(length(example_prot[,1])))
    train_idx <- seq_len(length(example_prot[,1])) %in%
        idx[seq_len(round(length(example_prot[,1]) * (1 - validation_split)))]
    
    protein_embedding_dim <- 16
    protein_length_seq <- 100
    rnn_prot <- fit_cpi(
        AAseq = example_prot[train_idx, 1],
        outcome = to_categorical(example_prot[train_idx, 2]),
        protein_length_seq = protein_length_seq,
        protein_embedding_dim = protein_embedding_dim,
        AAseq_val = example_prot[!train_idx, 1],
        outcome_val = to_categorical(example_prot[!train_idx, 2]),
        net_args = list(
            protein = "rnn_in_out",
            protein_args = list(
                rnn_type = c("gru"),
                rnn_bidirectional = c(TRUE),
                rnn_units = c(50),
                rnn_activation = c("relu"),
                fc_units = c(10),
                fc_activation = c("relu")),
            fc_units = c(3),
            fc_activation = c("softmax"),
            loss = 'categorical_crossentropy',
            optimizer = keras::optimizer_adam(clipvalue = 0.5),
            metrics = "accuracy"),
        epochs = 20, batch_size = 64,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 10,
            restore_best_weights = TRUE))
    ttgsea::plot_model(rnn_prot$model)
    
    val_index <- seq_len(length(example_prot[,2]))[!train_idx]
    if (!is.null(rnn_prot$preprocessing$removed_AAseq_val)) {
        pred <- predict_cpi(rnn_prot,
            AAseq = example_prot[val_index[-rnn_prot$preprocessing$removed_AAseq_val[[1]]], 1])
        pred_calss <- apply(pred$values, 1, which.max) - 1
        table(pred_calss, example_prot[val_index[-rnn_prot$preprocessing$removed_AAseq_val[[1]]], 2])
    } else {
        pred <- predict_cpi(rnn_prot, AAseq = example_prot[!train_idx, 1])
        pred_calss <- apply(pred$values, 1, which.max) - 1
        table(pred_calss, example_prot[!train_idx, 2])
    }
}

## -----------------------------------------------------------------------------
if (keras::is_keras_available() & reticulate::py_available()) {
    compound_length_seq <- 50
    protein_length_seq <- 500
    compound_embedding_dim <- 16
    protein_embedding_dim <- 16
    
    mlp_mlp <- fit_cpi(
        smiles = example_bioassay[,1],
        AAseq = example_bioassay[,2], 
        outcome = example_bioassay[,3],
        compound_type = "sequence",
        compound_length_seq = compound_length_seq,
        protein_length_seq = protein_length_seq,
        compound_embedding_dim = compound_embedding_dim,
        protein_embedding_dim = protein_embedding_dim,
        net_args = list(
            compound = "mlp_in_out",
            compound_args = list(
                fc_units = c(10, 5),
                fc_activation = c("relu", "relu")),
            protein = "mlp_in_out",
            protein_args = list(
                fc_units = c(10, 5),
                fc_activation = c("relu", "relu")),
            fc_units = c(1),
            fc_activation = c("sigmoid"),
            loss = 'binary_crossentropy',
            optimizer = keras::optimizer_adam(),
            metrics = "accuracy"),
        epochs = 20, batch_size = 64,
        validation_split = 0.3,
        verbose = 0,
        callbacks = keras::callback_early_stopping(
            monitor = "val_accuracy",
            patience = 5,
            restore_best_weights = TRUE))
    ttgsea::plot_model(mlp_mlp$model)
    
    pred <- predict_cpi(mlp_mlp,
        antiviral_drug[,2],
        rep(SARS_CoV2_3CL_Protease, nrow(antiviral_drug)))
    
    Result <- data.frame(antiviral_drug[,1], pred$values)
    colnames(Result) <- c("drug", "probability")
    Result[order(Result[,2], decreasing = TRUE),]
}

## ----eval=TRUE----------------------------------------------------------------
sessionInfo()