Contents

1 Getting the data

1.1 Discovery

library(tidyverse)
library(cgdsr)
# Open a connection object for the cBioPortal web service (cgdsr client).
# NOTE(review): confirm this endpoint is still served before re-running.
url <- "http://www.cbioportal.org/public-portal/"
cbiop <- CGDS(url)

# Browse the available studies, then pin the identifier of the one used below.
studies <- getCancerStudies(cbiop) %>% as_tibble()
## View(studies)
study <- "skcm_tcga"

# Browse the case lists of that study and pin the two that are queried later:
# one for the clinical data and one for the mRNA profile.
cases <- getCaseLists(cbiop, study) %>% as_tibble()
## View(cases)
allcases <- "skcm_tcga_all"
mRNAcases <- "skcm_tcga_rna_seq_v2_mrna"

# Browse the genetic profiles of the study and pin the one to download.
profiles <- getGeneticProfiles(cbiop, study) %>% as_tibble()
## View(profiles)
profile <- "skcm_tcga_rna_seq_v2_mrna_median_Zscores"

# Genes of interest; reused for the download and the downstream analysis.
genes <- c("CD63", "CD9", "CD81")

1.2 Download

# Profile values for the selected genes. The row names returned by the service
# are moved into an explicit `id` column so the table can be joined on it.
mRNA <- getProfileData(cbiop, genes, profile, mRNAcases) %>%
    rownames_to_column("id") %>%
    as_tibble()

# Clinical annotations for the full case list, keyed by the same `id` column.
clinical <- getClinicalData(cbiop, allcases) %>%
    rownames_to_column("id") %>%
    as_tibble()

1.3 Cleaning

# Combine clinical annotations and expression values on the shared `id` key.
# The key is spelled out so the join cannot silently widen to other columns.
data <- full_join(clinical, mRNA, by = "id")

# Analysis table: cases with a value for all three genes, restricted to the
# columns used downstream, with the categorical columns stored as factors.
# NOTE(review): the summary printed below shows a blank ("") level in
# OS_STATUS and DFS_STATUS -- consider mapping it to NA before modelling.
clean <- data %>%
    filter(!is.na(CD63), !is.na(CD9), !is.na(CD81)) %>%
    select(
        id, SAMPLE_TYPE,
        OS_STATUS, OS_MONTHS,
        DFS_STATUS, DFS_MONTHS,
        CD63, CD9, CD81
    ) %>%
    mutate(
        SAMPLE_TYPE = factor(SAMPLE_TYPE),
        OS_STATUS = factor(OS_STATUS),
        DFS_STATUS = factor(DFS_STATUS)
    )

# Per-column overview of the cleaned table (counts, quartiles, missing values).
summary(clean)
##       id                           SAMPLE_TYPE     OS_STATUS  
##  Length:472         Additional Metastatic:  1           :  1  
##  Class :character   Metastatic           :368   DECEASED:223  
##  Mode  :character   Primary Tumor        :103   LIVING  :248  
##                                                               
##                                                               
##                                                               
##                                                               
##    OS_MONTHS                    DFS_STATUS    DFS_MONTHS    
##  Min.   : -0.07                      : 60   Min.   : -0.07  
##  1st Qu.: 16.02   DiseaseFree        :150   1st Qu.: 12.06  
##  Median : 36.68   Recurred/Progressed:262   Median : 27.30  
##  Mean   : 60.52                             Mean   : 50.12  
##  3rd Qu.: 78.62                             3rd Qu.: 64.59  
##  Max.   :369.65                             Max.   :368.50  
##  NA's   :10                                 NA's   :67      
##       CD63               CD9               CD81        
##  Min.   :-1.52280   Min.   :-0.9580   Min.   :-1.5369  
##  1st Qu.:-0.63860   1st Qu.:-0.5410   1st Qu.:-0.7140  
##  Median :-0.14630   Median :-0.1022   Median :-0.2895  
##  Mean   : 0.08805   Mean   : 0.1419   Mean   :-0.0110  
##  3rd Qu.: 0.52597   3rd Qu.: 0.5160   3rd Qu.: 0.3700  
##  Max.   : 5.46230   Max.   : 8.0387   Max.   : 5.1765  
## 

2 Analysis

2.1 Testing continuous variables

library(survival)
library(broom)   # tidy()
# Overall-survival response: time is OS_MONTHS, the event is OS_STATUS being
# "DECEASED"; every other status value counts as no event.
surv <- with(clean, Surv(OS_MONTHS, OS_STATUS == "DECEASED"))

# One single-covariate Cox model per gene, each reduced to its tidy coefficient
# row and stacked into one table. Iterating over `genes` keeps the set of
# models in sync with the genes that were downloaded.
fits <- lapply(genes, function(gene) {
    # Builds `surv ~ <gene>`; `surv` is looked up in the calling environment.
    model <- coxph(reformulate(gene, response = "surv"), data = clean)
    tidy(model)
}) %>%
    bind_rows() %>%
    as_tibble()
fits
## # A tibble: 3 x 7
##    term    estimate  std.error statistic     p.value    conf.low  conf.high
##   <chr>       <dbl>      <dbl>     <dbl>       <dbl>       <dbl>      <dbl>
## 1  CD63  0.10642950 0.06902683  1.541857 0.123108363 -0.02886060 0.24171960
## 2   CD9 -0.07089716 0.07184821 -0.986763 0.323758816 -0.21171708 0.06992275
## 3  CD81  0.20512105 0.06619713  3.098640 0.001944113  0.07537706 0.33486504

2.2 Visualization as Kaplan-Meier curves

library(survminer)

# Determine an expression cutpoint for each gene against overall survival.
# The event column is recoded to logical (TRUE = "DECEASED") first.
cutpoints <- clean %>%
    mutate(OS_STATUS = OS_STATUS == "DECEASED") %>%
    surv_cutpoint(time = "OS_MONTHS", event = "OS_STATUS", variables = genes)

# Replace each gene's continuous values by its group relative to the cutpoint.
cats <- cutpoints %>%
    surv_categorize() %>%
    as_tibble()

# Kaplan-Meier curves for the CD81 groups. The data is handed to ggsurvplot()
# explicitly so it does not have to be recovered from the fit's call.
surv <- with(cats, Surv(OS_MONTHS, OS_STATUS))
fit <- survfit(surv ~ CD81, data = cats)
ggsurvplot(fit, data = cats, conf.int = TRUE)
## Warning in grid.Call.graphics(C_polygon, x$x, x$y, index): semi-transparency
## is not supported on this device: reported only once per page