## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5 ) ## ----eval=FALSE--------------------------------------------------------------- # install.packages('crrstep') ## ----message=FALSE, warning=FALSE--------------------------------------------- library(crrstep) library(cmprsk) ## ----------------------------------------------------------------------------- set.seed(123) n <- 400 age <- rnorm(n, mean = 60, sd = 10) sex <- factor(sample(c('Male', 'Female'), n, replace = TRUE)) treatment <- factor(sample(c('Control', 'Treatment'), n, replace = TRUE)) biomarker <- rnorm(n, mean = 100, sd = 20) linpred <- 0.03 * (age - 60) + 0.5 * (sex == 'Male') - 0.4 * (treatment == 'Treatment') + 0.01 * biomarker base_hazard <- 0.1 true_time <- rexp(n, rate = base_hazard * exp(linpred)) p_cause1 <- plogis(linpred) cause <- ifelse(runif(n) < p_cause1, 1, 2) censor_time <- rexp(n, rate = 0.05) ftime <- pmin(true_time, censor_time) fstatus <- ifelse(ftime == censor_time, 0, cause) sim_data <- data.frame(ftime, fstatus, age, sex, treatment, biomarker) table(sim_data$fstatus) ## ----------------------------------------------------------------------------- result_back <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_back) ## ----------------------------------------------------------------------------- result_fwd <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'forward', criterion = 'AIC', trace = TRUE ) print(result_fwd) ## ----------------------------------------------------------------------------- result_bic <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'BIC', trace = FALSE ) cat('AIC selected variables:', nrow(result_back$coefficients), '\\n') cat('BIC selected variables:', nrow(result_bic$coefficients), '\\n') ## ----------------------------------------------------------------------------- result_min <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~ age + sex, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_min) ## ----------------------------------------------------------------------------- set.seed(456) sim_data$stage <- factor(sample(c('I', 'II', 'III', 'IV'), n, replace = TRUE)) sim_data$region <- factor(sample(c('North', 'South', 'East', 'West'), n, replace = TRUE)) result_factors <- crrstep( formula = ftime ~ age + sex + stage + region, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'BIC', trace = TRUE ) summary(result_factors) coef(result_factors) logLik(result_factors) BIC(result_factors) ## ----------------------------------------------------------------------------- result_interact <- crrstep( formula = ftime ~ age + sex + treatment + age:sex + sex:treatment, scope.min = ~ age + sex, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_interact) ## ----------------------------------------------------------------------------- sim_data$biomarker_c <- scale(sim_data$biomarker, scale = FALSE)[,1] result_poly <- crrstep( formula = ftime ~ age + sex + biomarker_c + I(biomarker_c^2), scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_poly) ## ----------------------------------------------------------------------------- result_complex <- crrstep( formula = ftime ~ age + treatment + biomarker_c + I(biomarker_c^2) + biomarker_c:treatment + I(biomarker_c^2):treatment, scope.min = ~ age, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'BIC', trace = TRUE ) print(result_complex) ## ----------------------------------------------------------------------------- sim_data$biomarker_pos <- sim_data$biomarker - min(sim_data$biomarker) + 1 result_log <- crrstep( formula = ftime ~ age + sex + log(biomarker_pos) + log(biomarker_pos):sex, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_log) ## ----------------------------------------------------------------------------- result_threeway <- crrstep( formula = ftime ~ age + sex + treatment + stage + sex:treatment + sex:stage + treatment:stage + sex:treatment:stage, scope.min = ~ age, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'BIC', trace = TRUE ) print(result_threeway) ## ----------------------------------------------------------------------------- fit_full <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~1, etype = fstatus, data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = FALSE, crr.object = TRUE ) cat('Coefficients:\\n') print(fit_full$coef) cat('\\nLog-likelihood:\\n') print(fit_full$loglik) ## ----------------------------------------------------------------------------- sim_data_miss <- sim_data sim_data_miss$age[sample(1:n, 20)] <- NA sim_data_miss$biomarker[sample(1:n, 15)] <- NA result_miss <- crrstep( formula = ftime ~ age + sex + treatment + biomarker, scope.min = ~1, etype = fstatus, data = sim_data_miss, failcode = 1, direction = 'backward', criterion = 'AIC', trace = TRUE ) print(result_miss) ## ----------------------------------------------------------------------------- result_subset <- crrstep( formula = ftime ~ age + treatment + biomarker, scope.min = ~1, etype = fstatus, subset = sim_data$sex == 'Female', data = sim_data, failcode = 1, direction = 'backward', criterion = 'AIC', trace = FALSE ) print(result_subset) ## ----------------------------------------------------------------------------- sessionInfo()