## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(flexhaz)

## -----------------------------------------------------------------------------
# Exponential: constant hazard h(t) = lambda
exp_dist <- dfr_exponential(lambda = 0.5)

# Weibull: power-law hazard h(t) = (k/sigma)(t/sigma)^(k-1)
weib_dist <- dfr_weibull(shape = 2, scale = 3)

# Gompertz: exponentially increasing hazard h(t) = a*exp(b*t)
gomp_dist <- dfr_gompertz(a = 0.01, b = 0.1)

# Log-logistic: non-monotonic hazard (increases then decreases)
ll_dist <- dfr_loglogistic(alpha = 10, beta = 2)

print(exp_dist)
is_dfr_dist(exp_dist)

## -----------------------------------------------------------------------------
# Custom: linear increasing hazard h(t) = a + b*t
linear_dist <- dfr_dist(
  rate = function(t, par, ...) {
    a <- par[1]
    b <- par[2]
    a + b * t
  },
  par = c(a = 0.1, b = 0.01)
)

## -----------------------------------------------------------------------------
h <- hazard(exp_dist)
H <- cum_haz(exp_dist)

# Evaluate at specific times
h(1)          # Hazard at t=1
h(c(1, 2, 3)) # Vectorized
H(2)          # Cumulative hazard at t=2

## -----------------------------------------------------------------------------
S <- surv(exp_dist)
F <- cdf(exp_dist)

# Verify S(t) + F(t) = 1
t <- 2
c(survival = S(t), cdf = F(t), sum = S(t) + F(t))

## -----------------------------------------------------------------------------
# Use stats::density which flexhaz implements as density.dfr_dist
pdf_fn <- density(exp_dist)

# For exponential: f(t) = lambda * exp(-lambda * t)
t <- 1
lambda <- 0.5
c(computed = pdf_fn(t), analytical = lambda * exp(-lambda * t))

## -----------------------------------------------------------------------------
Q <- inv_cdf(exp_dist)

# Median of exponential is log(2)/lambda
median_computed <- Q(0.5)
median_analytical <- log(2) / 0.5
c(computed = median_computed, analytical = median_analytical)

## -----------------------------------------------------------------------------
samp <- sampler(exp_dist)
set.seed(42)
samples <- samp(1000)

# Compare sample mean to theoretical mean (1/lambda = 2)
c(sample_mean = mean(samples), theoretical = 1/0.5)
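## -----------------------------------------------------------------------------
# Quick consistency check (not part of the original vignette): the empirical
# quantiles of the simulated draws should line up with the quantile function
# Q returned by inv_cdf() above.
probs <- c(0.25, 0.5, 0.75)
rbind(empirical = quantile(samples, probs),
      theoretical = sapply(probs, Q))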
## -----------------------------------------------------------------------------
h <- hazard(exp_dist)

# Use default lambda = 0.5
h(1)

# Override with lambda = 2
h(1, par = c(2))

## -----------------------------------------------------------------------------
# Simulate exact failure times from exponential(lambda=1)
set.seed(123)
true_lambda <- 1
n <- 50
times <- rexp(n, rate = true_lambda)

# Create data frame with standard survival format
# delta = 1 means exact observation, delta = 0 means censored
df_exact <- data.frame(t = times, delta = rep(1, n))
head(df_exact)

## -----------------------------------------------------------------------------
dist <- dfr_dist(
  rate = function(t, par, ...) rep(par[1], length(t)),
  par = NULL # No default - must be supplied
)

ll <- loglik(dist)

# Evaluate at different parameter values
ll(df_exact, par = c(0.5)) # lambda = 0.5
ll(df_exact, par = c(1.0)) # lambda = 1.0 (true value)
ll(df_exact, par = c(2.0)) # lambda = 2.0

## -----------------------------------------------------------------------------
s <- score(dist)
s(df_exact, par = c(1.0)) # Should be close to 0 at MLE

## -----------------------------------------------------------------------------
H_ll <- hess_loglik(dist)
hess <- H_ll(df_exact, par = c(1.0))
hess # Should be negative (concave at maximum)

## ----warning=FALSE------------------------------------------------------------
solver <- fit(dist)

# Find MLE starting from initial guess
result <- solver(df_exact, par = c(0.5), method = "BFGS")

# Extract fitted parameters (the fisher_mle class from likelihood.model uses coef())
coef(result)

# Compare to analytical MLE: lambda_hat = n / sum(t)
analytical_mle <- n / sum(times)
c(fitted = coef(result), analytical = analytical_mle, true = true_lambda)

## ----warning=FALSE------------------------------------------------------------
# Some observations are censored (patient still alive at study end)
df_mixed <- data.frame(
  t = c(1, 2, 3, 4, 5, 6, 7, 8),
  delta = c(1, 1, 1, 0, 0, 1, 1, 0) # 0 = censored
)

ll <- loglik(dist)

# Fit with censored data
solver <- fit(dist)
result <- solver(df_mixed, par = c(0.5), method = "BFGS")
coef(result)
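## -----------------------------------------------------------------------------
# Quick sanity check (not part of the original vignette): for exponential data
# with right censoring, the MLE has the closed form
#   lambda_hat = (number of events) / (total observed time),
# which should agree with the numerical fit above.
with(df_mixed, sum(delta) / sum(t))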
## ----warning=FALSE------------------------------------------------------------
# Create Weibull DFR
weibull <- dfr_dist(
  rate = function(t, par, ...) {
    k <- par[1]
    sigma <- par[2]
    (k / sigma) * (t / sigma)^(k - 1)
  }
)

# Simulate Weibull data (shape=2, scale=3)
set.seed(456)
true_shape <- 2
true_scale <- 3
n <- 100

# Use inverse CDF sampling
u <- runif(n)
weibull_times <- true_scale * (-log(u))^(1/true_shape)
df_weibull <- data.frame(t = weibull_times, delta = rep(1, n))

# Fit
solver <- fit(weibull)
result <- solver(df_weibull, par = c(1.5, 2.5), method = "BFGS")
c(fitted_shape = coef(result)[1], true_shape = true_shape)
c(fitted_scale = coef(result)[2], true_scale = true_scale)

## ----fig.alt="Bathtub hazard curve showing three phases: high infant mortality at t=0 that decreases, then a constant useful life period, followed by increasing wear-out hazard."----
# h(t) = a * exp(-b*t) + c + d * t^k
# Three components: infant mortality + baseline + wear-out
bathtub <- dfr_dist(
  rate = function(t, par, ...) {
    a <- par[1] # infant mortality magnitude
    b <- par[2] # infant mortality decay rate
    c <- par[3] # baseline hazard (useful life)
    d <- par[4] # wear-out coefficient
    k <- par[5] # wear-out exponent
    a * exp(-b * t) + c + d * t^k
  },
  par = c(a = 1, b = 2, c = 0.02, d = 0.001, k = 2)
)

# Plot the hazard function
t_seq <- seq(0.01, 15, length.out = 200)
h <- hazard(bathtub)
plot(t_seq, sapply(t_seq, h), type = "l",
     xlab = "Time", ylab = "Hazard rate",
     main = "Bathtub hazard curve")

## -----------------------------------------------------------------------------
# Proportional hazards with covariate x
# h(t, x) = h0(t) * exp(beta * x)
# where h0(t) = Weibull baseline
ph_model <- dfr_dist(
  rate = function(t, par, x = 0, ...) {
    k <- par[1]
    sigma <- par[2]
    beta <- par[3]
    baseline <- (k / sigma) * (t / sigma)^(k - 1)
    baseline * exp(beta * x)
  },
  par = c(shape = 2, scale = 3, beta = 0.5)
)

h <- hazard(ph_model)

# Hazard for different covariate values
h(2, x = 0) # Baseline
h(2, x = 1) # Higher risk group

## -----------------------------------------------------------------------------
# Support is (0, Inf) for all DFR distributions
support <- sup(exp_dist)
print(support)

# Parameters
params(exp_dist)

## ----warning=FALSE, fig.alt="Cox-Snell residuals Q-Q plot showing model fit assessment."----
# Fit a model and check residuals
set.seed(99)
test_times <- rexp(80, rate = 0.3)
test_df <- data.frame(t = test_times, delta = 1)

# Fit exponential
exp_fitted <- dfr_exponential()
solver <- fit(exp_fitted)
fit_result <- solver(test_df, par = c(0.5))
lambda_hat <- coef(fit_result)

# Create fitted distribution with estimated parameters
exp_final <- dfr_exponential(lambda = lambda_hat)

# Q-Q plot of Cox-Snell residuals
qqplot_residuals(exp_final, test_df)

## -----------------------------------------------------------------------------
mart_resid <- residuals(exp_final, test_df, type = "martingale")
summary(mart_resid)

# Large positive values: failed earlier than expected
# Large negative values: survived longer than expected
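## -----------------------------------------------------------------------------
# Quick by-hand check (not part of the original vignette): Cox-Snell residuals
# are the fitted cumulative hazards H_hat(t_i). Under a correctly specified
# model they behave like unit-exponential draws, so for fully observed data
# their mean should be close to 1.
H_hat <- cum_haz(exp_final)
cox_snell <- sapply(test_df$t, H_hat)
mean(cox_snell) # Expect a value near 1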