## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", eval = FALSE ) ## ----install------------------------------------------------------------------ # # Step 1: Install cmdstanr # install.packages("cmdstanr", # repos = c("https://stan-dev.r-universe.dev", # getOption("repos"))) # # # Step 2: Install CmdStan (only needed once) # library(cmdstanr) # install_cmdstan() # # # Step 3: Install clmstan # # devtools::install_github("t-momozaki/clmstan") ## ----quickstart--------------------------------------------------------------- # library(clmstan) # library(ordinal) # data(wine) # # # Fit a model with logit link # fit <- clm_stan(rating ~ temp + contact, data = wine, # chains = 4, iter = 2000, warmup = 1000) # # # View results # print(fit) # summary(fit) ## ----comparison--------------------------------------------------------------- # library(ordinal) # library(clmstan) # data(wine) # # # ordinal::clm (frequentist) # fit_freq <- clm(rating ~ temp + contact, data = wine, link = "logit") # # # clmstan::clm_stan (Bayesian) # fit_bayes <- clm_stan(rating ~ temp + contact, data = wine, link = "logit", # chains = 4, iter = 2000, warmup = 1000) # # # Compare coefficients # coef(fit_freq) # coef(fit_bayes) ## ----standard-links----------------------------------------------------------- # # View available link functions # supported_links("standard") # # # Logit (default) - proportional odds model # fit_logit <- clm_stan(rating ~ temp, data = wine, link = "logit", # chains = 2, iter = 1000, warmup = 500) # # # Probit - normal distribution # fit_probit <- clm_stan(rating ~ temp, data = wine, link = "probit", # chains = 2, iter = 1000, warmup = 500) # # # Complementary log-log - asymmetric (right-skewed) # fit_cloglog <- clm_stan(rating ~ temp, data = wine, link = "cloglog", # chains = 2, iter = 1000, warmup = 500) # # # Log-log - asymmetric (left-skewed) # fit_loglog <- clm_stan(rating ~ temp, data = wine, link = "loglog", # chains = 2, iter = 1000, warmup = 500) # # # Cauchit - heavy tails # fit_cauchit <- clm_stan(rating ~ temp, data = wine, link = "cauchit", # chains = 2, iter = 1000, warmup = 500) ## ----flexible-links----------------------------------------------------------- # # View flexible link functions # supported_links("flexible") # # # t-link with fixed df (heavier tails than logit) # fit_t8 <- clm_stan(rating ~ temp, data = wine, link = "tlink", # link_param = list(df = 8), # chains = 2, iter = 1000, warmup = 500) # # # t-link with estimated df (data-driven tail behavior) # fit_t_est <- clm_stan(rating ~ temp, data = wine, link = "tlink", # link_param = list(df = "estimate"), # chains = 2, iter = 1000, warmup = 500) # # # GEV link with estimated shape parameter # fit_gev <- clm_stan(rating ~ temp, data = wine, link = "gev", # link_param = list(xi = "estimate"), # chains = 2, iter = 1000, warmup = 500) # # # Aranda-Ordaz link (asymmetric) # fit_ao <- clm_stan(rating ~ temp, data = wine, link = "aranda_ordaz", # link_param = list(lambda = "estimate"), # chains = 2, iter = 1000, warmup = 500) # # # Symmetric Power link (skewness adjustment) # fit_sp <- clm_stan(rating ~ temp, data = wine, link = "sp", # base = "logit", # link_param = list(r = "estimate"), # chains = 2, iter = 1000, warmup = 500) # # # Log-gamma link # fit_lg <- clm_stan(rating ~ temp, data = wine, link = "log_gamma", # link_param = list(lambda = "estimate"), # chains = 2, iter = 1000, warmup = 500) # # # AEP link (asymmetric exponential power) # fit_aep <- clm_stan(rating ~ temp, data = wine, link = "aep", # link_param = list(theta1 = "estimate", theta2 = "estimate"), # chains = 2, iter = 1000, warmup = 500) ## ----thresholds--------------------------------------------------------------- # # View available threshold structures # supported_thresholds() ## ----threshold-flexible------------------------------------------------------- # fit_flex <- clm_stan(rating ~ temp, data = wine, threshold = "flexible", # chains = 2, iter = 1000, warmup = 500) ## ----threshold-equidistant---------------------------------------------------- # # Useful for Likert scales with assumed equal intervals # fit_equi <- clm_stan(rating ~ temp, data = wine, threshold = "equidistant", # chains = 2, iter = 1000, warmup = 500) ## ----threshold-symmetric------------------------------------------------------ # fit_sym <- clm_stan(rating ~ temp, data = wine, threshold = "symmetric", # chains = 2, iter = 1000, warmup = 500) ## ----priors-modern------------------------------------------------------------ # # Single prior # fit <- clm_stan(rating ~ temp, data = wine, # prior = prior(normal(0, 1), class = "b"), # chains = 2, iter = 1000, warmup = 500) # # # Multiple priors # fit <- clm_stan(rating ~ temp, data = wine, # prior = c( # prior(normal(0, 1), class = "b"), # prior(normal(0, 5), class = "Intercept") # ), # chains = 2, iter = 1000, warmup = 500) # # # Prior for link parameters # fit_gev <- clm_stan(rating ~ temp, data = wine, link = "gev", # link_param = list(xi = "estimate"), # prior = prior(normal(0, 0.5), class = "xi"), # chains = 2, iter = 1000, warmup = 500) ## ----dist-helpers------------------------------------------------------------- # normal(mu, sigma) # Normal distribution # gamma(alpha, beta) # Gamma distribution (shape, rate) # student_t(df, mu, sigma) # Student-t distribution # cauchy(mu, sigma) # Cauchy distribution # flat() # Flat (improper) prior ## ----priors-legacy------------------------------------------------------------ # fit <- clm_stan(rating ~ temp, data = wine, # prior = clm_prior(beta_sd = 1, c_sd = 5), # chains = 2, iter = 1000, warmup = 500) ## ----results-basic------------------------------------------------------------ # fit <- clm_stan(rating ~ temp + contact, data = wine, # chains = 4, iter = 2000, warmup = 1000) # # # Quick overview # print(fit) # # # Detailed summary with credible intervals # summary(fit) # summary(fit, probs = c(0.025, 0.5, 0.975)) # # # Extract point estimates # coef(fit) # Posterior mean (default) # coef(fit, type = "median") # Posterior median ## ----plots-------------------------------------------------------------------- # # Trace plots (check mixing) # plot(fit, type = "trace") # # # Posterior density # plot(fit, type = "dens") # # # Posterior intervals # plot(fit, type = "intervals") # # # Autocorrelation (check for high autocorrelation) # plot(fit, type = "acf") # # # Select specific parameters # plot(fit, type = "trace", pars = c("beta[1]", "beta[2]")) ## ----diagnostics-------------------------------------------------------------- # # Quick summary # diagnostics(fit) # # # Detailed output # diagnostics(fit, detail = TRUE) ## ----predict-class------------------------------------------------------------ # fit <- clm_stan(rating ~ temp + contact, data = wine, # chains = 4, iter = 2000, warmup = 1000) # # # Predict most likely category # pred_class <- predict(fit, type = "class") # head(pred_class) # # # Prediction for new data # newdata <- data.frame( # temp = factor("warm", levels = levels(wine$temp)), # contact = factor("yes", levels = levels(wine$contact)) # ) # predict(fit, newdata = newdata, type = "class") ## ----predict-probs------------------------------------------------------------ # # Predicted probabilities for each category # pred_probs <- predict(fit, type = "probs") # head(pred_probs) # # # Alternative: fitted values # fitted_vals <- fitted(fit) # head(fitted_vals) ## ----posterior-predict-------------------------------------------------------- # # Draw from posterior predictive distribution # y_rep <- posterior_predict(fit) # dim(y_rep) # draws x observations # # # Uncertainty in predictions # pred_draws <- predict(fit, type = "class", summary = FALSE) ## ----model-comparison--------------------------------------------------------- # # Fit competing models # fit1 <- clm_stan(rating ~ temp, data = wine, link = "logit", # chains = 4, iter = 2000, warmup = 1000) # fit2 <- clm_stan(rating ~ temp + contact, data = wine, link = "logit", # chains = 4, iter = 2000, warmup = 1000) # fit3 <- clm_stan(rating ~ temp + contact, data = wine, link = "probit", # chains = 4, iter = 2000, warmup = 1000) # # # Compute LOO-CV # loo1 <- loo(fit1) # loo2 <- loo(fit2) # loo3 <- loo(fit3) # # # View results # print(loo1) # # # Pareto k diagnostic plot # plot(loo1) # # # Compare models # loo::loo_compare(loo1, loo2, loo3) # # # WAIC is also available # waic(fit1) ## ----workflow----------------------------------------------------------------- # library(clmstan) # library(ordinal) # data(wine) # # # Step 1: Fit baseline model # set.seed(123) # fit_base <- clm_stan(rating ~ temp + contact, data = wine, # link = "logit", threshold = "flexible", # chains = 4, iter = 2000, warmup = 1000) # # # Step 2: Check convergence # diagnostics(fit_base) # plot(fit_base, type = "trace") # # # Step 3: Review results # summary(fit_base) # # # Step 4: Try alternative link functions # fit_probit <- clm_stan(rating ~ temp + contact, data = wine, # link = "probit", # chains = 4, iter = 2000, warmup = 1000) # # fit_gev <- clm_stan(rating ~ temp + contact, data = wine, # link = "gev", link_param = list(xi = "estimate"), # chains = 4, iter = 2000, warmup = 1000) # # # Step 5: Compare models # loo_base <- loo(fit_base) # loo_probit <- loo(fit_probit) # loo_gev <- loo(fit_gev) # loo::loo_compare(loo_base, loo_probit, loo_gev) # # # Step 6: Final model interpretation # summary(fit_base) # plot(fit_base, type = "intervals") ## ----troubleshoot-cmdstan----------------------------------------------------- # # Check CmdStan installation # cmdstanr::cmdstan_path() # cmdstanr::cmdstan_version() # # # If not found, install CmdStan: # cmdstanr::install_cmdstan() ## ----troubleshoot-convergence------------------------------------------------- # # Increase iterations and warmup # fit <- clm_stan(rating ~ temp, data = wine, # chains = 4, # iter = 4000, # warmup = 2000) # # # Increase adapt_delta (helps with divergences) # fit <- clm_stan(rating ~ temp, data = wine, # chains = 4, # iter = 2000, # warmup = 1000, # adapt_delta = 0.95) # # # Increase max_treedepth # fit <- clm_stan(rating ~ temp, data = wine, # chains = 4, # iter = 2000, # warmup = 1000, # max_treedepth = 12) ## ----troubleshoot-divergence-------------------------------------------------- # # High adapt_delta # fit <- clm_stan(rating ~ temp, data = wine, # chains = 4, # iter = 2000, # warmup = 1000, # adapt_delta = 0.99) ## ----troubleshoot-memory------------------------------------------------------ # # Reduce number of chains # fit <- clm_stan(rating ~ temp, data = wine, # chains = 2, # iter = 2000, # warmup = 1000) # # # Run chains in parallel (default) # fit <- clm_stan(rating ~ temp, data = wine, # chains = 4, # parallel_chains = 4)