# Using EDA to Analyze Logistic Regression Donner Party Example # Empirical Probability Plots # m = number of parameters # Ntot = number of observations # M = Number of simulations # clev = cutoff subscrpit for confidence level clev <- 1 m <- 3 Ntot <- 45 M <- 100 SimRes <- matrix(0,Ntot,M) SimStatus <- 0 df <- read.table("donnernum.txt",header=T) attach(df) # Create matrix of numeric data status <- df$STATUS fit <- glm(STATUS~AGE+SEX,family=binomial,data=df) b <- coef(fit) pidat <- exp(b[1] + b[2]*AGE + b[3]*SEX)/(1 + exp(b[1] + b[2]*AGE + b[3]*SEX)) resdat <- sort(status - pidat) for (k in 1:M) { for (j in 1:Ntot) { SimStatus[j] <- rbinom(1,1,pidat[j])} newfit <- glm(SimStatus~AGE+SEX,family=binomial,data=df) bsim <- coef(newfit) pisim <- exp(bsim[1] + bsim[2]*AGE + bsim[3]*SEX)/(1+exp(bsim[1] + bsim[2]*AGE + bsim[3]*SEX)) SimRes[,k] <- sort(SimStatus - pisim) } Simmedians <- apply(SimRes,1,median) Lsim <- SimRes[,clev] Upbnd <- M - clev + 1 Usim <- SimRes[,Upbnd] plot(Simmedians,resdat,main = "Empirical Probability Plot",ylab="Y - Phat",xlab="Quantiles from Simulations") lines(Simmedians,Lsim) lines(Simmedians,Usim)