# Final Part I ----
#
# NOTE: Part I is SAS code, not R. It is kept verbatim as a comment so that
# this file can be parsed and sourced by R.
#
#   data autompg;
#     infile 'C:\Users\sungkyu\Dropbox\Teaching\1301-StatisticalPackages\Exams\auto-mpg.data';
#     input mpg cylinders displacement hp weight acc;
#   run;
#
#   proc sgscatter data = autompg;
#     matrix mpg--acc;
#   run;
#
#   proc reg data =autompg;
#     model mpg = cylinders--acc / vif;
#   run;
#
#   ods graphics on;
#   proc reg data=autompg;
#     model mpg = cylinders--acc / selection = stepwise;
#   run;
#
#   data autompg;
#     set autompg;
#     lmpg = log10(mpg);
#   run;
#
#   proc reg data=autompg;
#     model lmpg = cylinders--acc / selection = stepwise;
#     output out = autompgfit predicted = yhat residual = resid
#            student = rstandard rstudent = rstudent
#            h = leverage cookd = cooksd;
#   run;
#
#   proc print data = autompgfit;
#   run;
#
#   data autompgfit2;
#     set autompgfit;
#     if leverage > 0.08 then delete;
#   run;
#
#   proc reg data=autompgfit2;
#     model lmpg = weight hp cylinders /vif;
#   run;

# Final Part II ----

# 1 ----
# Paired comparison of the two groups in the built-in `sleep` data.
data(sleep)
str(sleep)
help(sleep)

# Split the response by group once; the two vectors are matched by position.
extra_1 <- sleep$extra[sleep$group == 1]
extra_2 <- sleep$extra[sleep$group == 2]

# Do not use two-sample t-test.
# The x/y form is used instead of `extra ~ group` with `paired = TRUE`,
# because recent R versions reject `paired` in the formula methods.
t.test(extra_1, extra_2, paired = TRUE)

# Check normality of the paired differences before trusting the t-test.
paired.difference <- extra_1 - extra_2
qqnorm(paired.difference)
qqline(paired.difference)
hist(paired.difference)

# Nonparametric alternatives: the paired call and the one-sample call on the
# differences are the same signed-rank test.
wilcox.test(extra_1, extra_2, paired = TRUE)
wilcox.test(paired.difference)

# 2 ----
# College data: tab-separated file with a header row, read from the current
# working directory (printed by getwd()).
getwd()
col <- read.table("college.txt", header = TRUE, sep = "\t")
dim(col)

# with() evaluates inside `col` without attach(), so nothing is left on the
# search path and plot labels stay as the bare column names.
with(col, sum(Alumni.giving > 50, na.rm = TRUE))
with(col, sum(is.na(Alumni.giving)))
with(col, hist(Alumni.giving, breaks = 50))

# Logical subsetting yields NA where Alumni.giving is NA; drop those entries.
a <- with(col, School[Alumni.giving > 45])
a[!is.na(a)]

# 3 ----
# Compare Full.time across Tier: plot, group summaries, variance check, tests.
boxplot(Full.time ~ Tier, data = col)
with(col, tapply(Full.time, Tier, mean, na.rm = TRUE))
with(col, tapply(Full.time, Tier, sd, na.rm = TRUE))
bartlett.test(Full.time ~ Tier, data = col)

# Original author's note on the next line: "not good".
# NOTE(review): presumably because the equal-variance assumption fails -- confirm.
anova(lm(Full.time ~ Tier, data = col))
oneway.test(Full.time ~ Tier, data = col)
with(col, pairwise.t.test(Full.time, Tier))

# 4 ----
# Hypertension data: the file holds only the responses; the factor layout is
# rebuilt by hand in the order the values are read (row by row).
bp <- read.table("hypertension.dat", header = FALSE)
drug <- rep(rep(c("x", "y", "z"), c(12, 12, 12)), 2)
biofeed <- rep(c("P", "A"), c(36, 36))
diet <- rep(rep(c("No", "Yes"), c(6, 6)), 6)

# t() then as.vector() flattens the table row by row into one response vector.
bp <- as.vector(t(bp))

# data.frame() silently recycles shorter columns, so fail loudly if the file
# does not match the hard-coded design.
stopifnot(length(bp) == length(drug))

hypertension <- data.frame(bp, drug, biofeed, diet)
str(hypertension)
head(hypertension)

# One box per diet/drug/biofeed combination.
combined.factors <- factor(paste(diet, drug, biofeed))
boxplot(bp ~ combined.factors)

# Main-effects model on all three factors.
anova(lm(bp ~ ., data = hypertension))

# 5 ----
# Sample size per group for 90% power to detect a difference of 0.6 SD.
power.t.test(power = 0.9, delta = 0.6, sd = 1, sig.level = 0.05)

# 6 ----
# One-sided (lower-tail) sign-test p-value.
#
# x: numeric vector of (paired) differences.
#
# Returns P(W <= w) for W ~ Binomial(n, 0.5), where w is the number of
# positive values in x and n is the number of non-missing, non-zero values.
# Zeros (ties) and NAs carry no sign information, so they are dropped before
# counting; keeping them in n would understate the p-value.
sign.test.pvalue <- function(x) {
  x <- x[!is.na(x) & x != 0]
  n_pos <- sum(x > 0)
  pbinom(n_pos, length(x), 0.5)
}

# The original script also called sign.test.pvalue(x) here with an undefined
# `x`, which aborts the script; that stray call is removed.
paired.difference <- sleep$extra[sleep$group == 1] -
  sleep$extra[sleep$group == 2]
sign.test.pvalue(paired.difference)