# Decision-tree demo with rpart: classification (kyphosis, iris) and
# regression (Boston housing), with an LDA comparison on kyphosis.

# Install rpart only when it is missing, so re-running the script does not
# trigger a reinstall every time.
if (!requireNamespace("rpart", quietly = TRUE)) {
  install.packages("rpart")
}

library(rpart)
library(MASS)

par(mfrow = c(1, 1))

# ---- kyphosis data (2-group classification) ----
data(kyphosis)
print(head(kyphosis))

# Classification tree using the information (entropy) splitting criterion.
fit1 <- rpart(
  Kyphosis ~ .,
  data = kyphosis,
  parms = list(split = "information")
)
print(fit1)
summary(fit1)
plot(fit1)
text(fit1, use.n = TRUE)

# class probabilities (default)
print(predict(fit1, kyphosis, type = "prob"))

# Confusion table on the training data: predicted class vs. observed class.
print(table(predict(fit1, kyphosis, type = "class"), kyphosis$Kyphosis))

# better than LDA?
fit.lda <- lda(Kyphosis ~ ., data = kyphosis)
print(fit.lda) # show results
print(table(predict(fit.lda)$class, kyphosis$Kyphosis))

# compare various options (three trees side by side)
par(mfrow = c(1, 3))

fit <- rpart(
  Kyphosis ~ .,
  data = kyphosis,
  parms = list(split = "information"),
  control = rpart.control(cp = 0.03)
)
plot(fit)
text(fit, use.n = TRUE)

fit <- rpart(Kyphosis ~ ., data = kyphosis, parms = list(split = "gini"))
plot(fit)
text(fit, use.n = TRUE)

fit <- rpart(
  Kyphosis ~ .,
  data = kyphosis,
  parms = list(split = "gini"),
  control = rpart.control(cp = 0.05)
)
plot(fit)
text(fit, use.n = TRUE)

par(mfrow = c(1, 1))

# ---- iris data (3-group classification) ----
# Draw 25 training rows from each of the three 50-row index ranges; the
# remaining rows are used as the test set. The seed is fixed so that the
# split (and therefore the confusion table below) is reproducible.
set.seed(1)
sub <- c(sample(1:50, 25), sample(51:100, 25), sample(101:150, 25))

fit <- rpart(Species ~ ., data = iris, subset = sub)
fit.pruned.tree <- prune(fit, cp = 0.1)
print(fit.pruned.tree)

# Confusion table on the held-out rows.
print(
  table(
    predict(fit.pruned.tree, iris[-sub, ], type = "class"),
    iris[-sub, "Species"]
  )
)

# NOTE(review): this plots the unpruned tree `fit`, not `fit.pruned.tree`,
# exactly as the original script did -- confirm that this is intended.
plot(fit)
text(fit, use.n = TRUE, cex = 0.7)

# ---- regression (Boston housing) ----
data(Boston)
print(head(Boston))

# Linear baseline: medv regressed on ptratio, drawn over the scatter plot.
lm.out <- lm(medv ~ ptratio, data = Boston)
plot(medv ~ ptratio, data = Boston)
abline(lm.out, col = "red")

# Single-predictor regression tree, pruned at cp = 0.05.
fit <- rpart(medv ~ ptratio, data = Boston)
fit.pruned.tree <- prune(fit, cp = 0.05)
plot(fit.pruned.tree)
text(fit.pruned.tree, use.n = TRUE)
print(fit.pruned.tree)

# Overlay the tree's fitted values on the scatter plot. Rows are ordered by
# ptratio so that lines() draws the fit left to right.
plot(medv ~ ptratio, data = Boston)
ord <- order(Boston$ptratio)
lines(Boston$ptratio[ord], predict(fit.pruned.tree, Boston[ord, ]))

# Two-predictor regression tree (ptratio and nox), pruned at cp = 0.01.
fit <- rpart(medv ~ ptratio + nox, data = Boston)
fit.pruned.tree <- prune(fit, cp = 0.01)
plot(fit.pruned.tree)
text(fit.pruned.tree, use.n = TRUE)
print(fit.pruned.tree)