#     Statystyczne systemy uczące się - Ćwiczenia w oparciu o pakiet R.
#     Jan Ćwik, Jan Mielniczuk
#     Oficyna Wydawnicza Politechniki Warszawskiej
#     Ostatnia aktualizacja: 16 października 2009
 
 
library(rpart)
 
 
# Load the fitness data set exactly once.
# "fitness.txt" in the working directory is preferred; the interactive file
# picker is only used as a fallback when that file is missing, so the script
# no longer reads the data twice or blocks on a dialog when run
# non-interactively (e.g. through Rscript).
fitness_path <- "fitness.txt"
if (!file.exists(fitness_path)) {
  if (!interactive()) {
    stop("Cannot find 'fitness.txt' in ", getwd(), call. = FALSE)
  }
  fitness_path <- file.choose()
}
fitness <- read.table(fitness_path, header = TRUE)

# Number of rows and columns of the data set.
dim(fitness)

# Scatter-plot matrix of every pair of columns.
pairs(fitness)
 
 
# Regression tree for Oxygen on all remaining columns of `fitness`;
# cp and minsplit are passed through to rpart's control settings.
fitness.rpart <- rpart(Oxygen ~ ., data = fitness, cp = 0.01, minsplit = 2)

# Open the help page of rpart().
help("rpart")

# Text representation of the fitted tree.
fitness.rpart

# Draw the tree with uniform vertical spacing and label its nodes.
plot(fitness.rpart, uniform = TRUE, margin = 0.1)
text(fitness.rpart)

# Show the text representation again, next to the drawing.
fitness.rpart
 
 
 
# Vector of medians of the explanatory variables.
# The response is excluded by name instead of by position, and the medians are
# computed column by column (vapply) rather than through apply(), which would
# first coerce the whole data frame to a matrix.
print("wektor median zmiennych objasniajacych")
predictor_names <- setdiff(names(fitness), "Oxygen")
x0 <- vapply(fitness[predictor_names], median, numeric(1))
print(x0)

# Prediction of the tree for the vector of medians; t(x0) turns the named
# vector into a one-row matrix so that data.frame() yields one observation.
print("pedykcja dla wektora median")
predict(fitness.rpart, newdata = data.frame(t(x0)))

# Fitted values for the training observations.
predict(fitness.rpart)
 
 
 
 
 
 
 
 
# Detailed node-by-node description of the first tree.
summary(fitness.rpart)

# Single plotting panel.
par(mfrow = c(1, 1))

# Prediction for one new observation in which Age and RunTime are given as NaN.
# NOTE(review): presumably meant to show how the tree copes with missing
# predictor values - confirm against the accompanying exercise text.
predict(
  fitness.rpart,
  newdata = data.frame(
    Age = NaN,
    Weight = 69, RunTime = NaN, RestPulse = 57,
    RunPulse = 168, MaxPulse = 172
  )
)

# Redraw the tree and its node labels.
plot(fitness.rpart, uniform = TRUE, margin = 0.1)
text(fitness.rpart)
 
 
 
 
# Fix the random number generator state before the next fits.
# NOTE(review): presumably for reproducible cross-validation columns in the
# cp tables - confirm.
set.seed(289)

# Larger tree: same model as before but with a lower complexity threshold.
fitness2.rpart <- rpart(Oxygen ~ ., data = fitness, cp = 0.001, minsplit = 2)

fitness2.rpart

# Complexity-parameter tables of the new tree and of the earlier one.
printcp(fitness2.rpart)

printcp(fitness.rpart)

# Plot of the complexity-parameter table of the larger tree.
plotcp(fitness2.rpart)

# Two trees at cp = 0.03: one obtained by pruning the large tree, one grown
# directly (the direct fit does not pass minsplit, so rpart's default is used).
fitness3.rpart <- prune(fitness2.rpart, cp = 0.03)
fitness12.rpart <- rpart(Oxygen ~ ., data = fitness, cp = 0.03)

print(fitness3.rpart)
print(fitness12.rpart)
 
 
# Linear regression of Oxygen on all remaining columns, used as a baseline
# for the trees.
gg <- lm(Oxygen ~ ., data = fitness)

print(summary(gg))

# Residual sums of squares of the three models.
# residuals() is used for the linear model as well: the former `gg$res` only
# worked through partial matching of `$` against the `residuals` component.
print("RSS dla lm")
print(sum(residuals(gg)^2))

print("RSS dla fitness.rpart")
print(sum(residuals(fitness.rpart)^2))

print("RSS dla fitness3.rpart")
print(sum(residuals(fitness3.rpart)^2))
 
 
 
 
# Tree that uses only RunTime and Age, so the induced partition can be drawn
# in the plane.
fitness5.rpart <- rpart(Oxygen ~ RunTime + Age, data = fitness,
                        cp = 0.00001, minsplit = 2)

print(fitness5.rpart)

plot(fitness5.rpart, uniform = TRUE, margin = 0.1)

text(fitness5.rpart)

# Scatter plot of the observations in the (RunTime, Age) plane.
plot(fitness$RunTime, fitness$Age,
     xlim = c(8.0, 14.0), ylim = c(37, 58),
     xlab = "RunTime", ylab = "Age")

# Vertical segments at hard-coded RunTime values, drawn over the Age range
# 37-58, in the same order as before (12.29 appears twice in the list).
invisible(lapply(
  c(8.935, 12.29, 8.64, 12.29, 10.9, 11.015),
  function(cut) lines(c(cut, cut), c(37, 58))
))

# Horizontal segment at Age = 53 between RunTime 8.935 and 10.9.
lines(c(8.935, 10.9), c(53, 53))
 
 
 
# Number of grid points along each axis.
len <- 60

# Regular grid over RunTime x Age; expand.grid varies RunTime fastest.
xp <- seq(8.0, 15.0, length.out = len)
yp <- seq(35, 60, length.out = len)
siatka <- expand.grid(RunTime = xp, Age = yp)

# Tree predictions at every grid point.
zp <- predict(fitness5.rpart, siatka)

# Perspective plot of the predicted surface; the predictions are folded into
# a matrix with `len` rows (one row per RunTime value).
persp(
  xp, yp, matrix(zp, nrow = len),
  theta = 150, phi = 30, col = "lightblue",
  zlim = c(37, 60),
  xlab = "RunTime", ylab = "Age", zlab = "Oxygen"
)
 
 
 
 
library(MASS)
data(Cars93)

dim(Cars93)

names(Cars93)

# Distribution of the original car types.
table(Cars93$Type)

# Collapse Type into four classes, step by step:
#   "D"  - Large or Van, "M" - Small, "SP" - Sporty, "SR" - everything else.
typ <- ifelse(Cars93$Type == "Large" | Cars93$Type == "Van", "D", "SR")
typ2 <- ifelse(Cars93$Type == "Small", "M", typ)
typ3 <- ifelse(Cars93$Type == "Sporty", "SP", typ2)

# The same recoding written as one nested ifelse().
typ3 <- ifelse(Cars93$Type == "Sporty",
               "SP", ifelse(Cars93$Type == "Small",
                            "M", ifelse(Cars93$Type == "Large" | Cars93$Type == "Van",
                                        "D", "SR")))

table(typ3)

# Append the new class as a factor. Since R 4.0 data.frame() keeps character
# vectors as character, but the classification calls that use `typ` as the
# response need a factor, so the conversion is made explicit here.
Cars93 <- data.frame(Cars93, typ = factor(typ3))
dim(Cars93)
names(Cars93)
 
 
set.seed(222)

# Classification tree for the collapsed car class on five numeric predictors.
cars.tree <- rpart(typ ~ Length + Weight + EngineSize + Horsepower + RPM,
                   data = Cars93, cp = 0.0001, minsplit = 5)

# Draw the tree without figure margins. The previous graphical parameters are
# saved and restored afterwards; otherwise mar = c(0, 0, 0, 0) would stay in
# force and every later plot (plotcp() below, scatter plots further down)
# would lose its axes and labels.
old_par <- par(mfrow = c(1, 1), mar = c(0, 0, 0, 0), pty = "m")

plot(cars.tree, compress = TRUE, uniform = TRUE,
     branch = 0.4,
     margin = 0.2)

text(cars.tree, use.n = TRUE, cex = 0.7)

par(old_par)

print(summary(cars.tree), digits = 4)

# Complexity-parameter table and its plot.
printcp(cars.tree)
plotcp(cars.tree)
 
 
library("party")

# Conditional inference tree on all columns except the third one.
# Its result is replaced by the rpart fits that follow.
cars2zmienne.tree <- ctree(typ ~ ., data = Cars93[, -3])

# Successive rpart fits assigned to the same name; only the last assignment
# is used by the print/plot/summary calls below.
cars2zmienne.tree <- rpart(typ ~ Weight + EngineSize,
                           data = Cars93, cp = 0.03, minsplit = 5)

cars2zmienne.tree <- rpart(typ ~ .,
                           data = Cars93, cp = 0.03, minsplit = 5)

cars2zmienne.tree <- rpart(typ ~ Weight + EngineSize,
                           data = Cars93, cp = 0.03, minsplit = 5)

print(cars2zmienne.tree)

# Draw the two-variable tree with per-node class counts.
plot(cars2zmienne.tree,
     compress = TRUE, uniform = TRUE, branch = 0.4, margin = 0.2)
text(cars2zmienne.tree, use.n = TRUE, cex = 0.7)
print(summary(cars2zmienne.tree), digits = 4)
 
 
 
 
# Empty plotting frame for the (EngineSize, Weight) plane.
# The x range 1-5.7 and the text() call below put EngineSize on the x axis and
# Weight on the y axis, so the axis labels are set accordingly (they used to
# be swapped). type = "n" only sets up the axes, avoiding two stray points at
# the corners of the frame.
plot(c(1, 5.70), c(1695, 4105), type = "n",
     xlab = "EngineSize", ylab = "Weight")

# Each car is shown by its class label.
text(Cars93$EngineSize, Cars93$Weight, Cars93$typ, cex = 0.6)

# Hard-coded segments that outline the rectangular regions of the partition.
lines(c(1.95, 1.95), c(1695, 4105))
lines(c(1.95, 5.70), c(3460, 3460))
lines(c(3.2, 3.2), c(1695, 3460))
lines(c(1.95, 5.70), c(3700, 3700))
lines(c(3.25, 3.25), c(3460, 3700))
lines(c(3.25, 5.70), c(3630, 3630))
 
 
 
 
# Success probability of a single trial.
p <- 0.51

# Number of trials.
n <- 1000

# Five independent draws of the number of successes in n trials.
rbinom(1, n, p)
rbinom(1, n, p)
rbinom(1, n, p)
rbinom(1, n, p)
rbinom(1, n, p)

# Five further draws, each reported as "more than 500 successes?".
rbinom(1, n, p) > 500
rbinom(1, n, p) > 500
rbinom(1, n, p) > 500
rbinom(1, n, p) > 500
rbinom(1, n, p) > 500

# Share of draws above / not above 500 successes among one million draws.
prop.table(table(rbinom(1000000, n, p) > 500))
 
 
library("randomForest")

# Keep only complete rows.
sam93 <- na.omit(Cars93)

# Columns whose factor has more than 32 levels (vapply guarantees a logical
# vector whatever the input).
# NOTE(review): presumably a check against randomForest's limit on the number
# of categories per factor - confirm the limit for the installed version.
which(vapply(sam93, function(column) nlevels(column) > 32, logical(1)))

# Random forest classifier for `typ`.
# The response is converted to a factor explicitly: a character response would
# not be treated as a classification target. factor() is a no-op for an
# existing factor apart from dropping unused levels.
# Columns 2, 3, 27 and 28 are removed from the predictors; column 28 is the
# appended `typ` - TODO confirm that 2, 3 and 27 are Model, Type and Make.
las <- randomForest(
  y = factor(sam93$typ),
  x = sam93[, -c(2, 3, 27, 28)],
  mtry = sqrt(dim(sam93)[2]),
  ntree = 2500
)