# Author: Asia Franaszek
 
 
library(nnet)


# Interactively pick the wine CSV (semicolon-separated, "." as decimal mark).
wino <- read.csv(file.choose(), header=TRUE, sep=";", dec=".")

names(wino)
# NOTE(review): attach() is discouraged (it snapshots columns); bare column
# references below were switched to explicit wino$ access for that reason.
attach(wino)

dim(wino)

# Scatterplot matrix of all variables, then of a subset of rows.
pairs(wino)
# NOTE(review): sample(100) permutes 1:100, i.e. this shows only the first
# 100 rows shuffled — sample(nrow(wino), 100) was probably intended. TODO confirm.
pairs(wino[sample(100),])

# Column 12 ("quality") is read as numeric; convert to a factor so nnet()
# treats the task as classification.
is.numeric(wino[,12])

wino[,12]<-factor(wino[,12])
# BUG FIX: the attach()ed copy of `quality` predates the factor conversion,
# so is.factor(quality) reported FALSE; inspect the data-frame column instead.
is.factor(wino$quality)

# Class proportions of the response.
prop.table(table(wino$quality))
 
# Reproducible 50/50 split of the wine data into training and validation rows.
set.seed(1)
losowa <- sample(nrow(wino), nrow(wino)/2)



# Sampled indices form the training half; the complement is the validation half.
wino.ucz <- wino[losowa, ]
wino.walid <- wino[-losowa, ]

# Sanity check: each half should hold roughly nrow(wino)/2 rows.
dim(wino.ucz)
dim(wino.walid)
 
 
 
# Fit a small network (2 hidden units) predicting quality from all other
# columns; seeded so the random initial weights are reproducible.
set.seed(1)
nn_wino1 <- nnet(factor(quality) ~ ., data=wino.ucz, size=2)
# Prints the fitted weights (input->hidden and hidden->output links).
summary(nn_wino1)
 
# Illustration of a single neuron's response: logistic activation
# 1 / (1 + exp(-(alpha0 + alpha1 * x))) scaled/shifted by beta0, beta1.
beta0 <- 0
beta1 <- 2
alpha0 <- 0
alpha1 <- .5

# FIX: the curve was stored in `t`, masking base::t (matrix transpose);
# renamed to sig_curve (and the grid to x_grid). Not referenced elsewhere.
x_grid <- seq(-20, 20, by = .5)
sig_curve <- beta0 + beta1 * (1 / (1 + exp(-(alpha0 + alpha1 * x_grid))))
plot(sig_curve ~ x_grid, col='mediumpurple', lty=2, lwd=2, type='l')
 
 
 
 
# Inspect the fitted model: weights summary and the stored components.
summary(nn_wino1)


attributes(nn_wino1)


# Flag recording whether a softmax output layer was used for this fit
# (nnet sets it automatically for multi-level factor responses).
nn_wino1$softmax
 
###### LAYERS (hidden-layer experiments)
# NOTE(review): this chunk duplicates the regression-section data load that
# appears again further down the file.
library(nnet)
set.seed(1)

# Boston housing data from the UCI repository (whitespace-separated, no header).
source.data <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
data.set <- read.table(source.data)
names(data.set) <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM",
                     "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
# Drop the 2nd column (ZN), standardize all predictors, and keep the
# response MEDV on its original scale.
data.set <- data.set[-2]
std.input.data <- scale(data.set[-ncol(data.set)])
data.set <- cbind(std.input.data, data.set[ncol(data.set)])
 
# NOTE(review): `data$train` and `wts` are only created much further down
# (the train/valid/test split and the random start weights) — run top to
# bottom this chunk fails. Execute the later section first, or move it up.
net1<-nnet(MEDV~., data = data$train, size = 5,
           linout = T, maxit = 10000,
           trace = FALSE, Wts = wts)
# NOTE(review): neuralnet() requires library(neuralnet), which is never
# loaded anywhere in this file — confirm the package is attached.
net2<-neuralnet(MEDV~CRIM+INDUS+CHAS+NOX+RM+AGE+DIS+RAD+
  TAX+PTRATIO+B+LSTAT, data = data$train, hidden=5,
                linear.output = T,stepmax=20000,
                startweights = wts)

# Toy 3-input truth table: AND is 1 only for row (1,1,1) of expand.grid;
# OR is 0 only for row (0,0,0).
AND <- c(rep(0,7),1)
OR <- c(0,rep(1,7))
binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR)
# Two-output net, 2 hidden units, 10 restarts (rep=10), cross-entropy error.
net <- neuralnet(AND+OR~Var1+Var2+Var3,  binary.data, hidden=2, 
                 rep=10, err.fct="ce", linear.output=FALSE)
plot(net)
 
 
 
# Larger wine network: 10 hidden units, up to 1000 iterations.
set.seed(1)
test <- nnet(factor(quality) ~ ., data=wino.ucz, size=10, maxit=1000)


# FIX: use <- (not =) for assignment, per R convention.
nn_wino2 <- test
attributes(nn_wino2)
# Final value of the fitting criterion at convergence.
nn_wino2$value

# NOTE(review): this refit (no fresh seed) replaces the model above with a
# different random initialization — presumably deliberate for comparison.
nn_wino2 <- nnet(factor(quality) ~ ., data=wino.ucz, size=10, maxit=1000)


summary(nn_wino2)


# Confusion tables and misclassification rates on the validation half.
pred1 <- predict(nn_wino1, newdata=wino.walid, type="class")
pred2 <- predict(nn_wino2, newdata=wino.walid, type="class")
table(pred1, wino.walid$quality)
table(pred2, wino.walid$quality)

error1 <- mean(pred1 != wino.walid$quality)
error1
error2 <- mean(pred2 != wino.walid$quality)
error2
 
 
# Grid of weight-decay (regularization) values to evaluate.
regulacja <- seq(0, 5, length = 10)

# Validation misclassification error per decay value (preallocated).
nnerror <- numeric(length(regulacja))
for (i in seq_along(regulacja)) {        # FIX: seq_along, not 1:length()
  nnwino <- nnet(factor(quality) ~ .,
                 data=wino.ucz,
                 size=10,
                 decay=regulacja[i],
                 trace=FALSE, maxit=1000) # FIX: FALSE, not reassignable F
  nnpred <- predict(nnwino, newdata=wino.walid, type="class")
  nnerror[i] <- mean(nnpred != wino.walid$quality)
}

plot(regulacja, nnerror, type="l")
grid()

# Decay value achieving the lowest validation error.
regulacja[which.min(nnerror)]
 
 
# Hidden-layer sizes to evaluate.
warstwa <- 1:20
# BUG FIX: the error vector was sized by length(regulacja) (10) although the
# loop fills length(warstwa) (20) entries; size it correctly up front.
nnerror <- numeric(length(warstwa))
for (i in seq_along(warstwa)) {
  nnwino <- nnet(factor(quality) ~ ., data=wino.ucz, size=warstwa[i],
                 trace=FALSE, maxit=1000)
  nnpred <- predict(nnwino, newdata=wino.walid, type="class")
  nnerror[i] <- mean(nnpred != wino.walid$quality)
}


plot(warstwa, nnerror, type="l")
# Hidden-layer size with the lowest validation error.
warstwa[which.min(nnerror)]
 
 
# Standardize the 11 predictor columns to zero mean / unit sd; the factor
# response (column 12) is copied over unchanged.
predyktory <- wino[-12]
wino_sd <- sweep(predyktory, 2, apply(predyktory, 2, mean), FUN="-")
wino_sd <- sweep(wino_sd, 2, apply(predyktory, 2, sd), FUN="/")
wino_sd[12] <- wino[12]
summary(wino_sd)

set.seed(1)

# Same 50/50 split scheme as before, now on the standardized data.
losowa <- sample(nrow(wino_sd), nrow(wino_sd)/2)
wino_sd.ucz <- wino_sd[losowa, ]
wino_sd.walid <- wino_sd[-losowa, ]

# Fit on the standardized training half; confusion table on validation.
nn_wino_sd <- nnet(factor(quality) ~ ., data=wino_sd.ucz, size=10,
                   trace=TRUE, maxit=1999)
pred_sd <- predict(nn_wino_sd, newdata=wino_sd.walid, type="class")
table(pred_sd, wino_sd.walid$quality)

# Misclassification rate, computed two equivalent ways:
# 1 - accuracy from the confusion table ...
1 - sum(diag(table(pred_sd, wino_sd.walid$quality))) /
  sum(table(pred_sd, wino_sd.walid$quality))
# ... and directly as the mean disagreement.
mean(pred_sd != wino_sd.walid$quality)
 
# NOTE(review): this repeats the decay search above and still uses the
# UNstandardized split (wino.ucz/wino.walid) rather than wino_sd.* —
# probably meant to use the standardized data here. TODO confirm.
regulacja <- seq(0, 5, length = 10)

nnerror <- numeric(length(regulacja))
for (i in seq_along(regulacja)) {        # FIX: seq_along, not 1:length()
  nnwino <- nnet(factor(quality) ~ .,
                 data=wino.ucz,
                 size=10,
                 decay=regulacja[i],
                 trace=FALSE, maxit=1000) # FIX: FALSE, not reassignable F
  nnpred <- predict(nnwino, newdata=wino.walid, type="class")
  nnerror[i] <- mean(nnpred != wino.walid$quality)
}

plot(regulacja, nnerror, type="l")
grid()
 
 
 
 
 
# REGRESSION TASK: predict Boston house prices (MEDV) with a neural net.



library(nnet)
set.seed(1)

# Boston housing data from the UCI repository (whitespace-separated, no header).
source.data <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
data.set <- read.table(source.data)
colnames(data.set) <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE",
                        "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
# Drop the 2nd column (ZN), standardize every predictor, and keep the
# response MEDV on its original scale.
data.set <- data.set[-2]
std.input.data <- scale(data.set[-ncol(data.set)])
data.set <- cbind(std.input.data, data.set[ncol(data.set)])
 
# NOTE(review): duplicate of the earlier net1/net2 chunk. `data$train` and
# `wts` are defined only BELOW this point (split and start weights), so this
# fails when the script runs top to bottom — move it after that section.
net1<-nnet(MEDV~., data = data$train, size = 5,
            linout = T, maxit = 10000,
           trace = FALSE, Wts = wts)
# NOTE(review): neuralnet() requires library(neuralnet), never loaded here.
net2<-neuralnet(MEDV~CRIM+INDUS+CHAS+NOX+RM+AGE+DIS+RAD+
TAX+PTRATIO+B+LSTAT, data = data$train, hidden=5,
             linear.output = T,stepmax=20000,
              startweights = wts)

# Toy 3-input truth table: AND is 1 only for (1,1,1); OR is 0 only for (0,0,0).
AND <- c(rep(0,7),1)
OR <- c(0,rep(1,7))
binary.data <- data.frame(expand.grid(c(0,1), c(0,1), c(0,1)), AND, OR)
# Two-output net, 2 hidden units, 10 restarts, cross-entropy error.
net <- neuralnet(AND+OR~Var1+Var2+Var3,  binary.data, hidden=2, 
                 rep=10, err.fct="ce", linear.output=FALSE)
plot(net)
 
 
 
 
# Random 1/3 / 1/3 / 1/3 partition into train / valid / test subsets:
# a random permutation is cut at its empirical tertiles.
train.frac <- valid.frac <- 1/3
rnd_split <- sample.int(nrow(data.set))
quan <- quantile(rnd_split, probs = c(0, train.frac,
                                      train.frac + valid.frac, 1))
rnd_class <- cut(rnd_split, quan, include.lowest = TRUE,
                 labels = c("train", "valid", "test"))
data <- split(data.set, rnd_class)


# Shared random starting weights in (-1, 1), reused for every decay value so
# runs differ only in regularization. 5*ncol(data.set) + neurons + 1 matches
# the weight count of a size-5 net on ncol(data.set)-1 inputs.
neurons <- 5
decays <- seq(0, 4, length.out = 100)
wts <- 2 * runif(5 * ncol(data.set) + neurons + 1) - 1
train.err <- valid.err <- numeric(length(decays))
nets <- list()
 
 
# WINDOWS: decay search with a native progress bar (winProgressBar exists
# only on Windows; the tcltk variant below is the portable alternative).
pb <- winProgressBar("Postep w %", "0% zrobione", 0, 1, 0)
for (d in seq_along(decays)){            # FIX: seq_along, not 1:length()
  nets[[d]] <- nnet(MEDV~., data = data$train, size = 5,
                    decay = decays[d], linout = TRUE, maxit = 10000,
                    trace = FALSE, Wts = wts)  # FIX: TRUE, not T
  # Training MSE from stored residuals; validation MSE from predictions.
  train.err[d] <- mean(nets[[d]]$residuals ^ 2)
  pred <- predict(nets[[d]], newdata = data$valid)
  valid.err[d] <- mean((pred - data$valid$MEDV) ^ 2)
  perc <- d / length(decays)
  setWinProgressBar(pb, perc, "Postep w %", sprintf("%d%% zrobione",
                                                    round(100*perc)))
}
close(pb)
 
#UN*X: same decay search with a portable tcltk progress bar.

library('tcltk')

pb <- tkProgressBar("Postep w %", "0% zrobione", 0, 1, 0)
for (d in seq_along(decays)){            # FIX: seq_along, not 1:length()
  nets[[d]] <- nnet(MEDV~., data = data$train, size = 5,
                    decay = decays[d], linout = TRUE, maxit = 10000,
                    trace = FALSE, Wts = wts)  # FIX: TRUE, not T
  # Training MSE from stored residuals; validation MSE from predictions.
  train.err[d] <- mean(nets[[d]]$residuals ^ 2)
  pred <- predict(nets[[d]], newdata = data$valid)
  valid.err[d] <- mean((pred - data$valid$MEDV) ^ 2)
  perc <- d / length(decays)

  setTkProgressBar(pb, perc, "Postęp w %", sprintf("%d%% zrobione",
                                                    round(100*perc)))
}
close(pb)
 
 
 
 
 
 
# Pick the net with the lowest validation MSE and score it on the test set.
best.net <- nets[[which.min(valid.err)]]
pred <- predict(best.net, newdata = data$test)
net.test.err <- mean((pred - data$test$MEDV) ^ 2)

# OLS baseline: train/valid/test MSE for a linear model on the same split.
ols <- lm(MEDV~., data = data$train)
ols.train.err <- mean(ols$residuals ^ 2)
pred <- predict(ols, newdata = data$valid)
ols.valid.err <- mean((pred - data$valid$MEDV) ^ 2)
pred <- predict(ols, newdata = data$test)
ols.test.err <- mean((pred - data$test$MEDV) ^ 2)

# Error curves vs decay: solid red = net train, solid blue = net valid;
# dots mark the chosen decay (valid minimum, blue) and its test error
# (green). Dashed lines are the OLS baselines. The 3 colors recycle across
# the 6 legend entries, pairing train/valid/test consistently.
plot(decays, train.err, "l", lwd = 2, col = "red",
     xlab = "Parametr decay", ylab = "MSE",
     ylim = c(min(train.err), max(valid.err,ols.test.err)))
lines(decays, valid.err, "l", col = "blue", lwd = 2)
points(decays[which.min(valid.err)], min(valid.err), pch = 19,
       col = "blue", cex = 1.5)
points(decays[which.min(valid.err)], net.test.err, pch = 19,
       col = "green", cex = 1.5)
abline(h = ols.train.err, col = "red", lty = 2)
abline(h = ols.valid.err, col = "blue", lty = 2)
abline(h = ols.test.err, col = "green", lty = 2)
legend("topright", lty = c(1, 1, NA, 2, 2, 2),
       lwd = c(2, 2, NA, 1, 1, 1), col=c("red", "blue", "green"),
       pch = c(NA, NA, 19, NA, NA, NA),
       legend = c("Net train", "Net valid", "Net test",
                  "OLS train", "OLS valid", "OLS test"))
 
 
# Concrete compressive-strength data; read.csv2 expects ";" separator and
# "," decimal mark. Assumes Concrete_Data.csv sits in the working directory.
beton<-read.csv2("Concrete_Data.csv")
names(beton)
# Polish column names: cement, slag, fly ash, water, superplasticizer,
# coarse aggregate, fine aggregate, age, compressive strength (CCS).
names(beton)<-c("cement", "zuzel", "popiol", "woda", "superplastyfikator",
                "krusz_grube", "krusz_drobne", "wiek", "CCS")
# NOTE(review): neuralnet() is called but library(neuralnet) is never loaded
# in this file — confirm the package is attached before running.
net2<-neuralnet(CCS~cement+zuzel+woda+wiek+popiol, data=beton,
                hidden=5)
# Same predictors with two hidden layers (3 units, then 2).
net3<-neuralnet(CCS~cement+zuzel+woda+wiek+popiol, data=beton,
                hidden=c(3,2))
plot(net3)

names(beton)
# NOTE(review): attach() is discouraged (stale copies, name masking); later
# code accesses columns via beton_split$..., so this may be unnecessary.
attach(beton)
pairs(beton)

dim(beton)
 
 
# Standardize every column of the concrete data (including the response CCS)
# to zero mean / unit sd.
beton_sd <- sweep(beton, 2, apply(beton, 2, mean), FUN="-")
beton_sd <- sweep(beton_sd, 2, apply(beton, 2, sd), FUN="/")

# BUG FIX: mean()/sd() applied to a whole data frame are defunct/errors in
# modern R — verify per column instead (means ~0, sds ~1).
colMeans(beton_sd)
sapply(beton_sd, sd)

# Random ~50/25/25 split into train / valid / test via uniform draws cut at
# their empirical quantiles.
rnd_split <- runif(nrow(beton_sd))
quan <- quantile(rnd_split, probs=c(0, 0.5, 0.75, 1))
rnd_class <- cut(rnd_split, quan, include.lowest=TRUE,
                 labels=c("ucz","walid","test"))
beton_split <- split(beton_sd, rnd_class)

dim(beton_split$ucz)
dim(beton_split$walid)
dim(beton_split$test)
 
 
# First attempt with nnet's default iteration cap — kept for comparison.
nn_beton<-nnet(CCS~., data=beton_split$ucz, size=10,linout=T)



# Refit with a larger iteration budget, overwriting the model above.
nn_beton<-nnet(CCS~., data=beton_split$ucz,
               size=10,linout=T,maxit=1000)

# Validation-set MSE (on the standardized CCS scale).
betonpred<-predict(nn_beton, newdata=beton_split$walid)
mean((betonpred-beton_split$walid$CCS)^2)
 
# Decay grid for the concrete regression model.
regulacja <- seq(0, 1, length = 10)

# Validation MSE per decay value (preallocated).
nnerror <- numeric(length(regulacja))
for (i in seq_along(regulacja)){         # FIX: seq_along, not 1:length()
  nn_beton <- nnet(CCS~., data=beton_split$ucz,
                   size=10, linout=TRUE, maxit=1000,  # FIX: TRUE, not T
                   decay=regulacja[i])
  betonpred <- predict(nn_beton, newdata=beton_split$walid)
  nnerror[i] <- mean((betonpred - beton_split$walid$CCS)^2)
}

plot(regulacja, nnerror, type="l")
grid()

# FIX: <- for assignment. Decay with the lowest validation MSE.
opt_reg <- regulacja[which.min(nnerror)]

opt_reg
 
# Refit at the selected decay value and report the held-out test-set MSE.
nn_beton <- nnet(
  CCS ~ ., data = beton_split$ucz,
  size = 10, linout = TRUE, maxit = 1000,
  decay = opt_reg
)


betonpred <- predict(nn_beton, newdata = beton_split$test)
mean((betonpred - beton_split$test$CCS)^2)
 
 
 
 
 
# Bagging: average the predictions of n_boot nets, each fit on a bootstrap
# resample of the training set, then report the ensemble's validation MSE.
n_boot <- 200                        # FIX: name the repeat count used twice
b_pred <- numeric(nrow(beton_split$walid))
for (i in seq_len(n_boot)) {         # FIX: seq_len, not 1:200
  bootsample <- beton_split$ucz[sample(seq_len(nrow(beton_split$ucz)),
                                       replace=TRUE), ]
  nn_model <- nnet(CCS~., data=bootsample, size=10, linout=TRUE, trace=FALSE)
  # predict() returns a 1-column matrix; `+` promotes b_pred to a matrix,
  # which is fine for the running sum and the final MSE.
  b_pred <- b_pred + predict(nn_model, newdata=beton_split$walid)
}

b_pred <- b_pred / n_boot
mean((b_pred - beton_split$walid$CCS)^2)