# Author: Asia Franaszek
# Neural-network exercises: classification (wine quality) and regression
# (Boston housing, concrete compressive strength) with nnet / neuralnet.

library(nnet)
library(neuralnet)  # FIX: neuralnet() is used below but was never loaded

## Wine quality classification ------------------------------------------------

# Expect the winequality CSV (semicolon-separated, dot decimal).
wino <- read.csv(file.choose(), header = TRUE, sep = ";", dec = ".")
names(wino)
dim(wino)
pairs(wino)
# FIX: sample(100) permuted rows 1..100; intent was 100 random rows.
pairs(wino[sample(nrow(wino), 100), ])

# Column 12 is the quality score; treat it as a class label.
is.numeric(wino[, 12])
wino[, 12] <- factor(wino[, 12])
is.factor(wino$quality)              # attach(wino) removed; reference explicitly
prop.table(table(wino$quality))

# Train / validation split (50/50).
set.seed(1)
losowa <- sample(nrow(wino), nrow(wino) / 2)
wino.ucz <- wino[losowa, ]
wino.walid <- wino[-losowa, ]
dim(wino.ucz)
dim(wino.walid)

# Small network: 2 hidden units.
set.seed(1)
nn_wino1 <- nnet(factor(quality) ~ ., data = wino.ucz, size = 2)
summary(nn_wino1)

# Illustration of a single neuron:
# t = beta0 + beta1 * sigmoid(alpha0 + alpha1 * x).
beta0 <- 0
beta1 <- 2
alpha0 <- 0
alpha1 <- 0.5
x <- seq(-20, 20, by = 0.5)
t <- beta0 + beta1 * (1 / (1 + exp(-(alpha0 + alpha1 * x))))
plot(t ~ x, col = "mediumpurple", lty = 2, lwd = 2, type = "l")

summary(nn_wino1)
attributes(nn_wino1)
nn_wino1$softmax

## Hidden layers demo (neuralnet) ---------------------------------------------
# NOTE(review): this section originally also contained nnet()/neuralnet() calls
# on `data$train` with starting weights `wts` — both undefined at that point
# (copy-paste from the regression section below). Those calls are consolidated
# into the regression section, after `data` and `wts` exist.

set.seed(1)

# Logic gates: learn AND and OR jointly, one hidden layer of 2 units,
# cross-entropy error (was duplicated twice in the original; kept once).
AND <- c(rep(0, 7), 1)
OR <- c(0, rep(1, 7))
binary.data <- data.frame(expand.grid(c(0, 1), c(0, 1), c(0, 1)), AND, OR)
net <- neuralnet(AND + OR ~ Var1 + Var2 + Var3, binary.data,
                 hidden = 2, rep = 10, err.fct = "ce", linear.output = FALSE)
plot(net)

# Larger wine network: 10 hidden units.
# (Original fit the identical model twice via a throwaway `test` object;
# a single fit suffices.)
set.seed(1)
nn_wino2 <- nnet(factor(quality) ~ ., data = wino.ucz, size = 10, maxit = 1000)
attributes(nn_wino2)
nn_wino2$value
summary(nn_wino2)

# Validation performance of both networks.
pred1 <- predict(nn_wino1, newdata = wino.walid, type = "class")
pred2 <- predict(nn_wino2, newdata = wino.walid, type = "class")
table(pred1, wino.walid$quality)
table(pred2, wino.walid$quality)
error1 <- mean(pred1 != wino.walid$quality)
error1
error2 <- mean(pred2 != wino.walid$quality)
error2

# Tune the weight-decay (regularisation) parameter on the validation set.
regulacja <- seq(0, 5, length = 10)
nnerror <- numeric(length(regulacja))
for (i in seq_along(regulacja)) {
  nnwino <- nnet(factor(quality) ~ ., data = wino.ucz, size = 10,
                 decay = regulacja[i], trace = FALSE, maxit = 1000)
  nnpred <- predict(nnwino, newdata = wino.walid, type = "class")
  nnerror[i] <- mean(nnpred != wino.walid$quality)
}
plot(regulacja, nnerror, type = "l")
grid()
regulacja[which.min(nnerror)]

# Tune the hidden-layer size.
warstwa <- 1:20
# FIX: was numeric(length(regulacja)) (10 slots) for a 20-iteration loop.
nnerror <- numeric(length(warstwa))
for (i in seq_along(warstwa)) {
  nnwino <- nnet(factor(quality) ~ ., data = wino.ucz, size = warstwa[i],
                 trace = FALSE, maxit = 1000)
  nnpred <- predict(nnwino, newdata = wino.walid, type = "class")
  nnerror[i] <- mean(nnpred != wino.walid$quality)
}
plot(warstwa, nnerror, type = "l")
warstwa[which.min(nnerror)]

# Standardise the predictors (zero mean, unit sd), keep quality as-is, refit.
wino_sd <- sweep(wino[-12], 2, apply(wino[-12], 2, mean), FUN = "-")
wino_sd <- sweep(wino_sd, 2, apply(wino[-12], 2, sd), FUN = "/")
wino_sd[12] <- wino[12]
summary(wino_sd)
set.seed(1)
losowa <- sample(nrow(wino_sd), nrow(wino_sd) / 2)
wino_sd.ucz <- wino_sd[losowa, ]
wino_sd.walid <- wino_sd[-losowa, ]
nn_wino_sd <- nnet(factor(quality) ~ ., data = wino_sd.ucz, size = 10,
                   trace = TRUE, maxit = 1999)
pred_sd <- predict(nn_wino_sd, newdata = wino_sd.walid, type = "class")
tab_sd <- table(pred_sd, wino_sd.walid$quality)
tab_sd
# Two equivalent ways to get the misclassification rate.
1 - sum(diag(tab_sd)) / sum(tab_sd)
mean(pred_sd != wino_sd.walid$quality)
# (A verbatim repeat of the decay-tuning loop appeared here in the original
# and has been removed.)

## Regression: Boston housing -------------------------------------------------

set.seed(1)
source.data <- "http://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data"
data.set <- read.table(source.data)
names(data.set) <- c("CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS",
                     "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV")
data.set <- data.set[-2]  # drop ZN
# Standardise inputs; leave the response MEDV on its original scale.
std.input.data <- scale(data.set[-ncol(data.set)])
data.set <- cbind(std.input.data, data.set[ncol(data.set)])

# Train / validation / test split, one third each.
train.frac <- valid.frac <- 1 / 3
rnd_split <- sample.int(nrow(data.set))
quan <- quantile(rnd_split,
                 probs = c(0, train.frac, train.frac + valid.frac, 1))
rnd_class <- cut(rnd_split, quan, include.lowest = TRUE,
                 labels = c("train", "valid", "test"))
data <- split(data.set, rnd_class)

neurons <- 5
decays <- seq(0, 4, length.out = 100)
# One fixed random starting-weight vector so the fits below differ only in
# the decay parameter.  Weight count for size-5 linout nnet:
# 5 * (inputs + 1) + (5 + 1) = 5 * ncol(data.set) + neurons + 1.
wts <- 2 * runif(5 * ncol(data.set) + neurons + 1) - 1

# FIX(relocated): these two single fits referenced `data` and `wts` before
# they were defined; they belong here, after both exist.
net1 <- nnet(MEDV ~ ., data = data$train, size = 5, linout = TRUE,
             maxit = 10000, trace = FALSE, Wts = wts)
net2 <- neuralnet(MEDV ~ CRIM + INDUS + CHAS + NOX + RM + AGE + DIS + RAD +
                    TAX + PTRATIO + B + LSTAT,
                  data = data$train, hidden = 5, linear.output = TRUE,
                  stepmax = 20000, startweights = wts)

train.err <- valid.err <- numeric(length(decays))
nets <- list()

# Fit one network per decay value, tracking train/validation MSE, with a
# platform-appropriate progress bar (the original duplicated the whole loop
# for Windows vs Unix; a single loop with a guard suffices).
if (.Platform$OS.type == "windows") {
  pb <- winProgressBar("Postep w %", "0% zrobione", 0, 1, 0)
  set_pb <- function(pb, perc) {
    setWinProgressBar(pb, perc, "Postep w %",
                      sprintf("%d%% zrobione", round(100 * perc)))
  }
} else {
  library(tcltk)
  pb <- tkProgressBar("Postep w %", "0% zrobione", 0, 1, 0)
  set_pb <- function(pb, perc) {
    setTkProgressBar(pb, perc, "Postęp w %",
                     sprintf("%d%% zrobione", round(100 * perc)))
  }
}
for (d in seq_along(decays)) {
  nets[[d]] <- nnet(MEDV ~ ., data = data$train, size = 5, decay = decays[d],
                    linout = TRUE, maxit = 10000, trace = FALSE, Wts = wts)
  train.err[d] <- mean(nets[[d]]$residuals ^ 2)
  pred <- predict(nets[[d]], newdata = data$valid)
  valid.err[d] <- mean((pred - data$valid$MEDV) ^ 2)
  set_pb(pb, d / length(decays))
}
close(pb)

# Pick the network with the smallest validation MSE; report its test MSE.
best.net <- nets[[which.min(valid.err)]]
pred <- predict(best.net, newdata = data$test)
net.test.err <- mean((pred - data$test$MEDV) ^ 2)

# Baseline: ordinary least squares on the same splits.
ols <- lm(MEDV ~ ., data = data$train)
ols.train.err <- mean(ols$residuals ^ 2)
pred <- predict(ols, newdata = data$valid)
ols.valid.err <- mean((pred - data$valid$MEDV) ^ 2)
pred <- predict(ols, newdata = data$test)
ols.test.err <- mean((pred - data$test$MEDV) ^ 2)

# Error curves: network train/valid vs decay, plus OLS reference lines.
plot(decays, train.err, "l", lwd = 2, col = "red",
     xlab = "Parametr decay", ylab = "MSE",
     ylim = c(min(train.err), max(valid.err, ols.test.err)))
lines(decays, valid.err, "l", col = "blue", lwd = 2)
points(decays[which.min(valid.err)], min(valid.err),
       pch = 19, col = "blue", cex = 1.5)
points(decays[which.min(valid.err)], net.test.err,
       pch = 19, col = "green", cex = 1.5)
abline(h = ols.train.err, col = "red", lty = 2)
abline(h = ols.valid.err, col = "blue", lty = 2)
abline(h = ols.test.err, col = "green", lty = 2)
legend("topright", lty = c(1, 1, NA, 2, 2, 2), lwd = c(2, 2, NA, 1, 1, 1),
       col = c("red", "blue", "green"),
       pch = c(NA, NA, 19, NA, NA, NA),
       legend = c("Net train", "Net valid", "Net test",
                  "OLS train", "OLS valid", "OLS test"))

## Concrete compressive strength ----------------------------------------------

beton <- read.csv2("Concrete_Data.csv")
names(beton)
names(beton) <- c("cement", "zuzel", "popiol", "woda", "superplastyfikator",
                  "krusz_grube", "krusz_drobne", "wiek", "CCS")
pairs(beton)                         # attach(beton) removed
dim(beton)

# neuralnet with one hidden layer of 5 units, then two layers (3, 2).
net2 <- neuralnet(CCS ~ cement + zuzel + woda + wiek + popiol,
                  data = beton, hidden = 5)
net3 <- neuralnet(CCS ~ cement + zuzel + woda + wiek + popiol,
                  data = beton, hidden = c(3, 2))
plot(net3)

# Standardise every column.
beton_sd <- sweep(beton, 2, apply(beton, 2, mean), FUN = "-")
beton_sd <- sweep(beton_sd, 2, apply(beton, 2, sd), FUN = "/")
# FIX: mean()/sd() on a data frame are defunct in modern R; use per-column.
colMeans(beton_sd)
sapply(beton_sd, sd)

# Train (50%) / validation (25%) / test (25%) split.
rnd_split <- runif(nrow(beton_sd))
quan <- quantile(rnd_split, probs = c(0, 0.5, 0.75, 1))
rnd_class <- cut(rnd_split, quan, include.lowest = TRUE,
                 labels = c("ucz", "walid", "test"))
beton_split <- split(beton_sd, rnd_class)
dim(beton_split$ucz)
dim(beton_split$walid)
dim(beton_split$test)

# Fit once (the original fit the same model twice, without then with maxit).
nn_beton <- nnet(CCS ~ ., data = beton_split$ucz, size = 10, linout = TRUE,
                 maxit = 1000)
betonpred <- predict(nn_beton, newdata = beton_split$walid)
mean((betonpred - beton_split$walid$CCS) ^ 2)

# Tune decay on the validation set.
regulacja <- seq(0, 1, length = 10)
nnerror <- numeric(length(regulacja))
for (i in seq_along(regulacja)) {
  nn_beton <- nnet(CCS ~ ., data = beton_split$ucz, size = 10, linout = TRUE,
                   maxit = 1000, decay = regulacja[i])
  betonpred <- predict(nn_beton, newdata = beton_split$walid)
  nnerror[i] <- mean((betonpred - beton_split$walid$CCS) ^ 2)
}
plot(regulacja, nnerror, type = "l")
grid()
opt_reg <- regulacja[which.min(nnerror)]
opt_reg

# Refit with the optimal decay; report test MSE.
nn_beton <- nnet(CCS ~ ., data = beton_split$ucz, size = 10, linout = TRUE,
                 maxit = 1000, decay = opt_reg)
betonpred <- predict(nn_beton, newdata = beton_split$test)
mean((betonpred - beton_split$test$CCS) ^ 2)

# Bagging: average the predictions of 200 networks, each trained on a
# bootstrap resample of the training set.
b_pred <- numeric(nrow(beton_split$walid))
for (i in 1:200) {
  bootsample <- beton_split$ucz[sample(seq_len(nrow(beton_split$ucz)),
                                       replace = TRUE), ]
  nn_model <- nnet(CCS ~ ., data = bootsample, size = 10, linout = TRUE,
                   trace = FALSE)
  b_pred <- b_pred + predict(nn_model, newdata = beton_split$walid)
}
b_pred <- b_pred / 200
mean((b_pred - beton_split$walid$CCS) ^ 2)