suppressMessages(library(MASS, quietly = T))
suppressMessages(library(ISLR, quietly = T))
suppressMessages(library(ggplot2, quietly = T))
suppressMessages(library(corrplot, quietly = T))
suppressMessages(library("readxl",quietly = T))
suppressMessages(library(Metrics,quietly = T))
suppressMessages(library(ggpubr, quietly = T))
suppressMessages(library(tidyverse, quietly = T))
suppressMessages(library(boot,quietly = T))
suppressMessages(require(corrplot))
suppressMessages(library(e1071))
Leemos los datos del archivo Excel del sensor, que contiene las siguientes
variables: “Time”, “A_10”, “A_20”, “A_30”, “A_40”, “X_Value”,
“hilo”, “fuerza”, “incli_sup” e “Incli_inf”.
# Read the sensor workbook (the path is specific to the author's machine).
ruta_sensor <- "C:/Users/User/Desktop/MasterIndustriales/Ingenia/ProbetA/Sensor.xlsx"
Sensor <- read_excel(ruta_sensor)
# View(Sensor)
# Scale the force column by a factor of 10.
Sensor[["fuerza"]] <- Sensor[["fuerza"]] * 10
# Drop the 11th column of the sheet.
Sensor <- Sensor[, -11]
names(Sensor)
## [1] "Time" "A_10" "A_20" "A_30" "A_40" "X_Value"
## [7] "hilo" "fuerza" "incli_sup" "Incli_inf"
“Time” “A_10” “A_20” “A_30” “A_40” “X_Value”
“hilo” “fuerza” “incli_sup” “Incli_inf”
# Rows 1..2021, every column except the 6th (X_Value in the names() listing);
# fuerza is then regressed on all remaining columns.
dat_modelo <- Sensor[1:2021, -6]
modelo_simple <- lm(formula = fuerza ~ ., data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ ., data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -185.902 -43.778 -1.242 42.988 265.156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.124e+04 1.270e+03 -40.359 < 2e-16 ***
## Time 2.782e+00 3.026e-01 9.193 < 2e-16 ***
## A_10 -1.627e+00 7.013e-02 -23.194 < 2e-16 ***
## A_20 3.428e-01 3.215e-02 10.665 < 2e-16 ***
## A_30 1.473e+00 1.639e-01 8.983 < 2e-16 ***
## A_40 3.297e-01 1.721e-01 1.915 0.0556 .
## hilo 8.199e+01 1.373e+00 59.731 < 2e-16 ***
## incli_sup -6.786e+01 8.439e+00 -8.041 1.5e-15 ***
## Incli_inf 3.712e+02 2.386e+01 15.557 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 67.21 on 2012 degrees of freedom
## Multiple R-squared: 0.9975, Adjusted R-squared: 0.9975
## F-statistic: 9.957e+04 on 8 and 2012 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# In-sample predictions: the model is applied to the rows it was fitted on.
fuerzaPredict <- predict(modelo_simple, newdata = dat_modelo)
dat_modelo$fuerzaPredict <- fuerzaPredict
# Shared colour mapping for observed vs. predicted force.
Colors00 <- c(Fuerza = "black", Fuerza_Predicha = "turquoise4")
ggplot(dat_modelo, aes(x = Time)) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(
    title = "Todas las variables y todos los datos",
    y = "Fuerza (Scaled)",
    color = "Legend"
  ) +
  theme(legend.position = "bottom")
# Hold-out split for the all-variables model: 80 % train, 20 % test.
dat_modelo <- Sensor[1:2021, -6]
# This was the only sample() call in the script with no seed set anywhere
# before it, so the split (and the RMSE reported for it) changed on every run.
# Seed 10 is the value the gauge-only split further down already uses.
set.seed(10)
# floor() makes explicit the truncation sample() applied to the fractional size.
n_train <- floor(0.8 * nrow(dat_modelo))
row.number <- sample(seq_len(nrow(dat_modelo)), n_train)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
dim(train)
## [1] 1616 9
dim(test)
## [1] 405 9
# Fit on the training rows, then predict the held-out rows.
modelo_train <- lm(formula = fuerza ~ ., data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force on the test set
ggplot(test, aes(x = Time)) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(
    title = "Test Data Set, Todas las variables",
    y = "Fuerza (Scaled)",
    color = "Legend"
  ) +
  theme(legend.position = "bottom")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Test Data Set, distribución Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Test Data Set, distribución Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the all-variables linear model.
(rmse_lineal_todos <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 69.40432
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Pearson correlation between every modelled variable except Time
vars_corr <- c(
  "A_10", "A_20", "A_30", "A_40", "hilo", "fuerza",
  "incli_sup", "Incli_inf"
)
matriz_corr <- cor(dat_modelo[, vars_corr], method = "pearson")
corrplot.mixed(corr = matriz_corr)
Consideramos las siguientes variables “A_10” “A_20” “A_30” “A_40” “fuerza”
# Gauge plot: the four A_* channels against Time
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
colors <- c(
  A_10 = "aquamarine", A_20 = "aquamarine1",
  A_30 = "aquamarine2", A_40 = "aquamarine3"
)
ggplot(dat_modelo, aes(x = Time)) +
  geom_point(aes(y = A_10, color = "A_10")) +
  geom_point(aes(y = A_20, color = "A_20")) +
  geom_point(aes(y = A_30, color = "A_30")) +
  geom_point(aes(y = A_40, color = "A_40")) +
  scale_color_manual(values = colors) +
  labs(title = "Galgas", x = "Time", y = "um/m", color = "Legend") +
  theme(legend.position = "bottom")
# Gauge-only data: columns "A_10" "A_20" "A_30" "A_40" plus fuerza
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
modelo_simple <- lm(formula = fuerza ~ ., data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ ., data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -358.51 -69.92 -7.02 56.85 381.84
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -546.00656 8.88178 -61.475 < 2e-16 ***
## A_10 -4.17022 0.05945 -70.150 < 2e-16 ***
## A_20 0.62878 0.04784 13.142 < 2e-16 ***
## A_30 3.68750 0.26680 13.821 < 2e-16 ***
## A_40 2.13034 0.27334 7.794 1.03e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 114.2 on 2016 degrees of freedom
## Multiple R-squared: 0.9927, Adjusted R-squared: 0.9927
## F-statistic: 6.86e+04 on 4 and 2016 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# In-sample predictions: the model is applied to the rows it was fitted on.
fuerzaPredict <- predict(modelo_simple, newdata = dat_modelo)
dat_modelo$fuerzaPredict <- fuerzaPredict
ggplot(dat_modelo, aes(x = c(1:length(fuerza)))) +
  geom_point(aes(y = fuerza), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict), color = "black")
# Seeded 80/20 hold-out split on the gauge-only data.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
set.seed(10)
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
dim(train)
## [1] 1616 5
dim(test)
## [1] 405 5
# Fit the gauge-only model on the training rows and predict the test rows.
modelo_train <- lm(formula = fuerza ~ ., data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force, plotted against the test-row position
ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(
    title = "Test Data Set, Todas las GALGAS",
    y = "Fuerza (Scaled)",
    color = "Legend"
  ) +
  theme(legend.position = "bottom")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Distribucion_Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Distribucion_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the gauge-only linear model.
(rmse_lineal_galgas <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 117.1184
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Pearson correlation between the four gauges and the force
vars_galgas <- c("A_10", "A_20", "A_30", "A_40", "fuerza")
matriz_corr_galgas <- cor(dat_modelo[, vars_galgas], method = "pearson")
corrplot.mixed(corr = matriz_corr_galgas)
#### A_10 ####
# Simple regression of fuerza on A_10 alone, fitted on all 2021 rows.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
modelo_simple <- lm(formula = fuerza ~ A_10, data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ A_10, data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -585.9 -338.9 186.7 216.7 351.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -42.753191 9.497209 -4.502 7.13e-06 ***
## A_10 0.769870 0.003788 203.229 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 288.6 on 2019 degrees of freedom
## Multiple R-squared: 0.9534, Adjusted R-squared: 0.9534
## F-statistic: 4.13e+04 on 1 and 2019 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# 80/20 hold-out split (Time, the four gauges and fuerza).
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# A_10-only model: fit on train, predict test.
modelo_train <- lm(formula = fuerza ~ A_10, data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force
ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(
    title = "Test Data Set, A_10",
    y = "Fuerza (Scaled)",
    color = "Legend"
  ) +
  theme(legend.position = "bottom")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Distribución_Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the A_10-only linear model.
(rmse_A10 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 286.2313
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
#### A_20
#### A_20 ####
# Simple regression of fuerza on A_20 alone, fitted on all 2021 rows.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
modelo_simple <- lm(formula = fuerza ~ A_20, data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ A_20, data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -565.2 -306.6 146.7 218.8 330.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.679e+02 8.509e+00 -19.73 <2e-16 ***
## A_20 8.242e-01 3.807e-03 216.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 271.6 on 2019 degrees of freedom
## Multiple R-squared: 0.9587, Adjusted R-squared: 0.9587
## F-statistic: 4.688e+04 on 1 and 2019 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# 80/20 hold-out split (Time, the four gauges and fuerza).
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# A_20-only model: fit on train, predict test.
modelo_train <- lm(formula = fuerza ~ A_20, data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force
ggplot(test, aes(x = c(1:length(fuerza)))) +
  geom_point(aes(y = fuerza), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test), color = "black")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Distribución_Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the A_20-only linear model.
(rmse_A20 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 274.2799
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
#### A_30
#### A_30 ####
# Simple regression of fuerza on A_30 alone, fitted on all 2021 rows.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
modelo_simple <- lm(formula = fuerza ~ A_30, data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ A_30, data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -550.9 -290.6 151.7 184.6 317.0
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -98.742424 8.050845 -12.27 <2e-16 ***
## A_30 0.974629 0.004142 235.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 250.7 on 2019 degrees of freedom
## Multiple R-squared: 0.9648, Adjusted R-squared: 0.9648
## F-statistic: 5.538e+04 on 1 and 2019 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# 80/20 hold-out split (Time, the four gauges and fuerza).
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# A_30-only model: fit on train, predict test.
modelo_train <- lm(formula = fuerza ~ A_30, data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force
ggplot(test, aes(x = c(1:length(fuerza)))) +
  geom_point(aes(y = fuerza), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test), color = "black")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Distribución_Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the A_30-only linear model.
(rmse_A30 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 258.1888
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
#### A_40
#### A_40 ####
# Simple regression of fuerza on A_40 alone, fitted on all 2021 rows.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
modelo_simple <- lm(formula = fuerza ~ A_40, data = dat_modelo)
summary(modelo_simple)
##
## Call:
## lm(formula = fuerza ~ A_40, data = dat_modelo)
##
## Residuals:
## Min 1Q Median 3Q Max
## -531.6 -281.1 144.9 175.9 318.6
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.235e+02 7.793e+00 -15.85 <2e-16 ***
## A_40 1.160e+00 4.813e-03 240.94 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 245.1 on 2019 degrees of freedom
## Multiple R-squared: 0.9664, Adjusted R-squared: 0.9664
## F-statistic: 5.805e+04 on 1 and 2019 DF, p-value: < 2.2e-16
# Diagnostic plots are switched off; the 2x2 layout is set and then restored.
par(mfrow = c(2, 2))
# plot(modelo_simple)
par(mfrow = c(1, 1))
# 80/20 hold-out split (Time, the four gauges and fuerza).
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# A_40-only model: fit on train, predict test.
modelo_train <- lm(formula = fuerza ~ A_40, data = train)
fuerzaPredict_test <- predict(modelo_train, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force
ggplot(test, aes(x = c(1:length(fuerza)))) +
  geom_point(aes(y = fuerza), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test), color = "black")
# Distribution of predicted vs. observed force on the test set.
# The original passed type = "real" to the second geom_density(); that is not
# a geom_density() parameter (ggplot2 warns and ignores it), so it is removed.
ggplot(data = test) +
  geom_density(aes(x = fuerzaPredict_test, color = "Predicho"),
               alpha = 0.5, position = "identity") +
  geom_density(aes(x = fuerza, color = "Real"),
               alpha = 0.5, position = "identity") +
  ggtitle("Distribución_Valores")
# Overlaid histograms of the same two series.
ggplot(data = test) +
  geom_histogram(aes(x = fuerzaPredict_test, color = "Predicho"),
                 alpha = 0.5, position = "identity", fill = "white") +
  geom_histogram(aes(x = fuerza, color = "Real"),
                 alpha = 0.5, position = "identity", fill = "lightblue") +
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Test-set RMSE of the A_40-only linear model.
(rmse_A40 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza)))
## [1] 255.1003
# Relative error in percent (not an absolute error). The denominator uses
# abs(fuerza): dividing by the signed value made the percentage negative for
# every observation whose fuerza is negative.
Error_Plot <- abs(fuerzaPredict_test - test$fuerza) * 100 / abs(test$fuerza)
ggplot() +
  geom_point(aes(y = Error_Plot, x = seq_along(Error_Plot)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
# Zoom on test observations 100 to 300.
Error_zoom <- Error_Plot[100:300]
ggplot() +
  geom_point(aes(y = Error_zoom, x = seq_along(Error_zoom)), color = "black") +
  labs(title = "Error", x = "Values", y = "Error(%)")
### Comparación
##### Comparison ####
# Test RMSE of the six linear models, one labelled dot per model.
# The column is called test.MSE but the values stored in it are RMSEs.
modelo <- c("A_10", "A_20", "A_30", "A_40", "Todas_Galgas", "Todas_Variables")
test.MSE <- c(
  rmse_A10, rmse_A20, rmse_A30, rmse_A40,
  rmse_lineal_galgas, rmse_lineal_todos
)
comparacion <- data.frame(modelo = modelo, test.MSE = test.MSE)
p_comp00 <- ggplot(
  comparacion,
  aes(
    x = reorder(x = modelo, X = test.MSE),
    y = test.MSE,
    color = modelo,
    label = round(test.MSE, 2)
  )
) +
  geom_point(size = 15) +
  geom_text(color = "white", size = 4) +
  labs(
    x = "Modelo regresión",
    y = "Test error(RMSE)",
    title = "RMSE all models lineal"
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none")
#####
p_comp00
# Hold-out evaluation of polynomial models on the four gauges, degrees 1 to 10.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
row.number <- sample(seq_len(nrow(dat_modelo)), floor(0.8 * nrow(dat_modelo)))
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# Test RMSE per polynomial degree. The vector keeps the name `rss` because
# later code reads rss[5], but what it stores is an RMSE.
rss <- rep(NA_real_, 10)
for (i in seq_along(rss)) {
  # The original re-created dat_modelo here on every iteration; the value was
  # identical to the one above and never read inside the loop, so it is dropped.
  modelo.poli <- lm(
    fuerza ~ poly(A_10, i) + poly(A_20, i) + poly(A_30, i) + poly(A_40, i),
    data = train
  )
  fuerzaPredict_test <- predict(object = modelo.poli, newdata = test)
  test$fuerzaPredict_test <- fuerzaPredict_test
  rss[i] <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
}
# The title used to read "cv.MSE_A10", copied from the A_10-only plot; the
# curve is the hold-out RMSE of the model built on all four gauges.
p_00 <- ggplot(
  data = data.frame(polinomio = 1:10, rss = rss),
  aes(x = polinomio, y = rss)
) +
  geom_point(color = "orangered2") +
  geom_path() +
  scale_x_continuous(breaks = 0:10) +
  labs(title = "RMSE test (todas las galgas) ~ Grado de polinomio") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  ) +
  theme(plot.title = element_text(hjust = 0.5))
p_00
Considerando el polinomio de grado 5 como el óptimo
# Hold-out evaluation of single-gauge polynomial models, degrees 1 to 10.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
row.number <- sample(seq_len(nrow(dat_modelo)), floor(0.8 * nrow(dat_modelo)))
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# Test RMSE per degree, one vector per gauge (named rss_* but holding RMSE).
rss_10 <- rep(NA_real_, 10)
rss_20 <- rep(NA_real_, 10)
rss_30 <- rep(NA_real_, 10)
rss_40 <- rep(NA_real_, 10)
# set.seed(2) used to be called on every iteration, after the models were
# fitted. lm(), poly() and predict() draw no random numbers, so one call
# before the loop leaves the RNG in exactly the same state for the code that
# follows. The per-iteration re-creation of dat_modelo was likewise redundant.
set.seed(2)
for (i in seq_along(rss_10)) {
  modelo.poli_10 <- lm(fuerza ~ poly(A_10, i), data = train)
  modelo.poli_20 <- lm(fuerza ~ poly(A_20, i), data = train)
  modelo.poli_30 <- lm(fuerza ~ poly(A_30, i), data = train)
  modelo.poli_40 <- lm(fuerza ~ poly(A_40, i), data = train)
  fuerzaPredict_test_10 <- predict(object = modelo.poli_10, newdata = test)
  test$fuerzaPredict_test_10 <- fuerzaPredict_test_10
  fuerzaPredict_test_20 <- predict(object = modelo.poli_20, newdata = test)
  test$fuerzaPredict_test_20 <- fuerzaPredict_test_20
  fuerzaPredict_test_30 <- predict(object = modelo.poli_30, newdata = test)
  test$fuerzaPredict_test_30 <- fuerzaPredict_test_30
  fuerzaPredict_test_40 <- predict(object = modelo.poli_40, newdata = test)
  test$fuerzaPredict_test_40 <- fuerzaPredict_test_40
  rss_10[i] <- sqrt(sum((fuerzaPredict_test_10 - test$fuerza)^2) / length(test$fuerza))
  rss_20[i] <- sqrt(sum((fuerzaPredict_test_20 - test$fuerza)^2) / length(test$fuerza))
  rss_30[i] <- sqrt(sum((fuerzaPredict_test_30 - test$fuerza)^2) / length(test$fuerza))
  rss_40[i] <- sqrt(sum((fuerzaPredict_test_40 - test$fuerza)^2) / length(test$fuerza))
}
# One error-vs-degree curve per gauge, arranged afterwards in a 2x2 grid.
# A_10
p_a10 <- ggplot(
  data.frame(polinomio = 1:10, rss_10 = rss_10),
  aes(x = polinomio, y = rss_10)
) +
  geom_point(color = "orangered2") +
  geom_path() +
  scale_x_continuous(breaks = 0:10) +
  labs(title = "cv.MSE_A10 ~ Grado de polinomio") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# A_20
p_a20 <- ggplot(
  data.frame(polinomio = 1:10, rss_20 = rss_20),
  aes(x = polinomio, y = rss_20)
) +
  geom_point(color = "orangered2") +
  geom_path() +
  scale_x_continuous(breaks = 0:10) +
  labs(title = "cv.MSE_A20 ~ Grado de polinomio") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# A_30
p_a30 <- ggplot(
  data.frame(polinomio = 1:10, rss_30 = rss_30),
  aes(x = polinomio, y = rss_30)
) +
  geom_point(color = "orangered2") +
  geom_path() +
  scale_x_continuous(breaks = 0:10) +
  labs(title = "cv.MSE_A30 ~ Grado de polinomio") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
# A_40
p_a40 <- ggplot(
  data.frame(polinomio = 1:10, rss_40 = rss_40),
  aes(x = polinomio, y = rss_40)
) +
  geom_point(color = "orangered2") +
  geom_path() +
  scale_x_continuous(breaks = 0:10) +
  labs(title = "cv.MSE_A40 ~ Grado de polinomio") +
  theme_bw() +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(hjust = 0.5)
  )
ggarrange(
  p_a10, p_a20, p_a30, p_a40,
  nrow = 2, ncol = 2, common.legend = TRUE, legend = "bottom"
)
# Degree-7 polynomial model per gauge, fitted on train and plotted on test.
# Fix: each geom_line() took its x positions from length(fuerzaPredict_test),
# a leftover global from an earlier split that is not a column of `test`.
# It only worked because both test sets had the same number of rows. Each
# line now uses the prediction vector it actually draws.
modelo.poli_10 <- lm(fuerza ~ poly(A_10, 7), data = train)
modelo.poli_20 <- lm(fuerza ~ poly(A_20, 7), data = train)
modelo.poli_30 <- lm(fuerza ~ poly(A_30, 7), data = train)
modelo.poli_40 <- lm(fuerza ~ poly(A_40, 7), data = train)
# A_10
fuerzaPredict_test_10 <- predict(object = modelo.poli_10, newdata = test)
test$fuerzaPredict_test_10 <- fuerzaPredict_test_10
p10 <- ggplot(data = test) +
  geom_point(aes(y = fuerza, x = c(1:length(fuerzaPredict_test_10)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_10, x = c(1:length(fuerzaPredict_test_10)), color = "Fuerza_Predicha")) +
  theme(legend.position = "bottom") +
  ylab("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_10 ~ Grado 7") +
  scale_color_manual(values = Colors00) +
  labs(color = "Legend")
# A_20
fuerzaPredict_test_20 <- predict(object = modelo.poli_20, newdata = test)
test$fuerzaPredict_test_20 <- fuerzaPredict_test_20
p20 <- ggplot(data = test) +
  geom_point(aes(y = fuerza, x = c(1:length(fuerzaPredict_test_20)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_20, x = c(1:length(fuerzaPredict_test_20)), color = "Fuerza_Predicha")) +
  theme(legend.position = "bottom") +
  ylab("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_20 ~ Grado 7") +
  scale_color_manual(values = Colors00) +
  labs(color = "Legend")
# A_30
fuerzaPredict_test_30 <- predict(object = modelo.poli_30, newdata = test)
test$fuerzaPredict_test_30 <- fuerzaPredict_test_30
p30 <- ggplot(data = test) +
  geom_point(aes(y = fuerza, x = c(1:length(fuerzaPredict_test_30)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_30, x = c(1:length(fuerzaPredict_test_30)), color = "Fuerza_Predicha")) +
  theme(legend.position = "bottom") +
  ylab("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_30 ~ Grado 7") +
  scale_color_manual(values = Colors00) +
  labs(color = "Legend")
# A_40
fuerzaPredict_test_40 <- predict(object = modelo.poli_40, newdata = test)
test$fuerzaPredict_test_40 <- fuerzaPredict_test_40
p40 <- ggplot(data = test) +
  geom_point(aes(y = fuerza, x = c(1:length(fuerzaPredict_test_40)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_40, x = c(1:length(fuerzaPredict_test_40)), color = "Fuerza_Predicha")) +
  theme(legend.position = "bottom") +
  ylab("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_40 ~ Grado 7") +
  scale_color_manual(values = Colors00) +
  labs(color = "Legend")
ggarrange(p10, p20, p30, p40, nrow = 2, ncol = 2, common.legend = TRUE, legend = "bottom")
Considerando el polinomio de grado 7 como el óptimo en todos los casos
##### Comparison ####
# Test RMSE of the polynomial models: degree 7 per gauge, degree 5 for the
# four-gauge model. The column is called test.MSE but holds RMSE values.
# NOTE(review): rss comes from a different sample() split than rss_10..rss_40.
modelo01 <- c("A_10_G7", "A_20_G7", "A_30_G7", "A_40_G7", "Todas_Galgas_G5")
test.MSE <- c(rss_10[7], rss_20[7], rss_30[7], rss_40[7], rss[5])
comparacion01 <- data.frame(modelo01 = modelo01, test.MSE = test.MSE)
p_comp01 <- ggplot(
  comparacion01,
  aes(
    x = reorder(x = modelo01, X = test.MSE),
    y = test.MSE,
    color = modelo01,
    label = round(test.MSE, 2)
  )
) +
  geom_point(size = 15) +
  geom_text(color = "white", size = 4) +
  labs(
    x = "Modelo regresión",
    y = "Test error(RMSE)",
    title = "RMSE all models no lineal"
  ) +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none")
#####
p_comp01
La Regresión con Vectores de Soporte o SVR por sus siglas en inglés, es un modelo de regresión basado en las Máquinas de Vectores de Soporte y que, grosso modo, son modelos capaces de generar clasificaciones o regresiones de datos no lineales a partir de la transformación de los datos de entrada a otros espacios de mayores dimensiones. En el caso de la regresión, la SVR busca encontrar aquella curva que sea capaz de ajustar los datos garantizando que la separación entre ésta y ciertos valores específicos del conjunto de entrenamiento (los vectores de soporte) sea la mayor posible. Para entender mejor el concepto, recomiendo echarle un ojo al video Support Vector Machine (SVM) - Fun and Easy Machine Learning.
# SVR (eps-regression) on every variable except column 6; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, -6]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ ., data = train, type = "eps-regression")
fuerzaPredict_test <- predict(regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force on the test rows
ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(title = "Fuerza vs Predicha", y = "Fuerza (Scaled)", color = "Legend") +
  theme(legend.position = "bottom")
# Test RMSE (stored under an rss_* name)
rss_all_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# SVR (eps-regression) on the four gauges; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, c(2:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ ., data = train, type = "eps-regression")
fuerzaPredict_test <- predict(regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force on the test rows
ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(title = "Fuerza vs Predicha", y = "Fuerza (Scaled)", color = "Legend") +
  theme(legend.position = "bottom")
# Test RMSE (stored under an rss_* name)
rss_galgas_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# SVR (eps-regression) on A_10 alone; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ A_10, data = train, type = "eps-regression")
fuerzaPredict_test <- predict(regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force, kept for the 2x2 grid
p10_svr <- ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(title = "Fuerza vs Predicha A_10", y = "Fuerza (Scaled)", color = "Legend") +
  theme(legend.position = "bottom")
# Test RMSE (stored under an rss_* name)
rss_a10_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# SVR (eps-regression) on A_20 alone; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ A_20, data = train, type = "eps-regression")
fuerzaPredict_test <- predict(regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force, kept for the 2x2 grid
p20_svr <- ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(title = "Fuerza vs Predicha A_20", y = "Fuerza (Scaled)", color = "Legend") +
  theme(legend.position = "bottom")
# Test RMSE (stored under an rss_* name)
rss_a20_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# SVR (eps-regression) on A_30 alone; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ A_30, data = train, type = "eps-regression")
fuerzaPredict_test <- predict(regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Predicted vs. observed force, kept for the 2x2 grid
p30_svr <- ggplot(test, aes(x = c(1:length(fuerzaPredict_test)))) +
  geom_point(aes(y = fuerza, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, color = "Fuerza_Predicha")) +
  scale_color_manual(values = Colors00) +
  labs(title = "Fuerza vs Predicha A_30", y = "Fuerza (Scaled)", color = "Legend") +
  theme(legend.position = "bottom")
# Test RMSE (stored under an rss_* name)
rss_a30_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# SVR (eps-regression) on A_40 alone; 80/20 hold-out.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
row.number <- sample(1:nrow(dat_modelo), 0.8 * nrow(dat_modelo))
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
regresor_svr <- svm(fuerza ~ A_40, data = train, type = "eps-regression")
fuerzaPredict_test <- predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test <- fuerzaPredict_test
# Fix: the title said "A_30" (copied from the previous plot) although this
# model is fitted on A_40.
p40_svr <- ggplot(data = test) +
  geom_point(aes(y = fuerza, x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test, x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha")) +
  theme(legend.position = "bottom") +
  ylab("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha A_40") +
  scale_color_manual(values = Colors00) +
  labs(color = "Legend")
# Test RMSE (stored under an rss_* name)
rss_a40_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2) / length(test$fuerza))
# 2x2 grid of the four single-gauge SVR plots
ggarrange(p10_svr, p20_svr, p30_svr, p40_svr, nrow = 2, ncol = 2, common.legend = TRUE, legend = "bottom")
##### Comparison ####
# Test RMSE of the six SVR models (the column is named test.MSE but holds RMSE).
modelo01 <- c("A_10", "A_20", "A_30", "A_40", "Todas_Galgas", "Todas_Variables")
test.MSE <- c(rss_a10_svr, rss_a20_svr, rss_a30_svr, rss_a40_svr, rss_galgas_svr, rss_all_svr)
comparacion01 <- data.frame(modelo01 = modelo01, test.MSE = test.MSE)
# Fix: aes() referred to `modelo`, which is not a column of comparacion01.
# ggplot2 fell back to the global vector `modelo` built for the linear
# comparison, which only happened to hold the same six labels. The mapping
# now uses this data frame's own modelo01 column.
p_comp02 <- ggplot(
  data = comparacion01,
  aes(
    x = reorder(x = modelo01, X = test.MSE),
    y = test.MSE,
    color = modelo01,
    label = round(test.MSE, 2)
  )
) +
  geom_point(size = 15) +
  geom_text(color = "white", size = 4) +
  labs(x = "Modelo regresión", y = "Test error(RMSE)", title = "RMSE all models SVR") +
  theme_bw() +
  coord_flip() +
  theme(legend.position = "none")
#####
p_comp02
# The three comparison plots shown one after another
p_comp00
p_comp01
p_comp02
Considerando todas las galgas en un modelo no lineal con un polinomio de grado 5
# Degree-5 polynomial model on the four gauges, fitted on a fresh 80/20 split.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
n_filas <- nrow(dat_modelo)
row.number <- sample(1:n_filas, 0.8 * n_filas)
train <- dat_modelo[row.number, ]
test <- dat_modelo[-row.number, ]
# dat_modelo is rebuilt with the same selection before fitting.
dat_modelo <- Sensor[1:2021, c(1:5, 8)]
modelo.poli <- lm(
  fuerza ~ poly(A_10, 5) + poly(A_20, 5) + poly(A_30, 5) + poly(A_40, 5),
  data = train
)
summary(modelo.poli)
##
## Call:
## lm(formula = fuerza ~ poly(A_10, 5) + poly(A_20, 5) + poly(A_30,
## 5) + poly(A_40, 5), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -175.041 -26.508 0.341 28.843 169.333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.493e+03 1.176e+00 -1269.060 < 2e-16 ***
## poly(A_10, 5)1 -3.054e+05 4.922e+03 -62.048 < 2e-16 ***
## poly(A_10, 5)2 -2.035e+04 1.905e+03 -10.681 < 2e-16 ***
## poly(A_10, 5)3 2.294e+04 6.445e+02 35.600 < 2e-16 ***
## poly(A_10, 5)4 2.290e+03 7.914e+02 2.894 0.003861 **
## poly(A_10, 5)5 9.474e+02 5.116e+02 1.852 0.064207 .
## poly(A_20, 5)1 9.976e+04 8.356e+03 11.939 < 2e-16 ***
## poly(A_20, 5)2 -2.542e+04 2.875e+03 -8.843 < 2e-16 ***
## poly(A_20, 5)3 -1.157e+04 1.287e+03 -8.990 < 2e-16 ***
## poly(A_20, 5)4 7.448e+03 9.707e+02 7.673 2.90e-14 ***
## poly(A_20, 5)5 9.101e+02 3.535e+02 2.574 0.010133 *
## poly(A_30, 5)1 1.653e+05 9.397e+03 17.593 < 2e-16 ***
## poly(A_30, 5)2 2.884e+04 3.806e+03 7.577 5.97e-14 ***
## poly(A_30, 5)3 -1.348e+04 2.347e+03 -5.741 1.13e-08 ***
## poly(A_30, 5)4 -4.264e+03 1.163e+03 -3.667 0.000253 ***
## poly(A_30, 5)5 8.102e+02 9.153e+02 0.885 0.376156
## poly(A_40, 5)1 9.307e+04 8.553e+03 10.881 < 2e-16 ***
## poly(A_40, 5)2 1.909e+04 3.438e+03 5.552 3.30e-08 ***
## poly(A_40, 5)3 3.105e+03 2.105e+03 1.475 0.140343
## poly(A_40, 5)4 -1.871e+03 1.014e+03 -1.845 0.065191 .
## poly(A_40, 5)5 -2.339e+03 7.951e+02 -2.942 0.003307 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.29 on 1595 degrees of freedom
## Multiple R-squared: 0.9988, Adjusted R-squared: 0.9987
## F-statistic: 6.441e+04 on 20 and 1595 DF, p-value: < 2.2e-16