suppressMessages(library(MASS, quietly = T))
suppressMessages(library(ISLR, quietly = T))
suppressMessages(library(ggplot2, quietly = T))
suppressMessages(library(corrplot, quietly = T))
suppressMessages(library("readxl",quietly = T))
suppressMessages(library(Metrics,quietly = T))
suppressMessages(library(ggpubr, quietly = T))
suppressMessages(library(tidyverse, quietly = T))
suppressMessages(library(boot,quietly = T))
suppressMessages(library(e1071))

FSAE Sensor, GRUPO 6

We read the sensor data from the Excel file; it contains the following variables: “Time” “A_10” “A_20” “A_30” “A_40” “X_Value”
“hilo” “fuerza” “incli_sup” “Incli_inf”
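The absolute path in the chunk below is specific to one machine. As a minimal sketch (assuming the workbook sits next to the report and keeps the name Sensor.xlsx), a relative path plus a quick structure check would look like:

# Sketch only: hypothetical relative path and a quick look at what read_excel returns
Sensor <- readxl::read_excel("Sensor.xlsx")  # assumes the file is in the working directory
dplyr::glimpse(Sensor)                       # column names and types
summary(Sensor$fuerza)                       # rough range of the response before rescaling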

Sensor <- read_excel("C:/Users/User/Desktop/MasterIndustriales/Ingenia/ProbetA/Sensor.xlsx")
#View(Sensor)
Sensor$fuerza = 10 * Sensor$fuerza 
Sensor=Sensor[,c(-11)]
names(Sensor)
##  [1] "Time"      "A_10"      "A_20"      "A_30"      "A_40"      "X_Value"  
##  [7] "hilo"      "fuerza"    "incli_sup" "Incli_inf"

LINEAR REGRESSION

All variables

“Time” “A_10” “A_20” “A_30” “A_40” “X_Value”
“hilo” “fuerza” “incli_sup” “Incli_inf”
(X_Value, column 6, is dropped when building dat_modelo, so the model below uses the remaining nine variables, with fuerza as the response.)

dat_modelo=Sensor[c(1:2021),c(-6)]
modelo_simple <- lm(formula = fuerza ~ ., data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ ., data = dat_modelo)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -185.902  -43.778   -1.242   42.988  265.156 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.124e+04  1.270e+03 -40.359  < 2e-16 ***
## Time         2.782e+00  3.026e-01   9.193  < 2e-16 ***
## A_10        -1.627e+00  7.013e-02 -23.194  < 2e-16 ***
## A_20         3.428e-01  3.215e-02  10.665  < 2e-16 ***
## A_30         1.473e+00  1.639e-01   8.983  < 2e-16 ***
## A_40         3.297e-01  1.721e-01   1.915   0.0556 .  
## hilo         8.199e+01  1.373e+00  59.731  < 2e-16 ***
## incli_sup   -6.786e+01  8.439e+00  -8.041  1.5e-15 ***
## Incli_inf    3.712e+02  2.386e+01  15.557  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 67.21 on 2012 degrees of freedom
## Multiple R-squared:  0.9975, Adjusted R-squared:  0.9975 
## F-statistic: 9.957e+04 on 8 and 2012 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1)) 
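The strain-gauge channels are strongly correlated with one another (see the correlation plot further down), so the individual coefficient signs above should be read with multicollinearity in mind. A minimal check, assuming the car package is installed (it is not loaded in this report), would be:

# Variance inflation factors for the full linear model (assumes the car package is available)
library(car)
vif(modelo_simple)  # values far above ~10 point to strong multicollinearity between predictors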

# Predict values on the same data used to fit the model
fuerzaPredict = predict(object = modelo_simple, newdata = dat_modelo)
dat_modelo$fuerzaPredict = fuerzaPredict 
Colors00<- c("Fuerza" = "black", "Fuerza_Predicha" = "turquoise4")

ggplot(data = dat_modelo) +
  geom_point(aes(y = fuerza , x = Time, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict , x = Time, color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Todas las variables y todos los datos")  +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

  • Train and Test prediction all variables
dat_modelo=Sensor[c(1:2021),c(-6)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
dim(train)
## [1] 1616    9
dim(test)
## [1] 405   9
modelo_train <- lm(formula = fuerza ~ ., data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = Time, color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = Time, color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Test Data Set, Todas las variables") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Test Data Set, distribución Valores") 

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Test Data Set, distribución Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#RMSE
(rmse_lineal_todos <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 69.40432
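The Metrics package loaded at the top provides the same calculation directly; as a quick cross-check of the value above:

# Equivalent RMSE via the Metrics package (arguments: actual values first, predictions second)
Metrics::rmse(test$fuerza, fuerzaPredict_test)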
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

# Correlation among all the variables
corrplot.mixed(corr = cor(dat_modelo[,c("A_10", "A_20", "A_30", "A_40", "hilo", "fuerza", 
                                        "incli_sup", "Incli_inf")],
                          method = "pearson"))

All strain gauges

We consider the following variables: “A_10” “A_20” “A_30” “A_40” “fuerza”

#STRAIN GAUGE PLOT
dat_modelo=Sensor[c(1:2021),c(1:5,8)]

colors <- c("A_10" = "aquamarine", "A_20" = "aquamarine1","A_30" = "aquamarine2", "A_40" = "aquamarine3")

ggplot(data = dat_modelo) +
  geom_point(aes(y = A_10 , x = Time, color = "A_10"))+
  geom_point(aes(y = A_20 , x = Time, color = "A_20"))+
  geom_point(aes(y = A_30 , x = Time, color = "A_30"))+
  geom_point(aes(y = A_40 , x = Time, color = "A_40")) + 
  ggtitle("Galgas") +
  xlab("Time") + ylab("um/m")+
  scale_color_manual(values = colors) + 
  theme(legend.position="bottom")+
  labs(color = "Legend")

dat_modelo=Sensor[c(1:2021),c(2:5,8)]
#"A_10"      "A_20"      "A_30"      "A_40"
modelo_simple <- lm(formula = fuerza ~ ., data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ ., data = dat_modelo)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -358.51  -69.92   -7.02   56.85  381.84 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -546.00656    8.88178 -61.475  < 2e-16 ***
## A_10          -4.17022    0.05945 -70.150  < 2e-16 ***
## A_20           0.62878    0.04784  13.142  < 2e-16 ***
## A_30           3.68750    0.26680  13.821  < 2e-16 ***
## A_40           2.13034    0.27334   7.794 1.03e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 114.2 on 2016 degrees of freedom
## Multiple R-squared:  0.9927, Adjusted R-squared:  0.9927 
## F-statistic: 6.86e+04 on 4 and 2016 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1))
# Predict values on the same data used to fit the model
fuerzaPredict = predict(object = modelo_simple, newdata = dat_modelo)
dat_modelo$fuerzaPredict = fuerzaPredict 

ggplot(data = dat_modelo) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerza))), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict , x = c(1:length(fuerza))), color = "black")

  • Train and Test prediction, all strain gauges
dat_modelo=Sensor[c(1:2021),c(2:5,8)]
set.seed(10)
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
dim(train)
## [1] 1616    5
dim(test)
## [1] 405   5
modelo_train <- lm(formula = fuerza ~ ., data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Test Data Set, Todas las GALGAS") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Distribucion_Valores") 

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Distribucion_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(rmse_lineal_galgas <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 117.1184
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

# Correlation between the strain gauges and fuerza
corrplot.mixed(corr = cor(dat_modelo[,c("A_10", "A_20", "A_30", "A_40", "fuerza")],
                          method = "pearson"))

Each strain gauge separately

A_10

####A_10####
dat_modelo=Sensor[c(1:2021),c(2:5,8)]
modelo_simple <- lm(formula = fuerza ~ A_10, data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ A_10, data = dat_modelo)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -585.9 -338.9  186.7  216.7  351.9 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -42.753191   9.497209  -4.502 7.13e-06 ***
## A_10          0.769870   0.003788 203.229  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 288.6 on 2019 degrees of freedom
## Multiple R-squared:  0.9534, Adjusted R-squared:  0.9534 
## F-statistic: 4.13e+04 on 1 and 2019 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1)) 
  • Train and Test prediction A_10
dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

modelo_train <- lm(formula = fuerza ~ A_10, data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Test Data Set, A_10") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Distribución_Valores") 

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(rmse_A10 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 286.2313
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

#### A_20

####A_20####
dat_modelo=Sensor[c(1:2021),c(2:5,8)]
modelo_simple <- lm(formula = fuerza ~ A_20, data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ A_20, data = dat_modelo)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -565.2 -306.6  146.7  218.8  330.2 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.679e+02  8.509e+00  -19.73   <2e-16 ***
## A_20         8.242e-01  3.807e-03  216.52   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 271.6 on 2019 degrees of freedom
## Multiple R-squared:  0.9587, Adjusted R-squared:  0.9587 
## F-statistic: 4.688e+04 on 1 and 2019 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1)) 
  • Train and Test prediction A_20
dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

modelo_train <- lm(formula = fuerza ~ A_20, data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerza))), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerza))), color = "black")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Distribución_Valores") 

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(rmse_A20 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 274.2799
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

#### A_30

####A_30####
dat_modelo=Sensor[c(1:2021),c(2:5,8)]
modelo_simple <- lm(formula = fuerza ~ A_30, data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ A_30, data = dat_modelo)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -550.9 -290.6  151.7  184.6  317.0 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -98.742424   8.050845  -12.27   <2e-16 ***
## A_30          0.974629   0.004142  235.33   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 250.7 on 2019 degrees of freedom
## Multiple R-squared:  0.9648, Adjusted R-squared:  0.9648 
## F-statistic: 5.538e+04 on 1 and 2019 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1)) 
  • Train and Test prediction A_30
dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

modelo_train <- lm(formula = fuerza ~ A_30, data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerza))), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerza))), color = "black")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Distribución_Valores") 

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(rmse_A30 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 258.1888
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

#### A_40

####A_40####
dat_modelo=Sensor[c(1:2021),c(2:5,8)]
modelo_simple <- lm(formula = fuerza ~ A_40, data = dat_modelo)
summary(modelo_simple)
## 
## Call:
## lm(formula = fuerza ~ A_40, data = dat_modelo)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -531.6 -281.1  144.9  175.9  318.6 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.235e+02  7.793e+00  -15.85   <2e-16 ***
## A_40         1.160e+00  4.813e-03  240.94   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 245.1 on 2019 degrees of freedom
## Multiple R-squared:  0.9664, Adjusted R-squared:  0.9664 
## F-statistic: 5.805e+04 on 1 and 2019 DF,  p-value: < 2.2e-16
par(mfrow=c(2,2)) 
#plot(modelo_simple)
par(mfrow=c(1,1)) 
  • Train and Test prediction A_40
dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

modelo_train <- lm(formula = fuerza ~ A_40, data = train)
fuerzaPredict_test = predict(object = modelo_train, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test 

# Predicted vs actual values
ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerza))), color = "aquamarine") +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerza))), color = "black")

# Distribution of predicted vs actual values
ggplot(data = test) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho")) +
  geom_density(alpha=0.5, position="identity", aes(x=fuerza, color = "Real")) + 
  ggtitle("Distribución_Valores")

ggplot(data = test) +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerzaPredict_test, color = "Predicho"), fill ="white") +
  geom_histogram(alpha=0.5, position="identity", aes(x=fuerza, color = "Real"), fill ="lightblue") + 
  ggtitle("Distribución_Valores")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

(rmse_A40 <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza)))
## [1] 255.1003
# Absolute percentage error
Error_Plot = abs(fuerzaPredict_test - test$fuerza)*100/test$fuerza
ggplot() +
  geom_point(aes(y = Error_Plot , x = c(1:length(Error_Plot))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

ggplot() +
  geom_point(aes(y = Error_Plot[c(100:300)]  , x = c(1:length(Error_Plot[c(100:300)]))), color = "black") + 
  ggtitle("Error") +
  xlab("Values") + ylab("Error(%)")

### Comparison

  #####Comparison####
  modelo <- c("A_10", "A_20","A_30","A_40","Todas_Galgas","Todas_Variables")
  test.MSE <- c(rmse_A10 ,rmse_A20,rmse_A30,rmse_A40,rmse_lineal_galgas,rmse_lineal_todos)
  
  comparacion <- data.frame(modelo = modelo, test.MSE = test.MSE)
  
  p_comp00<- ggplot(data = comparacion, aes(x = reorder(x = modelo, X = test.MSE), 
                                 y = test.MSE, color = modelo, 
                                 label = round(test.MSE,2))) + 
    geom_point(size = 15) + 
    geom_text(color = "white", size = 4) + 
    labs(x = "Modelo regresión", y = "Test error(RMSE)", title = "RMSE all models lineal") + theme_bw() + 
    coord_flip() + theme(legend.position = "none")
  #####
  p_comp00
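The same comparison can also be reported as a small table; a sketch using knitr::kable, which is already available when this report is knitted:

# Test RMSE per linear model, sorted from best to worst
knitr::kable(comparacion[order(comparacion$test.MSE), ],
             digits = 2, row.names = FALSE,
             col.names = c("Modelo", "Test RMSE"))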

NON-LINEAR REGRESSIONS

All strain gauges

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
# Vector to store the test RMSE for each polynomial degree
rss <- rep (NA, 10)


for (i in 1:10){
modelo.poli <- lm(fuerza ~ poly(A_10, i) + poly(A_20, i) + poly(A_30, i)+ poly(A_40, i), data = train)
fuerzaPredict_test = predict(object = modelo.poli, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test
rss[i] <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))
}

p_00<- ggplot(data = data.frame(polinomio = 1:10, rss = rss), 
       aes(x = polinomio, y = rss)) +
geom_point(color = "orangered2") +
geom_path() +
scale_x_continuous(breaks = 0:10) +
labs(title = "cv.MSE_A10  ~ Grado de polinomio") +
theme_bw() +
theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(plot.title = element_text(hjust = 0.5))

p_00

We take the degree-5 polynomial as the optimum.
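Note that the curve above comes from a single train/test split, so the apparent optimum can shift between runs. The boot package loaded at the top allows a proper k-fold cross-validation of the degree; a minimal sketch (10-fold CV on the gauge columns, names as above) would be:

# Cross-validated RMSE for each polynomial degree using boot::cv.glm
set.seed(1)
dat_cv <- Sensor[c(1:2021), c(2:5, 8)]  # A_10..A_40 and fuerza
cv.rmse <- rep(NA, 10)
for (i in 1:10) {
  fit <- glm(fuerza ~ poly(A_10, i) + poly(A_20, i) + poly(A_30, i) + poly(A_40, i),
             data = dat_cv)
  cv.rmse[i] <- sqrt(boot::cv.glm(dat_cv, fit, K = 10)$delta[1])  # delta[1] is the CV MSE
}
which.min(cv.rmse)  # degree with the lowest cross-validated RMSE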

Each strain gauge separately

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
# Vectors to store the test RMSE for each polynomial degree, one per gauge
rss_10 <- rep (NA, 10)
rss_20 <- rep (NA, 10)
rss_30 <- rep (NA, 10)
rss_40 <- rep (NA, 10)


for (i in 1:10){
modelo.poli_10 <- lm(fuerza ~ poly(A_10, i), data = train)
modelo.poli_20 <- lm(fuerza ~ poly(A_20, i), data = train)
modelo.poli_30 <- lm(fuerza ~ poly(A_30, i), data = train)
modelo.poli_40 <- lm(fuerza ~ poly(A_40, i), data = train)

fuerzaPredict_test_10 = predict(object = modelo.poli_10, newdata = test)
test$fuerzaPredict_test_10 = fuerzaPredict_test_10

fuerzaPredict_test_20 = predict(object = modelo.poli_20, newdata = test)
test$fuerzaPredict_test_20 = fuerzaPredict_test_20

fuerzaPredict_test_30 = predict(object = modelo.poli_30, newdata = test)
test$fuerzaPredict_test_30 = fuerzaPredict_test_30

fuerzaPredict_test_40 = predict(object = modelo.poli_40, newdata = test)
test$fuerzaPredict_test_40 = fuerzaPredict_test_40

rss_10[i] <- sqrt(sum((fuerzaPredict_test_10 - test$fuerza)^2)/length(test$fuerza))

rss_20[i] <- sqrt(sum((fuerzaPredict_test_20 - test$fuerza)^2)/length(test$fuerza))

rss_30[i] <- sqrt(sum((fuerzaPredict_test_30 - test$fuerza)^2)/length(test$fuerza))

rss_40[i] <- sqrt(sum((fuerzaPredict_test_40 - test$fuerza)^2)/length(test$fuerza))
}

p_a10 <- ggplot(data = data.frame(polinomio = 1:10, rss_10 = rss_10), 
       aes(x = polinomio, y = rss_10)) +
geom_point(color = "orangered2") +
geom_path() +
scale_x_continuous(breaks = 0:10) +
labs(title = "cv.MSE_A10  ~ Grado de polinomio") +
theme_bw() +
theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(plot.title = element_text(hjust = 0.5))

p_a20 <- ggplot(data = data.frame(polinomio = 1:10, rss_20 = rss_20), 
       aes(x = polinomio, y = rss_20)) +
geom_point(color = "orangered2") +
geom_path() +
scale_x_continuous(breaks = 0:10) +
labs(title = "cv.MSE_A20  ~ Grado de polinomio") +
theme_bw() +
theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(plot.title = element_text(hjust = 0.5))

p_a30 <- ggplot(data = data.frame(polinomio = 1:10, rss_30 = rss_30), 
       aes(x = polinomio, y = rss_30)) +
geom_point(color = "orangered2") +
geom_path() +
scale_x_continuous(breaks = 0:10) +
labs(title = "cv.MSE_A30  ~ Grado de polinomio") +
theme_bw() +
theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(plot.title = element_text(hjust = 0.5))

p_a40 <- ggplot(data = data.frame(polinomio = 1:10, rss_40 = rss_40), 
       aes(x = polinomio, y = rss_40)) +
geom_point(color = "orangered2") +
geom_path() +
scale_x_continuous(breaks = 0:10) +
labs(title = "cv.MSE_A40  ~ Grado de polinomio") +
theme_bw() +
theme(panel.background = element_blank(), panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
theme(plot.title = element_text(hjust = 0.5))

ggarrange(p_a10,p_a20,p_a30,p_a40,nrow=2, ncol=2,common.legend = TRUE, legend = "bottom")

modelo.poli_10 <- lm(fuerza ~ poly(A_10, 7), data = train)
modelo.poli_20 <- lm(fuerza ~ poly(A_20, 7), data = train)
modelo.poli_30 <- lm(fuerza ~ poly(A_30, 7), data = train)
modelo.poli_40 <- lm(fuerza ~ poly(A_40, 7), data = train)

fuerzaPredict_test_10 = predict(object = modelo.poli_10, newdata = test)
test$fuerzaPredict_test_10 = fuerzaPredict_test_10

p10<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test_10)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_10 , x = c(1:length(fuerzaPredict_test_10)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_10  ~ Grado 7") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

fuerzaPredict_test_20 = predict(object = modelo.poli_20, newdata = test)
test$fuerzaPredict_test_20 = fuerzaPredict_test_20

p20<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test_20)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_20 , x = c(1:length(fuerzaPredict_test_20)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_20  ~ Grado 7") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

fuerzaPredict_test_30 = predict(object = modelo.poli_30, newdata = test)
test$fuerzaPredict_test_30 = fuerzaPredict_test_30

p30<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test_30)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_30 , x = c(1:length(fuerzaPredict_test_30)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_30  ~ Grado 7") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

fuerzaPredict_test_40 = predict(object = modelo.poli_40, newdata = test)
test$fuerzaPredict_test_40 = fuerzaPredict_test_40

p40<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test_40)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test_40 , x = c(1:length(fuerzaPredict_test_40)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha_40  ~ Grado 7") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

ggarrange(p10,p20,p30,p40,nrow=2, ncol=2,common.legend = TRUE, legend = "bottom")

We take the degree-7 polynomial as the optimum in all cases.

Comparison

  #####Comparison####
  modelo01 <- c("A_10_G7", "A_20_G7","A_30_G7","A_40_G7","Todas_Galgas_G5")
  test.MSE <- c(rss_10[7] ,rss_20[7],rss_30[7],rss_40[7],rss[5])
  
  comparacion01 <- data.frame(modelo01 = modelo01, test.MSE = test.MSE)
  
  p_comp01<- ggplot(data = comparacion01, aes(x = reorder(x = modelo01, X = test.MSE), 
                                 y = test.MSE, color = modelo01, 
                                 label = round(test.MSE,2))) + 
    geom_point(size = 15) + 
    geom_text(color = "white", size = 4) + 
    labs(x = "Modelo regresión", y = "Test error(RMSE)", title = "RMSE all models no lineal") + theme_bw() + 
    coord_flip() + theme(legend.position = "none")
  #####
 p_comp01

SUPPORT VECTOR REGRESSION

Support Vector Regression (SVR) is a regression model based on Support Vector Machines, which, roughly speaking, can produce non-linear classifications or regressions by transforming the input data into higher-dimensional spaces. For regression, SVR looks for the curve that fits the data while keeping the separation between that curve and certain specific points of the training set (the support vectors) as large as possible. For a gentler introduction, see the video Support Vector Machine (SVM) - Fun and Easy Machine Learning.
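The SVR fits below all use the default radial kernel with the default cost, epsilon and gamma. e1071 also offers tune.svm for a small cross-validated grid search; a sketch (grid values chosen here purely for illustration, applied to any of the train data frames built below) would be:

# Cross-validated grid search over SVR hyper-parameters with e1071::tune.svm
set.seed(1)
ajuste_svr <- tune.svm(fuerza ~ A_10 + A_20 + A_30 + A_40, data = train,
                       gamma = c(0.1, 0.5, 1), cost = 2^(0:4),
                       epsilon = c(0.05, 0.1, 0.2))
summary(ajuste_svr)          # cross-validated error for each combination
ajuste_svr$best.parameters   # combination with the lowest error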

All variables

dat_modelo=Sensor[c(1:2021),c(-6)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

regresor_svr <- svm(fuerza ~ ., data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_all_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

All strain gauges

dat_modelo=Sensor[c(1:2021),c(2:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

regresor_svr <- svm(fuerza ~ ., data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_galgas_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

Each strain gauge separately

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]

regresor_svr <- svm(fuerza ~ A_10, data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

p10_svr<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha A_10") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_a10_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
regresor_svr <- svm(fuerza ~ A_20, data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

p20_svr<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha A_20") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_a20_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
regresor_svr <- svm(fuerza ~ A_30, data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

p30_svr<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha A_30") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_a30_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
regresor_svr <- svm(fuerza ~ A_40, data = train, type = "eps-regression")

fuerzaPredict_test = predict(object = regresor_svr, newdata = test)
test$fuerzaPredict_test = fuerzaPredict_test

p40_svr<- ggplot(data = test) +
  geom_point(aes(y = fuerza , x = c(1:length(fuerzaPredict_test)), color = "Fuerza")) +
  geom_line(aes(y = fuerzaPredict_test , x = c(1:length(fuerzaPredict_test)), color = "Fuerza_Predicha"))+
  theme(legend.position="bottom")+
  ylab ("Fuerza (Scaled)") +
  ggtitle("Fuerza vs Predicha A_30") +
  scale_color_manual(values = Colors00) + 
  labs(color = "Legend") +
  theme(legend.position="bottom")

rss_a40_svr <- sqrt(sum((fuerzaPredict_test - test$fuerza)^2)/length(test$fuerza))

ggarrange(p10_svr,p20_svr,p30_svr,p40_svr,nrow=2, ncol=2,common.legend = TRUE, legend = "bottom")

Comparison

  #####Comparison####
  modelo01 <- c("A_10", "A_20","A_30","A_40","Todas_Galgas","Todas_Variables")
  test.MSE <- c(rss_a10_svr ,rss_a20_svr,rss_a30_svr,rss_a40_svr,rss_galgas_svr,rss_all_svr)
  
  comparacion01 <- data.frame(modelo01 = modelo01, test.MSE = test.MSE)
  
  p_comp02<- ggplot(data = comparacion01, aes(x = reorder(x = modelo01, X = test.MSE), 
                                 y = test.MSE, color = modelo01, 
                                 label = round(test.MSE,2))) + 
    geom_point(size = 15) + 
    geom_text(color = "white", size = 4) + 
    labs(x = "Modelo regresión", y = "Test error(RMSE)", title = "RMSE all models SVR") + theme_bw() + 
    coord_flip() + theme(legend.position = "none")
  #####
 p_comp02

ALL MODELS

p_comp00

p_comp01

p_comp02

Considering ALL THE STRAIN GAUGES in a non-linear model with a degree-5 polynomial (G5)

dat_modelo=Sensor[c(1:2021),c(1:5,8)]
row.number <- sample(1:nrow(dat_modelo), 0.8*nrow(dat_modelo))
train = dat_modelo[row.number,]
test = dat_modelo[-row.number,]
modelo.poli <- lm(fuerza ~ poly(A_10, 5) + poly(A_20, 5) + poly(A_30, 5)+ poly(A_40, 5), data = train)

summary(modelo.poli)
## 
## Call:
## lm(formula = fuerza ~ poly(A_10, 5) + poly(A_20, 5) + poly(A_30, 
##     5) + poly(A_40, 5), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -175.041  -26.508    0.341   28.843  169.333 
## 
## Coefficients:
##                  Estimate Std. Error   t value Pr(>|t|)    
## (Intercept)    -1.493e+03  1.176e+00 -1269.060  < 2e-16 ***
## poly(A_10, 5)1 -3.054e+05  4.922e+03   -62.048  < 2e-16 ***
## poly(A_10, 5)2 -2.035e+04  1.905e+03   -10.681  < 2e-16 ***
## poly(A_10, 5)3  2.294e+04  6.445e+02    35.600  < 2e-16 ***
## poly(A_10, 5)4  2.290e+03  7.914e+02     2.894 0.003861 ** 
## poly(A_10, 5)5  9.474e+02  5.116e+02     1.852 0.064207 .  
## poly(A_20, 5)1  9.976e+04  8.356e+03    11.939  < 2e-16 ***
## poly(A_20, 5)2 -2.542e+04  2.875e+03    -8.843  < 2e-16 ***
## poly(A_20, 5)3 -1.157e+04  1.287e+03    -8.990  < 2e-16 ***
## poly(A_20, 5)4  7.448e+03  9.707e+02     7.673 2.90e-14 ***
## poly(A_20, 5)5  9.101e+02  3.535e+02     2.574 0.010133 *  
## poly(A_30, 5)1  1.653e+05  9.397e+03    17.593  < 2e-16 ***
## poly(A_30, 5)2  2.884e+04  3.806e+03     7.577 5.97e-14 ***
## poly(A_30, 5)3 -1.348e+04  2.347e+03    -5.741 1.13e-08 ***
## poly(A_30, 5)4 -4.264e+03  1.163e+03    -3.667 0.000253 ***
## poly(A_30, 5)5  8.102e+02  9.153e+02     0.885 0.376156    
## poly(A_40, 5)1  9.307e+04  8.553e+03    10.881  < 2e-16 ***
## poly(A_40, 5)2  1.909e+04  3.438e+03     5.552 3.30e-08 ***
## poly(A_40, 5)3  3.105e+03  2.105e+03     1.475 0.140343    
## poly(A_40, 5)4 -1.871e+03  1.014e+03    -1.845 0.065191 .  
## poly(A_40, 5)5 -2.339e+03  7.951e+02    -2.942 0.003307 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 47.29 on 1595 degrees of freedom
## Multiple R-squared:  0.9988, Adjusted R-squared:  0.9987 
## F-statistic: 6.441e+04 on 20 and 1595 DF,  p-value: < 2.2e-16
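As a final check, the test-set RMSE of this degree-5 model can be computed exactly as for the previous models, using the train/test split created at the start of this chunk:

# Test RMSE of the final degree-5 polynomial model (same formula as the rmse_* values above)
fuerzaPredict_test <- predict(modelo.poli, newdata = test)
sqrt(mean((fuerzaPredict_test - test$fuerza)^2))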

ALTAIR MODEL