R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(ISLR)
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
library(Metrics)
library(boot)
library(boot)

# Load the raw dataset from the Excel workbook (path is relative to the
# working directory). readxl reports renaming one unnamed column to `...1`.
output <- readxl::read_excel(path = "output.xlsx")
## New names:
## • `` -> `...1`
#View(output)

# Keep the four predictor columns plus total_energy_pbe, which every model
# below uses as the response.
df <- dplyr::select(
  output,
  num_heteroatoms, tpsa, mol_w, num_valence_electrons, total_energy_pbe
)

# Distribution of the response: density-scaled histogram (100 bins) with a
# kernel density estimate drawn on top.
# `after_stat(density)` replaces the `..density..` notation, which ggplot2
# deprecated in 3.4.0; after_stat() is available from ggplot2 3.3.0 onwards.
ggplot(df, aes(x = total_energy_pbe)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "black", fill = "white", bins = 100
  ) +
  geom_density(alpha = 0.2, fill = "#FF6666")

# Pairwise relationships between all columns of df: smoothed scatterplots
# below the diagonal, histograms on the diagonal, axis labels hidden.
lower_panels <- list(continuous = "smooth")
diag_panels <- list(continuous = "barDiag")
ggpairs(df, lower = lower_panels, diag = diag_panels, axisLabels = "none")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Simple linear regression of the response on mol_w (described in the original
# note as the most correlated predictor).
# The response is mapped to the y-axis so that the plot agrees with its title
# and with the model fitted below (total_energy_pbe ~ mol_w).
ggplot(data = df, aes(x = mol_w, y = total_energy_pbe)) +
  geom_point(color = "grey30", alpha = 0.3) +
  labs(title = "Total_Energy_PBE ~ mol_w") +
  theme_bw()

modelo_1 <- lm(total_energy_pbe ~ mol_w, data = df)
summary(modelo_1)
## 
## Call:
## lm(formula = total_energy_pbe ~ mol_w, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -704145    -398    8601   16384   92207 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 14002.90     487.94    28.7   <2e-16 ***
## mol_w        -183.22       1.38  -132.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 40270 on 61035 degrees of freedom
## Multiple R-squared:  0.2241, Adjusted R-squared:  0.2241 
## F-statistic: 1.763e+04 on 1 and 61035 DF,  p-value: < 2.2e-16
# Energy vs. mol_w with a degree-4 polynomial fit overlaid.
# geom_smooth() fits `y ~ poly(x, 4)` on the plotted aesthetics, so the
# response has to be mapped to y for the red curve to be the
# total_energy_pbe ~ poly(mol_w, 4) fit that the title announces.
ggplot(data = df, aes(x = mol_w, y = total_energy_pbe)) +
  geom_point(color = "grey30", alpha = 0.3) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), color = "red") +
  labs(title = "POLY 4: total_energy_pbe ~ mol_w") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))

# Polynomial regression on mol_w: compare polynomial degrees 1 to 5 using
# 5-fold cross-validation.
degrees <- 1:5
cv_MSE_k5 <- rep(NA_real_, length(degrees))
for (i in degrees) {
  modelo <- glm(formula = total_energy_pbe ~ poly(mol_w, i), data = df)
  # The seed is reset before every cv.glm() call, so each degree starts from
  # the same random-number state.
  set.seed(17)
  cv_result <- cv.glm(data = df, glmfit = modelo, K = 5)
  # Store the first component of delta as the CV error for this degree.
  cv_MSE_k5[i] <- cv_result$delta[1]
}

# Plot the cross-validated error against the polynomial degree.
cv_curve <- data.frame(polinomio = degrees, cv_MSE = cv_MSE_k5)
p5 <- ggplot(data = cv_curve, aes(x = polinomio, y = cv_MSE)) +
  geom_point(colour = "firebrick3") +
  geom_path() +
  theme(
    panel.grid.major = element_line(colour = "gray90"),
    plot.title = element_text(face = "bold"),
    panel.background = element_rect(fill = "gray98")
  ) +
  labs(title = "Test Error ~ Grado del polinomio") +
  scale_x_continuous(breaks = degrees)
p5

# Multiple linear regression ----
# Regress the response on every other column of df (first-order terms only).
# NOTE(review): despite its name, modelo_poli4 contains no polynomial terms.
modelo_poli4 <- lm(formula = total_energy_pbe ~ ., data = df)
summary(modelo_poli4)
## 
## Call:
## lm(formula = total_energy_pbe ~ ., data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -302474   -6568     842    8232  187011 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            2536.695    229.156   11.07   <2e-16 ***
## num_heteroatoms        3931.100     43.780   89.79   <2e-16 ***
## tpsa                   -295.503      3.378  -87.47   <2e-16 ***
## mol_w                 -1140.134      2.141 -532.41   <2e-16 ***
## num_valence_electrons  2728.553      5.616  485.86   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17510 on 61032 degrees of freedom
## Multiple R-squared:  0.8533, Adjusted R-squared:  0.8532 
## F-statistic: 8.872e+04 on 4 and 61032 DF,  p-value: < 2.2e-16
# Randomly assign every row to subset 1 (training, probability 0.8) or
# subset 2 (testing, probability 0.2). The generator is seeded so the split,
# and every result derived from it, can be reproduced; 17 is the seed already
# used for the cross-validation runs in this file.
set.seed(17)
ind <- sample(2, nrow(df), replace = TRUE, prob = c(0.8, 0.2))

# Separate the training and testing data.
train <- df[ind == 1, ]
test <- df[ind == 2, ]

# The response is named as a column rather than as `train$total_energy_pbe`,
# so the formula is resolved inside the data frame handed to lm() and does not
# depend on a global object called `train`.
modelo <- lm(total_energy_pbe ~ ., data = train)
summary(modelo)
## 
## Call:
## lm(formula = train$total_energy_pbe ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -303536   -6558     853    8215  186429 
## 
## Coefficients:
##                        Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)            2519.647    255.153    9.875   <2e-16 ***
## num_heteroatoms        3867.913     48.790   79.277   <2e-16 ***
## tpsa                   -292.212      3.774  -77.436   <2e-16 ***
## mol_w                 -1137.060      2.376 -478.526   <2e-16 ***
## num_valence_electrons  2721.414      6.229  436.926   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17440 on 48880 degrees of freedom
## Multiple R-squared:  0.8547, Adjusted R-squared:  0.8547 
## F-statistic: 7.19e+04 on 4 and 48880 DF,  p-value: < 2.2e-16
# Score the training-set model on the held-out rows.
predict_test <- predict(modelo, newdata = test)
observed_test <- test$total_energy_pbe

# R2 is computed here as the squared correlation between observed and
# predicted values; MSE and RMSE are on the scale of the response.
data.frame(
  R2 = cor(observed_test, predict_test)^2,
  MSE = mean((predict_test - observed_test)^2),
  RMSE = rmse(predict_test, observed_test)
)
##          R2       MSE     RMSE
## 1 0.8472647 316386360 17787.25
# Compare polynomial degrees 1 to 5 applied to all four predictors at once,
# again using 5-fold cross-validation.
degrees <- 1:5
cv_MSE_k5 <- rep(NA_real_, length(degrees))
for (i in degrees) {
  # Note: this reassigns `modelo` on every iteration.
  modelo <- glm(
    formula = total_energy_pbe ~
      poly(num_heteroatoms, i) + poly(tpsa, i) +
      poly(mol_w, i) + poly(num_valence_electrons, i),
    data = df
  )
  # The seed is reset before every cv.glm() call, so each degree starts from
  # the same random-number state.
  set.seed(17)
  cv_result <- cv.glm(data = df, glmfit = modelo, K = 5)
  # Store the first component of delta as the CV error for this degree.
  cv_MSE_k5[i] <- cv_result$delta[1]
}

# Plot the cross-validated error against the polynomial degree.
cv_curve <- data.frame(polinomio = degrees, cv_MSE = cv_MSE_k5)
p4 <- ggplot(data = cv_curve, aes(x = polinomio, y = cv_MSE)) +
  geom_point(colour = "firebrick3") +
  geom_path() +
  theme(
    panel.grid.major = element_line(colour = "gray90"),
    plot.title = element_text(face = "bold"),
    panel.background = element_rect(fill = "gray98")
  ) +
  labs(title = "Test Error ~ Grado del polinomio") +
  scale_x_continuous(breaks = degrees)
p4

# Degree-2 polynomial model in all four predictors, fitted on the full df.
modelo_poli2 <- lm(
  formula = total_energy_pbe ~
    poly(num_heteroatoms, 2) + poly(tpsa, 2) +
    poly(mol_w, 2) + poly(num_valence_electrons, 2),
  data = df
)
summary(modelo_poli2)
## 
## Call:
## lm(formula = total_energy_pbe ~ poly(num_heteroatoms, 2) + poly(tpsa, 
##     2) + poly(mol_w, 2) + poly(num_valence_electrons, 2), data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -336395   -6995     828    8533  118161 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                     -4.706e+04  6.996e+01 -672.72   <2e-16 ***
## poly(num_heteroatoms, 2)1        2.736e+06  2.987e+04   91.58   <2e-16 ***
## poly(num_heteroatoms, 2)2       -5.611e+05  2.176e+04  -25.79   <2e-16 ***
## poly(tpsa, 2)1                  -2.588e+06  2.862e+04  -90.44   <2e-16 ***
## poly(tpsa, 2)2                   4.856e+05  2.080e+04   23.34   <2e-16 ***
## poly(mol_w, 2)1                 -3.355e+07  6.349e+04 -528.54   <2e-16 ***
## poly(mol_w, 2)2                  1.442e+06  4.297e+04   33.56   <2e-16 ***
## poly(num_valence_electrons, 2)1  2.895e+07  5.974e+04  484.57   <2e-16 ***
## poly(num_valence_electrons, 2)2 -1.399e+06  4.231e+04  -33.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17280 on 61028 degrees of freedom
## Multiple R-squared:  0.8571, Adjusted R-squared:  0.8571 
## F-statistic: 4.574e+04 on 8 and 61028 DF,  p-value: < 2.2e-16
#########
#MOL2VEC#
#########


# Load the table of mol2vec components.
# NOTE(review): this is an absolute, machine-specific path, so the script only
# runs where that exact location exists.
mol2vec <- read.csv(
  file = "C:/Users/User/OneDrive/Aalto/ML_MS/ML_MS/dataset_componentes_separadas.csv"
)

# List the columns that were read.
colnames(mol2vec)
##   [1] "X"                "component_0"      "component_1"     
##   [4] "component_2"      "component_3"      "component_4"     
##   [7] "component_5"      "component_6"      "component_7"     
##  [10] "component_8"      "component_9"      "component_10"    
##  [13] "component_11"     "component_12"     "component_13"    
##  [16] "component_14"     "component_15"     "component_16"    
##  [19] "component_17"     "component_18"     "component_19"    
##  [22] "component_20"     "component_21"     "component_22"    
##  [25] "component_23"     "component_24"     "component_25"    
##  [28] "component_26"     "component_27"     "component_28"    
##  [31] "component_29"     "component_30"     "component_31"    
##  [34] "component_32"     "component_33"     "component_34"    
##  [37] "component_35"     "component_36"     "component_37"    
##  [40] "component_38"     "component_39"     "component_40"    
##  [43] "component_41"     "component_42"     "component_43"    
##  [46] "component_44"     "component_45"     "component_46"    
##  [49] "component_47"     "component_48"     "component_49"    
##  [52] "component_50"     "component_51"     "component_52"    
##  [55] "component_53"     "component_54"     "component_55"    
##  [58] "component_56"     "component_57"     "component_58"    
##  [61] "component_59"     "component_60"     "component_61"    
##  [64] "component_62"     "component_63"     "component_64"    
##  [67] "component_65"     "component_66"     "component_67"    
##  [70] "component_68"     "component_69"     "component_70"    
##  [73] "component_71"     "component_72"     "component_73"    
##  [76] "component_74"     "component_75"     "component_76"    
##  [79] "component_77"     "component_78"     "component_79"    
##  [82] "component_80"     "component_81"     "component_82"    
##  [85] "component_83"     "component_84"     "component_85"    
##  [88] "component_86"     "component_87"     "component_88"    
##  [91] "component_89"     "component_90"     "component_91"    
##  [94] "component_92"     "component_93"     "component_94"    
##  [97] "component_95"     "component_96"     "component_97"    
## [100] "component_98"     "component_99"     "total_energy_pbe"
# Linear model of the response on the mol2vec components alone.
# Only the component_* columns are used as predictors. With `~ .` on the whole
# table, the column X was also entered as a predictor.
# NOTE(review): X is presumably a row index written by the CSV export — confirm.
# The response is still taken from df, as before, and is paired with the
# components by row position, so both tables must have the same number of rows.
# NOTE: this reassigns modelo_1, replacing the earlier mol_w-only model.
stopifnot(nrow(mol2vec) == nrow(df))
component_cols <- grep("^component_", names(mol2vec), value = TRUE)
mol2vec_model_data <- data.frame(
  total_energy_pbe = df$total_energy_pbe,
  mol2vec[, component_cols, drop = FALSE]
)
modelo_1 <- lm(total_energy_pbe ~ ., data = mol2vec_model_data)
summary(modelo_1)
## 
## Call:
## lm(formula = df$total_energy_pbe ~ ., data = mol2vec)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -497364   -8182    2433   12764  184676 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -7.905e+03  4.279e+02 -18.473  < 2e-16 ***
## X             1.431e-02  6.382e-03   2.243 0.024928 *  
## component_0   1.665e+03  1.293e+02  12.871  < 2e-16 ***
## component_1   3.665e+03  1.208e+02  30.349  < 2e-16 ***
## component_2  -1.369e+03  1.116e+02 -12.271  < 2e-16 ***
## component_3   3.865e+02  1.116e+02   3.463 0.000534 ***
## component_4  -4.589e+02  1.086e+02  -4.225 2.40e-05 ***
## component_5   1.527e+03  1.389e+02  10.991  < 2e-16 ***
## component_6   1.531e+02  1.297e+02   1.181 0.237658    
## component_7  -3.738e+03  1.270e+02 -29.439  < 2e-16 ***
## component_8   5.022e+02  1.176e+02   4.271 1.95e-05 ***
## component_9  -3.248e+03  1.152e+02 -28.188  < 2e-16 ***
## component_10 -7.851e+02  1.232e+02  -6.374 1.85e-10 ***
## component_11 -6.910e+03  1.249e+02 -55.342  < 2e-16 ***
## component_12  2.295e+02  1.260e+02   1.822 0.068391 .  
## component_13 -3.123e+03  1.231e+02 -25.357  < 2e-16 ***
## component_14 -1.526e+03  1.174e+02 -13.003  < 2e-16 ***
## component_15 -4.056e+03  1.143e+02 -35.476  < 2e-16 ***
## component_16 -2.889e+00  1.122e+02  -0.026 0.979455    
## component_17 -3.659e+02  1.206e+02  -3.033 0.002421 ** 
## component_18 -1.283e+03  1.215e+02 -10.560  < 2e-16 ***
## component_19  1.114e+03  1.390e+02   8.015 1.12e-15 ***
## component_20 -1.471e+03  1.255e+02 -11.714  < 2e-16 ***
## component_21 -2.689e+03  1.316e+02 -20.425  < 2e-16 ***
## component_22 -5.436e+02  1.267e+02  -4.291 1.78e-05 ***
## component_23  6.138e+02  1.235e+02   4.971 6.69e-07 ***
## component_24 -2.227e+03  1.315e+02 -16.943  < 2e-16 ***
## component_25 -2.985e+03  1.138e+02 -26.223  < 2e-16 ***
## component_26 -2.082e+03  1.218e+02 -17.090  < 2e-16 ***
## component_27  4.570e+02  1.255e+02   3.642 0.000271 ***
## component_28  1.096e+01  1.247e+02   0.088 0.929932    
## component_29 -9.964e+02  1.191e+02  -8.368  < 2e-16 ***
## component_30  8.611e+02  1.132e+02   7.606 2.86e-14 ***
## component_31  1.874e+03  1.444e+02  12.981  < 2e-16 ***
## component_32  1.882e+03  1.217e+02  15.460  < 2e-16 ***
## component_33 -9.981e+02  1.147e+02  -8.700  < 2e-16 ***
## component_34  3.739e+03  1.345e+02  27.803  < 2e-16 ***
## component_35 -1.973e+03  1.405e+02 -14.046  < 2e-16 ***
## component_36 -1.819e+03  1.208e+02 -15.054  < 2e-16 ***
## component_37 -7.048e+02  1.187e+02  -5.940 2.87e-09 ***
## component_38 -5.525e+02  1.274e+02  -4.338 1.44e-05 ***
## component_39 -1.935e+03  1.149e+02 -16.841  < 2e-16 ***
## component_40 -8.042e+02  1.237e+02  -6.500 8.09e-11 ***
## component_41  3.433e+03  1.087e+02  31.596  < 2e-16 ***
## component_42  1.933e+03  1.097e+02  17.615  < 2e-16 ***
## component_43  2.771e+03  1.328e+02  20.865  < 2e-16 ***
## component_44 -9.619e+02  1.285e+02  -7.488 7.06e-14 ***
## component_45  3.563e+03  1.328e+02  26.836  < 2e-16 ***
## component_46 -2.102e+03  1.236e+02 -17.011  < 2e-16 ***
## component_47  2.735e+00  1.083e+02   0.025 0.979842    
## component_48 -1.694e+03  1.232e+02 -13.750  < 2e-16 ***
## component_49  1.404e+03  1.216e+02  11.548  < 2e-16 ***
## component_50  1.447e+03  1.225e+02  11.810  < 2e-16 ***
## component_51 -1.209e+03  1.284e+02  -9.417  < 2e-16 ***
## component_52  1.775e+03  1.238e+02  14.334  < 2e-16 ***
## component_53  3.385e+03  1.191e+02  28.417  < 2e-16 ***
## component_54  7.971e+02  1.370e+02   5.819 5.96e-09 ***
## component_55 -2.210e+03  1.121e+02 -19.708  < 2e-16 ***
## component_56 -4.251e+03  1.171e+02 -36.286  < 2e-16 ***
## component_57  6.322e+02  1.206e+02   5.241 1.60e-07 ***
## component_58 -3.990e+02  1.267e+02  -3.149 0.001642 ** 
## component_59  2.229e+03  1.334e+02  16.706  < 2e-16 ***
## component_60  9.774e+02  1.049e+02   9.314  < 2e-16 ***
## component_61 -3.188e+03  1.152e+02 -27.665  < 2e-16 ***
## component_62  6.448e+02  1.221e+02   5.280 1.30e-07 ***
## component_63  1.991e+03  1.335e+02  14.909  < 2e-16 ***
## component_64  4.697e+03  1.216e+02  38.624  < 2e-16 ***
## component_65 -3.524e+03  1.251e+02 -28.171  < 2e-16 ***
## component_66 -1.072e+03  1.200e+02  -8.931  < 2e-16 ***
## component_67 -2.494e+02  1.186e+02  -2.103 0.035476 *  
## component_68 -3.494e+01  1.296e+02  -0.270 0.787476    
## component_69 -2.056e+03  1.314e+02 -15.648  < 2e-16 ***
## component_70 -2.732e+02  1.143e+02  -2.390 0.016846 *  
## component_71 -4.057e+03  1.277e+02 -31.776  < 2e-16 ***
## component_72 -3.272e+03  1.215e+02 -26.918  < 2e-16 ***
## component_73  1.975e+03  1.193e+02  16.550  < 2e-16 ***
## component_74 -3.218e+03  1.269e+02 -25.362  < 2e-16 ***
## component_75  1.035e+03  1.239e+02   8.355  < 2e-16 ***
## component_76 -5.436e+02  1.176e+02  -4.623 3.78e-06 ***
## component_77  4.769e+02  1.315e+02   3.626 0.000288 ***
## component_78  2.299e+03  1.073e+02  21.427  < 2e-16 ***
## component_79  1.132e+03  1.123e+02  10.081  < 2e-16 ***
## component_80  1.640e+03  1.326e+02  12.365  < 2e-16 ***
## component_81  1.062e+03  1.345e+02   7.896 2.92e-15 ***
## component_82  4.644e+02  1.227e+02   3.786 0.000153 ***
## component_83 -1.694e+03  1.321e+02 -12.825  < 2e-16 ***
## component_84 -1.050e+03  1.238e+02  -8.480  < 2e-16 ***
## component_85  4.670e+03  1.234e+02  37.839  < 2e-16 ***
## component_86 -3.095e+03  1.279e+02 -24.196  < 2e-16 ***
## component_87 -4.133e+03  1.152e+02 -35.869  < 2e-16 ***
## component_88  2.615e+03  1.287e+02  20.326  < 2e-16 ***
## component_89  7.797e+02  1.104e+02   7.062 1.66e-12 ***
## component_90  6.062e+03  1.165e+02  52.037  < 2e-16 ***
## component_91 -4.348e+03  1.224e+02 -35.517  < 2e-16 ***
## component_92  6.472e+02  1.245e+02   5.200 2.00e-07 ***
## component_93  3.108e+03  1.210e+02  25.679  < 2e-16 ***
## component_94 -2.535e+03  1.086e+02 -23.345  < 2e-16 ***
## component_95  2.550e+03  1.274e+02  20.021  < 2e-16 ***
## component_96  7.937e+03  1.111e+02  71.470  < 2e-16 ***
## component_97 -2.919e+03  1.208e+02 -24.169  < 2e-16 ***
## component_98  2.827e+03  1.213e+02  23.300  < 2e-16 ***
## component_99 -2.814e+03  1.362e+02 -20.665  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27350 on 60935 degrees of freedom
## Multiple R-squared:  0.6428, Adjusted R-squared:  0.6422 
## F-statistic:  1085 on 101 and 60935 DF,  p-value: < 2.2e-16
# Build the combined design table: the columns of df followed by the mol2vec
# components. The components are picked by name instead of by position
# (2:101), so a change in column order cannot pull in the wrong columns.
mol2Vec <- mol2vec[, grep("^component_", names(mol2vec)), drop = FALSE]

# cbind() pairs rows by position, so both tables must have the same row count.
stopifnot(nrow(mol2Vec) == nrow(df))
final_data <- cbind(df, mol2Vec)

# Reuse the existing row assignment `ind`, so this model is trained and tested
# on the same rows as the earlier descriptor-only model.
stopifnot(length(ind) == nrow(final_data))
train <- final_data[ind == 1, ]
test <- final_data[ind == 2, ]

# The response is named as a column rather than as `train$total_energy_pbe`,
# so the formula is resolved inside the data frame handed to lm().
modelo_final <- lm(total_energy_pbe ~ ., data = train)
summary(modelo_final)
## 
## Call:
## lm(formula = train$total_energy_pbe ~ ., data = train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -145078   -3269     251    3744   87967 
## 
## Coefficients:
##                        Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)            -335.254    147.893   -2.267 0.023403 *  
## num_heteroatoms        6655.729    108.193   61.517  < 2e-16 ***
## tpsa                   -535.755      5.727  -93.541  < 2e-16 ***
## mol_w                 -1462.588      2.531 -577.785  < 2e-16 ***
## num_valence_electrons  4994.009     33.753  147.957  < 2e-16 ***
## component_0             123.495     47.082    2.623 0.008720 ** 
## component_1             552.435     43.553   12.684  < 2e-16 ***
## component_2            -952.952     39.838  -23.921  < 2e-16 ***
## component_3           -1398.596     40.005  -34.961  < 2e-16 ***
## component_4            -815.682     38.898  -20.970  < 2e-16 ***
## component_5             551.239     50.488   10.918  < 2e-16 ***
## component_6            1914.470     47.422   40.371  < 2e-16 ***
## component_7            1357.369     48.066   28.240  < 2e-16 ***
## component_8           -1123.951     42.198  -26.635  < 2e-16 ***
## component_9            -832.890     40.968  -20.330  < 2e-16 ***
## component_10           -780.458     45.045  -17.326  < 2e-16 ***
## component_11           -157.492     45.991   -3.424 0.000617 ***
## component_12            491.886     46.016   10.690  < 2e-16 ***
## component_13          -1331.892     45.372  -29.355  < 2e-16 ***
## component_14           1348.015     43.325   31.114  < 2e-16 ***
## component_15             56.334     42.024    1.341 0.180080    
## component_16            345.249     40.425    8.540  < 2e-16 ***
## component_17          -1845.846     43.771  -42.170  < 2e-16 ***
## component_18           -406.943     43.706   -9.311  < 2e-16 ***
## component_19             17.254     49.716    0.347 0.728551    
## component_20            183.623     46.564    3.943 8.04e-05 ***
## component_21             92.951     47.016    1.977 0.048046 *  
## component_22           -617.558     45.512  -13.569  < 2e-16 ***
## component_23            735.527     44.684   16.461  < 2e-16 ***
## component_24          -1187.606     47.484  -25.011  < 2e-16 ***
## component_25          -1243.576     41.359  -30.068  < 2e-16 ***
## component_26             67.608     46.147    1.465 0.142911    
## component_27            -27.117     46.800   -0.579 0.562298    
## component_28            510.280     45.778   11.147  < 2e-16 ***
## component_29            868.845     43.918   19.783  < 2e-16 ***
## component_30           -387.663     40.563   -9.557  < 2e-16 ***
## component_31            351.160     51.778    6.782 1.20e-11 ***
## component_32            685.554     44.103   15.544  < 2e-16 ***
## component_33             78.758     42.272    1.863 0.062448 .  
## component_34           -665.348     49.349  -13.483  < 2e-16 ***
## component_35            184.276     50.544    3.646 0.000267 ***
## component_36            594.585     43.926   13.536  < 2e-16 ***
## component_37            -64.423     43.937   -1.466 0.142584    
## component_38           -351.492     45.857   -7.665 1.82e-14 ***
## component_39           -696.308     42.008  -16.576  < 2e-16 ***
## component_40            606.396     44.857   13.518  < 2e-16 ***
## component_41           -199.482     39.721   -5.022 5.13e-07 ***
## component_42           -192.612     39.360   -4.894 9.93e-07 ***
## component_43           1090.071     49.034   22.231  < 2e-16 ***
## component_44            966.294     45.983   21.014  < 2e-16 ***
## component_45            -53.873     48.656   -1.107 0.268202    
## component_46           -621.614     45.766  -13.582  < 2e-16 ***
## component_47           -322.366     39.762   -8.107 5.29e-16 ***
## component_48          -2001.646     44.112  -45.377  < 2e-16 ***
## component_49          -1263.813     45.033  -28.064  < 2e-16 ***
## component_50             14.839     45.420    0.327 0.743899    
## component_51          -1017.514     46.425  -21.918  < 2e-16 ***
## component_52            338.537     44.444    7.617 2.64e-14 ***
## component_53           -294.878     42.885   -6.876 6.23e-12 ***
## component_54           -204.784     48.906   -4.187 2.83e-05 ***
## component_55           -128.576     40.964   -3.139 0.001698 ** 
## component_56          -1542.347     42.057  -36.673  < 2e-16 ***
## component_57           -153.170     44.775   -3.421 0.000625 ***
## component_58           -340.283     45.265   -7.518 5.67e-14 ***
## component_59           -235.009     48.191   -4.877 1.08e-06 ***
## component_60            225.742     38.500    5.863 4.56e-09 ***
## component_61           -133.619     41.433   -3.225 0.001261 ** 
## component_62             75.985     45.107    1.685 0.092082 .  
## component_63            157.213     48.940    3.212 0.001317 ** 
## component_64           -226.528     44.870   -5.049 4.47e-07 ***
## component_65            196.812     46.086    4.271 1.95e-05 ***
## component_66            679.484     42.935   15.826  < 2e-16 ***
## component_67           -349.782     43.149   -8.106 5.33e-16 ***
## component_68            903.284     46.470   19.438  < 2e-16 ***
## component_69           -814.751     46.822  -17.401  < 2e-16 ***
## component_70           -556.316     43.996  -12.645  < 2e-16 ***
## component_71           1106.370     47.057   23.511  < 2e-16 ***
## component_72           -823.096     43.724  -18.825  < 2e-16 ***
## component_73           1436.441     42.678   33.658  < 2e-16 ***
## component_74            -69.298     45.962   -1.508 0.131630    
## component_75           1226.558     44.442   27.599  < 2e-16 ***
## component_76            341.044     42.844    7.960 1.76e-15 ***
## component_77            349.049     46.843    7.452 9.38e-14 ***
## component_78            799.828     41.302   19.366  < 2e-16 ***
## component_79           1333.916     42.741   31.209  < 2e-16 ***
## component_80           -193.024     48.151   -4.009 6.11e-05 ***
## component_81            878.512     48.810   17.999  < 2e-16 ***
## component_82            897.481     43.978   20.407  < 2e-16 ***
## component_83            350.527     48.481    7.230 4.89e-13 ***
## component_84            771.825     44.434   17.370  < 2e-16 ***
## component_85              6.499     45.290    0.143 0.885901    
## component_86           -423.478     45.663   -9.274  < 2e-16 ***
## component_87           -390.092     41.829   -9.326  < 2e-16 ***
## component_88            890.567     47.583   18.716  < 2e-16 ***
## component_89           1815.620     41.446   43.807  < 2e-16 ***
## component_90           -379.630     43.012   -8.826  < 2e-16 ***
## component_91           -176.626     45.008   -3.924 8.71e-05 ***
## component_92           -623.164     44.697  -13.942  < 2e-16 ***
## component_93           -455.406     43.689  -10.424  < 2e-16 ***
## component_94           -350.222     39.832   -8.792  < 2e-16 ***
## component_95            499.921     47.823   10.454  < 2e-16 ***
## component_96           1202.418     40.832   29.448  < 2e-16 ***
## component_97            448.836     43.839   10.238  < 2e-16 ***
## component_98            874.396     43.789   19.968  < 2e-16 ***
## component_99            407.517     50.235    8.112 5.08e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8705 on 48780 degrees of freedom
## Multiple R-squared:  0.9639, Adjusted R-squared:  0.9638 
## F-statistic: 1.252e+04 on 104 and 48780 DF,  p-value: < 2.2e-16
# Score the combined model on the held-out rows.
predict_test <- predict(modelo_final, newdata = test)
observed_test <- test$total_energy_pbe

# R2 is computed here as the squared correlation between observed and
# predicted values; MSE, RMSE and MAE are on the scale of the response.
data.frame(
  R2 = cor(observed_test, predict_test)^2,
  MSE = mean((predict_test - observed_test)^2),
  RMSE = rmse(predict_test, observed_test),
  MAE = mae(predict_test, observed_test)
)
##          R2      MSE     RMSE      MAE
## 1 0.9646093 73676580 8583.506 5328.238