This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ISLR)
library(corrplot)
## corrplot 0.92 loaded
library(ggplot2)
library(GGally)
## Warning: package 'GGally' was built under R version 4.2.2
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
library(Metrics)
library(boot)
library(boot)
# Read the Excel workbook (path is relative to the working directory).
output <- read_excel(path = "output.xlsx")
## New names:
## • `` -> `...1`
# View(output)  # interactive inspection only

# Keep the four descriptor columns plus the response (total_energy_pbe).
df <- output %>%
  select(num_heteroatoms, tpsa, mol_w, num_valence_electrons, total_energy_pbe)

# Distribution of the response: density-scaled histogram with a kernel density
# overlay. after_stat(density) replaces the deprecated ..density.. notation.
ggplot(df, aes(x = total_energy_pbe)) +
  geom_histogram(
    aes(y = after_stat(density)),
    colour = "black", fill = "white", bins = 100
  ) +
  geom_density(alpha = .2, fill = "#FF6666")
# Correlation overview: pairwise plot matrix of every column in df, with
# smoothed scatter plots below the diagonal, bar histograms on the diagonal
# and axis labels switched off.
ggpairs(
  data = df,
  lower = list(continuous = "smooth"),
  diag = list(continuous = "barDiag"),
  axisLabels = "none"
)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Single-predictor study with mol_w (per the original note, the most
# correlated variable). The response goes on the y axis so the scatter plot
# agrees with its title and with the model fitted below.
ggplot(data = df, aes(x = mol_w, y = total_energy_pbe)) +
  geom_point(color = "grey30", alpha = 0.3) +
  labs(title = "Total_Energy_PBE ~ mol_w") +
  theme_bw()

# Simple linear regression of total energy on molecular weight.
modelo_1 <- lm(total_energy_pbe ~ mol_w, data = df)
summary(modelo_1)
##
## Call:
## lm(formula = total_energy_pbe ~ mol_w, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -704145 -398 8601 16384 92207
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14002.90 487.94 28.7 <2e-16 ***
## mol_w -183.22 1.38 -132.8 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 40270 on 61035 degrees of freedom
## Multiple R-squared: 0.2241, Adjusted R-squared: 0.2241
## F-statistic: 1.763e+04 on 1 and 61035 DF, p-value: < 2.2e-16
######## ENERGY VS MOL_W
# Scatter plot with a degree-4 polynomial trend. geom_smooth() fits
# y ~ poly(x, 4), so total_energy_pbe must be mapped to y and mol_w to x for
# the curve to be the regression named in the title.
ggplot(data = df, aes(x = mol_w, y = total_energy_pbe)) +
  geom_point(color = "grey30", alpha = 0.3) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), color = "red") +
  labs(title = "POLY 4: total_energy_pbe ~ mol_w") +
  theme_bw() +
  theme(plot.title = element_text(hjust = 0.5))
## Polynomial regression: compare degrees 1 to 5 of mol_w
# For each degree, fit the model on the full data and store the first delta
# component returned by 5-fold cv.glm(). The seed is reset on every iteration
# so each degree starts from the same RNG state.
cv_MSE_k5 <- rep(NA, 5)
for (i in seq_len(5)) {
  set.seed(17)
  modelo <- glm(formula = total_energy_pbe ~ poly(mol_w, i), data = df)
  cv_resultado <- cv.glm(data = df, glmfit = modelo, K = 5)
  cv_MSE_k5[i] <- cv_resultado$delta[1]
}

# Cross-validated error against polynomial degree.
p5 <- ggplot(
  data = data.frame(polinomio = 1:5, cv_MSE = cv_MSE_k5),
  mapping = aes(x = polinomio, y = cv_MSE)
) +
  geom_point(colour = c("firebrick3")) +
  geom_path() +
  theme(
    panel.grid.major = element_line(colour = "gray90"),
    plot.title = element_text(face = "bold"),
    panel.background = element_rect(fill = "gray98")
  ) +
  labs(title = "Test Error ~ Grado del polinomio") +
  scale_x_continuous(breaks = 1:5)
p5
# Multiple linear regression
# ----------------------------------------
# Total energy regressed on every other column of df.
# NOTE: despite the "poli4" in its name, this is a first-order linear model.
modelo_poli4 <- lm(formula = total_energy_pbe ~ ., data = df)
summary(object = modelo_poli4)
##
## Call:
## lm(formula = total_energy_pbe ~ ., data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -302474 -6568 842 8232 187011
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2536.695 229.156 11.07 <2e-16 ***
## num_heteroatoms 3931.100 43.780 89.79 <2e-16 ***
## tpsa -295.503 3.378 -87.47 <2e-16 ***
## mol_w -1140.134 2.141 -532.41 <2e-16 ***
## num_valence_electrons 2728.553 5.616 485.86 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17510 on 61032 degrees of freedom
## Multiple R-squared: 0.8533, Adjusted R-squared: 0.8532
## F-statistic: 8.872e+04 on 4 and 61032 DF, p-value: < 2.2e-16
# Random train/test assignment: every row is labelled 1 (train, p = 0.8) or
# 2 (test, p = 0.2). No seed is set here, so the split depends on the RNG
# state left by the code that ran before it.
ind <- sample(2, nrow(df), replace = TRUE, prob = c(0.8, 0.2))

# Split the rows according to their label.
train <- df[ind == 1, ]
test <- df[ind == 2, ]

# Multiple linear regression on the training rows. The response is written as
# a bare column name (not train$total_energy_pbe) so it is resolved inside
# `data` and the fitted model does not depend on the global `train` object.
modelo <- lm(total_energy_pbe ~ ., data = train)
summary(modelo)
##
## Call:
## lm(formula = train$total_energy_pbe ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -303536 -6558 853 8215 186429
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2519.647 255.153 9.875 <2e-16 ***
## num_heteroatoms 3867.913 48.790 79.277 <2e-16 ***
## tpsa -292.212 3.774 -77.436 <2e-16 ***
## mol_w -1137.060 2.376 -478.526 <2e-16 ***
## num_valence_electrons 2721.414 6.229 436.926 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17440 on 48880 degrees of freedom
## Multiple R-squared: 0.8547, Adjusted R-squared: 0.8547
## F-statistic: 7.19e+04 on 4 and 48880 DF, p-value: < 2.2e-16
# Predictions of the training-set model on the held-out rows, summarised as
# squared correlation, mean squared error and root mean squared error.
predict_test <- predict(object = modelo, newdata = test)
data.frame(
  R2 = cor(test$total_energy_pbe, predict_test)^2,
  MSE = mean((predict_test - test$total_energy_pbe)^2),
  RMSE = rmse(actual = test$total_energy_pbe, predicted = predict_test)
)
## R2 MSE RMSE
## 1 0.8472647 316386360 17787.25
# Compare polynomial degrees 1 to 5 applied to all four predictors at once.
# Each degree is fitted inside a function so the per-degree glm stays local
# and no longer overwrites the global `modelo` (the training-set linear
# model). The seed is reset before every cv.glm() call so each degree starts
# from the same RNG state.
cv_MSE_k5 <- vapply(
  1:5,
  function(grado) {
    ajuste <- glm(
      total_energy_pbe ~ poly(num_heteroatoms, grado) + poly(tpsa, grado) +
        poly(mol_w, grado) + poly(num_valence_electrons, grado),
      data = df
    )
    set.seed(17)
    # First delta component of the 5-fold cross-validation.
    cv.glm(data = df, glmfit = ajuste, K = 5)$delta[1]
  },
  numeric(1)
)

# Cross-validated error against polynomial degree.
p4 <- ggplot(
  data = data.frame(polinomio = 1:5, cv_MSE = cv_MSE_k5),
  mapping = aes(x = polinomio, y = cv_MSE)
) +
  geom_point(colour = c("firebrick3")) +
  geom_path() +
  theme(
    panel.grid.major = element_line(colour = "gray90"),
    plot.title = element_text(face = "bold"),
    panel.background = element_rect(fill = "gray98")
  ) +
  labs(title = "Test Error ~ Grado del polinomio") +
  scale_x_continuous(breaks = 1:5)
p4
# Second-degree polynomial model in all four predictors, fitted on the full
# data set.
modelo_poli2 <- lm(
  formula = total_energy_pbe ~
    poly(num_heteroatoms, 2) +
    poly(tpsa, 2) +
    poly(mol_w, 2) +
    poly(num_valence_electrons, 2),
  data = df
)
summary(object = modelo_poli2)
##
## Call:
## lm(formula = total_energy_pbe ~ poly(num_heteroatoms, 2) + poly(tpsa,
## 2) + poly(mol_w, 2) + poly(num_valence_electrons, 2), data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -336395 -6995 828 8533 118161
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4.706e+04 6.996e+01 -672.72 <2e-16 ***
## poly(num_heteroatoms, 2)1 2.736e+06 2.987e+04 91.58 <2e-16 ***
## poly(num_heteroatoms, 2)2 -5.611e+05 2.176e+04 -25.79 <2e-16 ***
## poly(tpsa, 2)1 -2.588e+06 2.862e+04 -90.44 <2e-16 ***
## poly(tpsa, 2)2 4.856e+05 2.080e+04 23.34 <2e-16 ***
## poly(mol_w, 2)1 -3.355e+07 6.349e+04 -528.54 <2e-16 ***
## poly(mol_w, 2)2 1.442e+06 4.297e+04 33.56 <2e-16 ***
## poly(num_valence_electrons, 2)1 2.895e+07 5.974e+04 484.57 <2e-16 ***
## poly(num_valence_electrons, 2)2 -1.399e+06 4.231e+04 -33.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17280 on 61028 degrees of freedom
## Multiple R-squared: 0.8571, Adjusted R-squared: 0.8571
## F-statistic: 4.574e+04 on 8 and 61028 DF, p-value: < 2.2e-16
#########
#MOL2VEC#
#########
# Load the component table (presumably Mol2vec embeddings, judging by the
# section title) and list its columns.
# NOTE(review): absolute, machine-specific path; confirm the file location
# before running this on another machine.
mol2vec <- read.csv(
  file = "C:/Users/User/OneDrive/Aalto/ML_MS/ML_MS/dataset_componentes_separadas.csv"
)
names(x = mol2vec)
## [1] "X" "component_0" "component_1"
## [4] "component_2" "component_3" "component_4"
## [7] "component_5" "component_6" "component_7"
## [10] "component_8" "component_9" "component_10"
## [13] "component_11" "component_12" "component_13"
## [16] "component_14" "component_15" "component_16"
## [19] "component_17" "component_18" "component_19"
## [22] "component_20" "component_21" "component_22"
## [25] "component_23" "component_24" "component_25"
## [28] "component_26" "component_27" "component_28"
## [31] "component_29" "component_30" "component_31"
## [34] "component_32" "component_33" "component_34"
## [37] "component_35" "component_36" "component_37"
## [40] "component_38" "component_39" "component_40"
## [43] "component_41" "component_42" "component_43"
## [46] "component_44" "component_45" "component_46"
## [49] "component_47" "component_48" "component_49"
## [52] "component_50" "component_51" "component_52"
## [55] "component_53" "component_54" "component_55"
## [58] "component_56" "component_57" "component_58"
## [61] "component_59" "component_60" "component_61"
## [64] "component_62" "component_63" "component_64"
## [67] "component_65" "component_66" "component_67"
## [70] "component_68" "component_69" "component_70"
## [73] "component_71" "component_72" "component_73"
## [76] "component_74" "component_75" "component_76"
## [79] "component_77" "component_78" "component_79"
## [82] "component_80" "component_81" "component_82"
## [85] "component_83" "component_84" "component_85"
## [88] "component_86" "component_87" "component_88"
## [91] "component_89" "component_90" "component_91"
## [94] "component_92" "component_93" "component_94"
## [97] "component_95" "component_96" "component_97"
## [100] "component_98" "component_99" "total_energy_pbe"
# Linear model of total energy on the component_* columns of mol2vec.
# `X` is removed from the predictors: it is the first column of the CSV and
# looks like a row index (TODO confirm), which `.` would otherwise sweep in.
# The response is taken from `df`, which assumes df and mol2vec hold the same
# rows in the same order.
modelo_1 <- lm(df$total_energy_pbe ~ . - X, data = mol2vec)
summary(modelo_1)
##
## Call:
## lm(formula = df$total_energy_pbe ~ ., data = mol2vec)
##
## Residuals:
## Min 1Q Median 3Q Max
## -497364 -8182 2433 12764 184676
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.905e+03 4.279e+02 -18.473 < 2e-16 ***
## X 1.431e-02 6.382e-03 2.243 0.024928 *
## component_0 1.665e+03 1.293e+02 12.871 < 2e-16 ***
## component_1 3.665e+03 1.208e+02 30.349 < 2e-16 ***
## component_2 -1.369e+03 1.116e+02 -12.271 < 2e-16 ***
## component_3 3.865e+02 1.116e+02 3.463 0.000534 ***
## component_4 -4.589e+02 1.086e+02 -4.225 2.40e-05 ***
## component_5 1.527e+03 1.389e+02 10.991 < 2e-16 ***
## component_6 1.531e+02 1.297e+02 1.181 0.237658
## component_7 -3.738e+03 1.270e+02 -29.439 < 2e-16 ***
## component_8 5.022e+02 1.176e+02 4.271 1.95e-05 ***
## component_9 -3.248e+03 1.152e+02 -28.188 < 2e-16 ***
## component_10 -7.851e+02 1.232e+02 -6.374 1.85e-10 ***
## component_11 -6.910e+03 1.249e+02 -55.342 < 2e-16 ***
## component_12 2.295e+02 1.260e+02 1.822 0.068391 .
## component_13 -3.123e+03 1.231e+02 -25.357 < 2e-16 ***
## component_14 -1.526e+03 1.174e+02 -13.003 < 2e-16 ***
## component_15 -4.056e+03 1.143e+02 -35.476 < 2e-16 ***
## component_16 -2.889e+00 1.122e+02 -0.026 0.979455
## component_17 -3.659e+02 1.206e+02 -3.033 0.002421 **
## component_18 -1.283e+03 1.215e+02 -10.560 < 2e-16 ***
## component_19 1.114e+03 1.390e+02 8.015 1.12e-15 ***
## component_20 -1.471e+03 1.255e+02 -11.714 < 2e-16 ***
## component_21 -2.689e+03 1.316e+02 -20.425 < 2e-16 ***
## component_22 -5.436e+02 1.267e+02 -4.291 1.78e-05 ***
## component_23 6.138e+02 1.235e+02 4.971 6.69e-07 ***
## component_24 -2.227e+03 1.315e+02 -16.943 < 2e-16 ***
## component_25 -2.985e+03 1.138e+02 -26.223 < 2e-16 ***
## component_26 -2.082e+03 1.218e+02 -17.090 < 2e-16 ***
## component_27 4.570e+02 1.255e+02 3.642 0.000271 ***
## component_28 1.096e+01 1.247e+02 0.088 0.929932
## component_29 -9.964e+02 1.191e+02 -8.368 < 2e-16 ***
## component_30 8.611e+02 1.132e+02 7.606 2.86e-14 ***
## component_31 1.874e+03 1.444e+02 12.981 < 2e-16 ***
## component_32 1.882e+03 1.217e+02 15.460 < 2e-16 ***
## component_33 -9.981e+02 1.147e+02 -8.700 < 2e-16 ***
## component_34 3.739e+03 1.345e+02 27.803 < 2e-16 ***
## component_35 -1.973e+03 1.405e+02 -14.046 < 2e-16 ***
## component_36 -1.819e+03 1.208e+02 -15.054 < 2e-16 ***
## component_37 -7.048e+02 1.187e+02 -5.940 2.87e-09 ***
## component_38 -5.525e+02 1.274e+02 -4.338 1.44e-05 ***
## component_39 -1.935e+03 1.149e+02 -16.841 < 2e-16 ***
## component_40 -8.042e+02 1.237e+02 -6.500 8.09e-11 ***
## component_41 3.433e+03 1.087e+02 31.596 < 2e-16 ***
## component_42 1.933e+03 1.097e+02 17.615 < 2e-16 ***
## component_43 2.771e+03 1.328e+02 20.865 < 2e-16 ***
## component_44 -9.619e+02 1.285e+02 -7.488 7.06e-14 ***
## component_45 3.563e+03 1.328e+02 26.836 < 2e-16 ***
## component_46 -2.102e+03 1.236e+02 -17.011 < 2e-16 ***
## component_47 2.735e+00 1.083e+02 0.025 0.979842
## component_48 -1.694e+03 1.232e+02 -13.750 < 2e-16 ***
## component_49 1.404e+03 1.216e+02 11.548 < 2e-16 ***
## component_50 1.447e+03 1.225e+02 11.810 < 2e-16 ***
## component_51 -1.209e+03 1.284e+02 -9.417 < 2e-16 ***
## component_52 1.775e+03 1.238e+02 14.334 < 2e-16 ***
## component_53 3.385e+03 1.191e+02 28.417 < 2e-16 ***
## component_54 7.971e+02 1.370e+02 5.819 5.96e-09 ***
## component_55 -2.210e+03 1.121e+02 -19.708 < 2e-16 ***
## component_56 -4.251e+03 1.171e+02 -36.286 < 2e-16 ***
## component_57 6.322e+02 1.206e+02 5.241 1.60e-07 ***
## component_58 -3.990e+02 1.267e+02 -3.149 0.001642 **
## component_59 2.229e+03 1.334e+02 16.706 < 2e-16 ***
## component_60 9.774e+02 1.049e+02 9.314 < 2e-16 ***
## component_61 -3.188e+03 1.152e+02 -27.665 < 2e-16 ***
## component_62 6.448e+02 1.221e+02 5.280 1.30e-07 ***
## component_63 1.991e+03 1.335e+02 14.909 < 2e-16 ***
## component_64 4.697e+03 1.216e+02 38.624 < 2e-16 ***
## component_65 -3.524e+03 1.251e+02 -28.171 < 2e-16 ***
## component_66 -1.072e+03 1.200e+02 -8.931 < 2e-16 ***
## component_67 -2.494e+02 1.186e+02 -2.103 0.035476 *
## component_68 -3.494e+01 1.296e+02 -0.270 0.787476
## component_69 -2.056e+03 1.314e+02 -15.648 < 2e-16 ***
## component_70 -2.732e+02 1.143e+02 -2.390 0.016846 *
## component_71 -4.057e+03 1.277e+02 -31.776 < 2e-16 ***
## component_72 -3.272e+03 1.215e+02 -26.918 < 2e-16 ***
## component_73 1.975e+03 1.193e+02 16.550 < 2e-16 ***
## component_74 -3.218e+03 1.269e+02 -25.362 < 2e-16 ***
## component_75 1.035e+03 1.239e+02 8.355 < 2e-16 ***
## component_76 -5.436e+02 1.176e+02 -4.623 3.78e-06 ***
## component_77 4.769e+02 1.315e+02 3.626 0.000288 ***
## component_78 2.299e+03 1.073e+02 21.427 < 2e-16 ***
## component_79 1.132e+03 1.123e+02 10.081 < 2e-16 ***
## component_80 1.640e+03 1.326e+02 12.365 < 2e-16 ***
## component_81 1.062e+03 1.345e+02 7.896 2.92e-15 ***
## component_82 4.644e+02 1.227e+02 3.786 0.000153 ***
## component_83 -1.694e+03 1.321e+02 -12.825 < 2e-16 ***
## component_84 -1.050e+03 1.238e+02 -8.480 < 2e-16 ***
## component_85 4.670e+03 1.234e+02 37.839 < 2e-16 ***
## component_86 -3.095e+03 1.279e+02 -24.196 < 2e-16 ***
## component_87 -4.133e+03 1.152e+02 -35.869 < 2e-16 ***
## component_88 2.615e+03 1.287e+02 20.326 < 2e-16 ***
## component_89 7.797e+02 1.104e+02 7.062 1.66e-12 ***
## component_90 6.062e+03 1.165e+02 52.037 < 2e-16 ***
## component_91 -4.348e+03 1.224e+02 -35.517 < 2e-16 ***
## component_92 6.472e+02 1.245e+02 5.200 2.00e-07 ***
## component_93 3.108e+03 1.210e+02 25.679 < 2e-16 ***
## component_94 -2.535e+03 1.086e+02 -23.345 < 2e-16 ***
## component_95 2.550e+03 1.274e+02 20.021 < 2e-16 ***
## component_96 7.937e+03 1.111e+02 71.470 < 2e-16 ***
## component_97 -2.919e+03 1.208e+02 -24.169 < 2e-16 ***
## component_98 2.827e+03 1.213e+02 23.300 < 2e-16 ***
## component_99 -2.814e+03 1.362e+02 -20.665 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27350 on 60935 degrees of freedom
## Multiple R-squared: 0.6428, Adjusted R-squared: 0.6422
## F-statistic: 1085 on 101 and 60935 DF, p-value: < 2.2e-16
# Keep only the component_* columns, selected by name instead of by the
# positional range 2:101 so a change in column order cannot pull in the wrong
# columns.
mol2Vec <- mol2vec[, grep("^component_", names(mol2vec)), drop = FALSE]

# Descriptors and response from df, side by side with the components.
# Assumes both tables hold the same rows in the same order.
final_data <- cbind(df, mol2Vec)

# Reuse the existing row labels in `ind` so the train/test split is the same
# one used for the descriptor-only model.
train <- final_data[ind == 1, ]
test <- final_data[ind == 2, ]

# Linear model on descriptors + components. The response is written as a bare
# column name so it is resolved inside `data` rather than through the global
# `train` object.
modelo_final <- lm(total_energy_pbe ~ ., data = train)
summary(modelo_final)
##
## Call:
## lm(formula = train$total_energy_pbe ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -145078 -3269 251 3744 87967
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -335.254 147.893 -2.267 0.023403 *
## num_heteroatoms 6655.729 108.193 61.517 < 2e-16 ***
## tpsa -535.755 5.727 -93.541 < 2e-16 ***
## mol_w -1462.588 2.531 -577.785 < 2e-16 ***
## num_valence_electrons 4994.009 33.753 147.957 < 2e-16 ***
## component_0 123.495 47.082 2.623 0.008720 **
## component_1 552.435 43.553 12.684 < 2e-16 ***
## component_2 -952.952 39.838 -23.921 < 2e-16 ***
## component_3 -1398.596 40.005 -34.961 < 2e-16 ***
## component_4 -815.682 38.898 -20.970 < 2e-16 ***
## component_5 551.239 50.488 10.918 < 2e-16 ***
## component_6 1914.470 47.422 40.371 < 2e-16 ***
## component_7 1357.369 48.066 28.240 < 2e-16 ***
## component_8 -1123.951 42.198 -26.635 < 2e-16 ***
## component_9 -832.890 40.968 -20.330 < 2e-16 ***
## component_10 -780.458 45.045 -17.326 < 2e-16 ***
## component_11 -157.492 45.991 -3.424 0.000617 ***
## component_12 491.886 46.016 10.690 < 2e-16 ***
## component_13 -1331.892 45.372 -29.355 < 2e-16 ***
## component_14 1348.015 43.325 31.114 < 2e-16 ***
## component_15 56.334 42.024 1.341 0.180080
## component_16 345.249 40.425 8.540 < 2e-16 ***
## component_17 -1845.846 43.771 -42.170 < 2e-16 ***
## component_18 -406.943 43.706 -9.311 < 2e-16 ***
## component_19 17.254 49.716 0.347 0.728551
## component_20 183.623 46.564 3.943 8.04e-05 ***
## component_21 92.951 47.016 1.977 0.048046 *
## component_22 -617.558 45.512 -13.569 < 2e-16 ***
## component_23 735.527 44.684 16.461 < 2e-16 ***
## component_24 -1187.606 47.484 -25.011 < 2e-16 ***
## component_25 -1243.576 41.359 -30.068 < 2e-16 ***
## component_26 67.608 46.147 1.465 0.142911
## component_27 -27.117 46.800 -0.579 0.562298
## component_28 510.280 45.778 11.147 < 2e-16 ***
## component_29 868.845 43.918 19.783 < 2e-16 ***
## component_30 -387.663 40.563 -9.557 < 2e-16 ***
## component_31 351.160 51.778 6.782 1.20e-11 ***
## component_32 685.554 44.103 15.544 < 2e-16 ***
## component_33 78.758 42.272 1.863 0.062448 .
## component_34 -665.348 49.349 -13.483 < 2e-16 ***
## component_35 184.276 50.544 3.646 0.000267 ***
## component_36 594.585 43.926 13.536 < 2e-16 ***
## component_37 -64.423 43.937 -1.466 0.142584
## component_38 -351.492 45.857 -7.665 1.82e-14 ***
## component_39 -696.308 42.008 -16.576 < 2e-16 ***
## component_40 606.396 44.857 13.518 < 2e-16 ***
## component_41 -199.482 39.721 -5.022 5.13e-07 ***
## component_42 -192.612 39.360 -4.894 9.93e-07 ***
## component_43 1090.071 49.034 22.231 < 2e-16 ***
## component_44 966.294 45.983 21.014 < 2e-16 ***
## component_45 -53.873 48.656 -1.107 0.268202
## component_46 -621.614 45.766 -13.582 < 2e-16 ***
## component_47 -322.366 39.762 -8.107 5.29e-16 ***
## component_48 -2001.646 44.112 -45.377 < 2e-16 ***
## component_49 -1263.813 45.033 -28.064 < 2e-16 ***
## component_50 14.839 45.420 0.327 0.743899
## component_51 -1017.514 46.425 -21.918 < 2e-16 ***
## component_52 338.537 44.444 7.617 2.64e-14 ***
## component_53 -294.878 42.885 -6.876 6.23e-12 ***
## component_54 -204.784 48.906 -4.187 2.83e-05 ***
## component_55 -128.576 40.964 -3.139 0.001698 **
## component_56 -1542.347 42.057 -36.673 < 2e-16 ***
## component_57 -153.170 44.775 -3.421 0.000625 ***
## component_58 -340.283 45.265 -7.518 5.67e-14 ***
## component_59 -235.009 48.191 -4.877 1.08e-06 ***
## component_60 225.742 38.500 5.863 4.56e-09 ***
## component_61 -133.619 41.433 -3.225 0.001261 **
## component_62 75.985 45.107 1.685 0.092082 .
## component_63 157.213 48.940 3.212 0.001317 **
## component_64 -226.528 44.870 -5.049 4.47e-07 ***
## component_65 196.812 46.086 4.271 1.95e-05 ***
## component_66 679.484 42.935 15.826 < 2e-16 ***
## component_67 -349.782 43.149 -8.106 5.33e-16 ***
## component_68 903.284 46.470 19.438 < 2e-16 ***
## component_69 -814.751 46.822 -17.401 < 2e-16 ***
## component_70 -556.316 43.996 -12.645 < 2e-16 ***
## component_71 1106.370 47.057 23.511 < 2e-16 ***
## component_72 -823.096 43.724 -18.825 < 2e-16 ***
## component_73 1436.441 42.678 33.658 < 2e-16 ***
## component_74 -69.298 45.962 -1.508 0.131630
## component_75 1226.558 44.442 27.599 < 2e-16 ***
## component_76 341.044 42.844 7.960 1.76e-15 ***
## component_77 349.049 46.843 7.452 9.38e-14 ***
## component_78 799.828 41.302 19.366 < 2e-16 ***
## component_79 1333.916 42.741 31.209 < 2e-16 ***
## component_80 -193.024 48.151 -4.009 6.11e-05 ***
## component_81 878.512 48.810 17.999 < 2e-16 ***
## component_82 897.481 43.978 20.407 < 2e-16 ***
## component_83 350.527 48.481 7.230 4.89e-13 ***
## component_84 771.825 44.434 17.370 < 2e-16 ***
## component_85 6.499 45.290 0.143 0.885901
## component_86 -423.478 45.663 -9.274 < 2e-16 ***
## component_87 -390.092 41.829 -9.326 < 2e-16 ***
## component_88 890.567 47.583 18.716 < 2e-16 ***
## component_89 1815.620 41.446 43.807 < 2e-16 ***
## component_90 -379.630 43.012 -8.826 < 2e-16 ***
## component_91 -176.626 45.008 -3.924 8.71e-05 ***
## component_92 -623.164 44.697 -13.942 < 2e-16 ***
## component_93 -455.406 43.689 -10.424 < 2e-16 ***
## component_94 -350.222 39.832 -8.792 < 2e-16 ***
## component_95 499.921 47.823 10.454 < 2e-16 ***
## component_96 1202.418 40.832 29.448 < 2e-16 ***
## component_97 448.836 43.839 10.238 < 2e-16 ***
## component_98 874.396 43.789 19.968 < 2e-16 ***
## component_99 407.517 50.235 8.112 5.08e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8705 on 48780 degrees of freedom
## Multiple R-squared: 0.9639, Adjusted R-squared: 0.9638
## F-statistic: 1.252e+04 on 104 and 48780 DF, p-value: < 2.2e-16
# Held-out performance of the combined model: squared correlation, mean
# squared error, root mean squared error and mean absolute error.
predict_test <- predict(object = modelo_final, newdata = test)
data.frame(
  R2 = cor(test$total_energy_pbe, predict_test)^2,
  MSE = mean((predict_test - test$total_energy_pbe)^2),
  RMSE = rmse(actual = test$total_energy_pbe, predicted = predict_test),
  MAE = mae(actual = test$total_energy_pbe, predicted = predict_test)
)
## R2 MSE RMSE MAE
## 1 0.9646093 73676580 8583.506 5328.238