Modelo de Regresión Polinómica
setwd("/cloud/project/")
datos<-read.csv("DerramesEEUU.csv", header = TRUE, sep=";" , dec=",",na.strings ="-")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
barriles <- as.numeric(datos$LiberacionInvoluntariaBarriles)
costo <- as.numeric(datos$CostosRemediacionAmbiental)
datos_interes <- data.frame(barriles,costo)
datos_interes <- na.omit(datos_interes)
mu_b <- mean(datos_interes$barriles)
sd_b <- sd(datos_interes$barriles)
mu_c <- mean(datos_interes$costo)
sd_c <- sd(datos_interes$costo)
set.seed(321)
n <- 80
barriles <- runif(n, 50, 1200)
area_afectada <- 0.03*barriles + rnorm(n, 0, 5)
tiempo_respuesta <- 2 + 0.01*barriles + rnorm(n, 0, 1)
costo <- 40000 + 1200*barriles - 0.9*barriles^2 + 0.0008*barriles^3 + 15000*area_afectada + 8000*tiempo_respuesta + rnorm(n, 0, 30000)
datos <- data.frame(barriles,area_afectada,tiempo_respuesta,costo)
m_lineal <- lm(costo ~ barriles, data = datos)
m_poli3 <- lm(costo ~ poly(barriles, 3, raw = TRUE), data = datos)
m_multiple <- lm(
costo ~ poly(barriles, 3, raw = TRUE) +
area_afectada +
tiempo_respuesta,
data = datos
)
summary(m_lineal)
##
## Call:
## lm(formula = costo ~ barriles, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -239932 -65442 -1999 55290 223111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19015.57 24897.02 -0.764 0.447
## barriles 1714.06 34.99 48.983 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 104700 on 78 degrees of freedom
## Multiple R-squared: 0.9685, Adjusted R-squared: 0.9681
## F-statistic: 2399 on 1 and 78 DF, p-value: < 2.2e-16
summary(m_poli3)
##
## Call:
## lm(formula = costo ~ poly(barriles, 3, raw = TRUE), data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187168 -52009 4414 47331 157431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.729e+04 4.492e+04 1.275 0.2061
## poly(barriles, 3, raw = TRUE)1 1.792e+03 2.980e+02 6.011 5.97e-08 ***
## poly(barriles, 3, raw = TRUE)2 -1.082e+00 5.394e-01 -2.005 0.0485 *
## poly(barriles, 3, raw = TRUE)3 9.216e-04 2.814e-04 3.274 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 74630 on 76 degrees of freedom
## Multiple R-squared: 0.9844, Adjusted R-squared: 0.9838
## F-statistic: 1598 on 3 and 76 DF, p-value: < 2.2e-16
summary(m_multiple)
##
## Call:
## lm(formula = costo ~ poly(barriles, 3, raw = TRUE) + area_afectada +
## tiempo_respuesta, data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71138 -24339 -3020 24681 71842
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.448e+04 2.066e+04 2.153 0.0345 *
## poly(barriles, 3, raw = TRUE)1 1.265e+03 1.435e+02 8.812 3.78e-13 ***
## poly(barriles, 3, raw = TRUE)2 -1.016e+00 2.375e-01 -4.275 5.62e-05 ***
## poly(barriles, 3, raw = TRUE)3 8.698e-04 1.238e-04 7.024 8.88e-10 ***
## area_afectada 1.454e+04 8.138e+02 17.862 < 2e-16 ***
## tiempo_respuesta 7.472e+03 4.031e+03 1.853 0.0678 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32820 on 74 degrees of freedom
## Multiple R-squared: 0.9971, Adjusted R-squared: 0.9969
## F-statistic: 5024 on 5 and 74 DF, p-value: < 2.2e-16
cat("R² Lineal:", summary(m_lineal)$r.squared, "\n")
## R² Lineal: 0.9685141
cat("R² Polinómico G3:", summary(m_poli3)$r.squared, "\n")
## R² Polinómico G3: 0.984398
cat("R² Múltiple:", summary(m_multiple)$r.squared, "\n")
## R² Múltiple: 0.9970629
plot(datos$barriles, datos$costo,
pch = 16,
col = rgb(0.4,0.4,0.4,0.6),
xlab = "Liberación involuntaria de barriles",
ylab = "Costo de remediación ambiental",
main = "Regresión polinómica")
x_seq <- seq(min(barriles), max(barriles), length.out = 400)
y_pred <- predict(
m_poli3,
newdata = data.frame(barriles = x_seq)
)
lines(x_seq, y_pred, col = "red", lwd = 3)
summary(m_poli3)
##
## Call:
## lm(formula = costo ~ poly(barriles, 3, raw = TRUE), data = datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -187168 -52009 4414 47331 157431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.729e+04 4.492e+04 1.275 0.2061
## poly(barriles, 3, raw = TRUE)1 1.792e+03 2.980e+02 6.011 5.97e-08 ***
## poly(barriles, 3, raw = TRUE)2 -1.082e+00 5.394e-01 -2.005 0.0485 *
## poly(barriles, 3, raw = TRUE)3 9.216e-04 2.814e-04 3.274 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 74630 on 76 degrees of freedom
## Multiple R-squared: 0.9844, Adjusted R-squared: 0.9838
## F-statistic: 1598 on 3 and 76 DF, p-value: < 2.2e-16