# Dataset #1: read MTCars.csv (with a header row) from the GitHub raw URL.
data1 <- read.csv(
  file = url("https://raw.githubusercontent.com/geovannychoez/prueba/master/MTCars.csv"),
  header = TRUE
)
# Correlation plot for all numeric variables that represent quantities.
# Columns are picked by position, so the selection depends on the column
# order of the imported CSV.
# corrplot.mixed() is namespace-qualified because no library(corrplot) call
# is visible in this script.
corrplot::corrplot.mixed(
  cor(data1[, c(10, 11, 12, 13, 14, 17, 19, 20, 21, 22, 23, 24, 25, 26)]),
  lower = "number",
  upper = "circle",
  tl.col = "black"
)
# Scatterplot matrix of the whole data frame, drawn in red.
plot(data1, col = "red")
# Correlation plot restricted to the variables with strong correlations
# (corr > 0.5 or corr < -0.5).
# Columns are picked by position, so the selection depends on the column
# order of the imported CSV.
# corrplot.mixed() is namespace-qualified because no library(corrplot) call
# is visible in this script.
corrplot::corrplot.mixed(
  cor(data1[, c(10, 11, 12, 14, 17, 19, 22, 24, 25, 26)]),
  lower = "number",
  upper = "circle",
  tl.col = "black"
)
# NOTE(review): this repeats the earlier plot(data1, ...) call and draws every
# column of data1, not only the subset selected above; confirm that is intended.
plot(data1, col = "red")
## Initial results from building the multiple linear regression models with
## the variables that have strong correlations (corr > 0.5 or corr < -0.5),
## using price as the dependent variable.
## The global hypothesis test, the individual hypothesis tests and the
## coefficient of determination (R^2) are carried out.
# NOTE(review): the terms are written as data1$<column>, so the coefficient
# names carry the "data1$" prefix and predict() on this fit will not use a
# `newdata` argument as expected. Consider `price ~ ..., data = data1` if
# nothing downstream relies on the current coefficient names.
mod1 <- lm(
  data1$price ~ data1$wheelbase + data1$carlength + data1$carwidth +
    data1$curbweight + data1$enginesize + data1$boreratio +
    data1$horsepower + data1$citympg + data1$highwaympg
)
# Print the call and the fitted coefficients.
mod1
##
## Call:
## lm(formula = data1$price ~ data1$wheelbase + data1$carlength +
## data1$carwidth + data1$curbweight + data1$enginesize + data1$boreratio +
## data1$horsepower + data1$citympg + data1$highwaympg)
##
## Coefficients:
## (Intercept) data1$wheelbase data1$carlength data1$carwidth
## -43223.926 109.895 -57.853 532.467
## data1$curbweight data1$enginesize data1$boreratio data1$horsepower
## 2.918 83.615 -1140.464 53.754
## data1$citympg data1$highwaympg
## -119.815 122.855
# Residual summary, coefficient table with t-tests, R-squared and the overall
# F-test for mod1.
summary(mod1)
##
## Call:
## lm(formula = data1$price ~ data1$wheelbase + data1$carlength +
## data1$carwidth + data1$curbweight + data1$enginesize + data1$boreratio +
## data1$horsepower + data1$citympg + data1$highwaympg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8297 -1542 31 1307 14444
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -43223.926 13760.207 -3.141 0.001944 **
## data1$wheelbase 109.895 101.743 1.080 0.281418
## data1$carlength -57.853 57.933 -0.999 0.319217
## data1$carwidth 532.467 255.561 2.084 0.038508 *
## data1$curbweight 2.918 1.651 1.767 0.078749 .
## data1$enginesize 83.615 13.434 6.224 2.91e-09 ***
## data1$boreratio -1140.464 1212.508 -0.941 0.348083
## data1$horsepower 53.754 15.447 3.480 0.000619 ***
## data1$citympg -119.815 185.536 -0.646 0.519183
## data1$highwaympg 122.855 170.820 0.719 0.472872
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3444 on 195 degrees of freedom
## Multiple R-squared: 0.8223, Adjusted R-squared: 0.8141
## F-statistic: 100.3 on 9 and 195 DF, p-value: < 2.2e-16
# Reduced model: price regressed on engine size and horsepower only.
mod2 <- lm(
  data1$price ~ data1$enginesize + data1$horsepower
)
# Coefficient tests, R-squared and overall F-test for the reduced model.
summary(mod2)
##
## Call:
## lm(formula = data1$price ~ data1$enginesize + data1$horsepower)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10946.0 -1946.7 -218.8 1775.5 13403.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -8389.73 822.53 -10.200 < 2e-16 ***
## data1$enginesize 122.45 10.46 11.709 < 2e-16 ***
## data1$horsepower 58.85 11.01 5.344 2.45e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3650 on 202 degrees of freedom
## Multiple R-squared: 0.7933, Adjusted R-squared: 0.7913
## F-statistic: 387.7 on 2 and 202 DF, p-value: < 2.2e-16
# Dataset #2: read UsedCars.csv (with a header row) from the GitHub raw URL.
data2 <- read.csv(
  file = url("https://raw.githubusercontent.com/geovannychoez/prueba/master/UsedCars.csv"),
  header = TRUE
)
# Correlation view of all numeric variables of the imported file that represent
# quantities, following the guideline "work only with numeric variables that
# represent quantities".
# The Engine variable is a quantity stored together with its unit (cc), so a
# new column named EngineCC is appended that keeps only the number: the " cc"
# text is removed and the remainder is converted to numeric.
data2 <- cbind(
  data2,
  EngineCC = as.numeric(gsub(" cc", "", as.character(data2$Engine)))
)
# Correlation plot for the quantity columns of data2.
# Columns are picked by position, so the selection depends on the column
# order of the imported CSV plus the appended EngineCC column.
# corrplot.mixed() is namespace-qualified because no library(corrplot) call
# is visible in this script.
corrplot::corrplot.mixed(
  cor(data2[, c(3, 5, 16, 17, 18, 19, 20, 21)]),
  lower = "number",
  upper = "circle",
  tl.col = "black"
)
# Scatterplot matrix of the whole data frame, drawn in red.
plot(data2, col = "red")
# Correlation plot restricted to the variables with strong correlations
# (corr > 0.5 or corr < -0.5): Price, Length, Width, Fuel.Tank.Capacity and
# EngineCC.
# Columns are picked by position; presumably 3, 16, 17, 20 and 21 map to the
# names listed above in that order (verify against names(data2)).
# corrplot.mixed() is namespace-qualified because no library(corrplot) call
# is visible in this script.
corrplot::corrplot.mixed(
  cor(data2[, c(3, 16, 17, 20, 21)]),
  lower = "number",
  upper = "circle",
  tl.col = "black"
)
# NOTE(review): this repeats the earlier plot(data2, ...) call and draws every
# column of data2, not only the subset selected above; confirm that is intended.
plot(data2, col = "red")
## Initial results from building the multiple linear regression models with
## the variables that have strong correlations (corr > 0.5 or corr < -0.5),
## using Price as the dependent variable.
## The global hypothesis test, the individual hypothesis tests and the
## coefficient of determination (R^2) are carried out.
# NOTE(review): the terms are written as data2$<column>, so the coefficient
# names carry the "data2$" prefix and predict() on this fit will not use a
# `newdata` argument as expected. Consider `Price ~ ..., data = data2` if
# nothing downstream relies on the current coefficient names.
mod21 <- lm(
  data2$Price ~ data2$Length + data2$Width + data2$Fuel.Tank.Capacity +
    data2$EngineCC
)
# Print the call and the fitted coefficients.
mod21
##
## Call:
## lm(formula = data2$Price ~ data2$Length + data2$Width + data2$Fuel.Tank.Capacity +
## data2$EngineCC)
##
## Coefficients:
## (Intercept) data2$Length data2$Width
## -9289387.5 157.1 4045.2
## data2$Fuel.Tank.Capacity data2$EngineCC
## 20368.6 1259.7
# Residual summary, coefficient table with t-tests, R-squared and the overall
# F-test for mod21.
summary(mod21)
##
## Call:
## lm(formula = data2$Price ~ data2$Length + data2$Width + data2$Fuel.Tank.Capacity +
## data2$EngineCC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7170011 -731542 -142317 385488 29171278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9289387.5 842295.6 -11.029 < 2e-16 ***
## data2$Length 157.1 201.3 0.781 0.435139
## data2$Width 4045.2 593.2 6.819 1.23e-11 ***
## data2$Fuel.Tank.Capacity 20368.6 5720.0 3.561 0.000379 ***
## data2$EngineCC 1259.7 124.9 10.084 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1854000 on 1869 degrees of freedom
## Multiple R-squared: 0.4174, Adjusted R-squared: 0.4162
## F-statistic: 334.8 on 4 and 1869 DF, p-value: < 2.2e-16
# Reduced model: same response as mod21 with the Length term left out.
mod22 <- lm(
  data2$Price ~ data2$Width + data2$Fuel.Tank.Capacity + data2$EngineCC
)
# Coefficient tests, R-squared and overall F-test for the reduced model.
summary(mod22)
##
## Call:
## lm(formula = data2$Price ~ data2$Width + data2$Fuel.Tank.Capacity +
## data2$EngineCC)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7270556 -717964 -139784 377568 29069852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9057646.8 788160.1 -11.492 < 2e-16 ***
## data2$Width 4224.0 547.2 7.720 1.88e-14 ***
## data2$Fuel.Tank.Capacity 21558.5 5512.5 3.911 9.53e-05 ***
## data2$EngineCC 1297.0 115.4 11.240 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1854000 on 1870 degrees of freedom
## Multiple R-squared: 0.4172, Adjusted R-squared: 0.4163
## F-statistic: 446.3 on 3 and 1870 DF, p-value: < 2.2e-16