library(tidyverse)
library(dplyr)
library(ggplot2)
library(plotly)
library(psych)
library(GGally)
library(ggthemes)
library(ggpubr)
load("~/Unidad2/RegresionLinealMultiple/moluscos.RData")
df<-data.frame(BD_moluscos)
df
## c_agua molusco cons_o
## 1 100 A 7.16
## 2 100 A 8.26
## 3 100 A 6.78
## 4 100 A 14.00
## 5 100 A 13.60
## 6 100 A 11.10
## 7 100 A 8.93
## 8 100 A 9.66
## 9 100 B 6.14
## 10 100 B 6.14
## 11 100 B 3.68
## 12 100 B 10.00
## 13 100 B 10.40
## 14 100 B 11.60
## 15 100 B 5.49
## 16 100 B 5.80
## 17 75 A 5.20
## 18 75 A 13.20
## 19 75 A 5.20
## 20 75 A 8.39
## 21 75 A 7.18
## 22 75 A 10.40
## 23 75 A 6.37
## 24 75 A 7.18
## 25 75 B 4.47
## 26 75 B 4.95
## 27 75 B 9.96
## 28 75 B 6.49
## 29 75 B 5.75
## 30 75 B 5.44
## 31 75 B 1.80
## 32 75 B 9.90
## 33 50 A 11.11
## 34 50 A 10.50
## 35 50 A 9.74
## 36 50 A 14.60
## 37 50 A 18.80
## 38 50 A 11.11
## 39 50 A 9.74
## 40 50 A 11.80
## 41 50 B 9.63
## 42 50 B 14.50
## 43 50 B 6.38
## 44 50 B 10.20
## 45 50 B 13.40
## 46 50 B 17.70
## 47 50 B 14.50
## 48 50 B 12.30
con100 <- filter(df, df$c_agua == 100)
con100
## c_agua molusco cons_o
## 1 100 A 7.16
## 2 100 A 8.26
## 3 100 A 6.78
## 4 100 A 14.00
## 5 100 A 13.60
## 6 100 A 11.10
## 7 100 A 8.93
## 8 100 A 9.66
## 9 100 B 6.14
## 10 100 B 6.14
## 11 100 B 3.68
## 12 100 B 10.00
## 13 100 B 10.40
## 14 100 B 11.60
## 15 100 B 5.49
## 16 100 B 5.80
describeBy(con100$cons_o, con100$molusco)
##
## Descriptive statistics by group
## group: A
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 9.94 2.75 9.3 9.94 2.92 6.78 14 7.22 0.36 -1.64 0.97
## ------------------------------------------------------------
## group: B
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 7.41 2.84 6.14 7.41 2.31 3.68 11.6 7.92 0.28 -1.75 1.01
caja100<-ggplot(con100, aes(x = molusco, y = cons_o, fill = molusco)) +
stat_boxplot(geom = "errorbar", width = 0.25) + geom_boxplot() + scale_fill_hue(labels = c("Moslusco A", "Molusco B"))+ theme (text = element_text(size=8)) + ggtitle ("Consumo de oxigeno de los diferentes moluscos \n con una concentración de agua de 100%") + theme (plot.title = element_text(family="Comic Sans MS",
size=rel(2), vjust=2, face="bold", color="black",lineheight=1.5)) + labs(x = "Tipo de molusco",y = "Consumo de Oxigeno") + theme(axis.title = element_text(face="italic", colour="black", size=rel(1.5))) + theme(axis.title.x = element_text(face="bold", vjust=-0.5, colour="black", size=rel(1.5))) +
theme(axis.title.y = element_text(face="bold", vjust=1.5, colour="black", size=rel(1.5)))
caja100<-ggplotly(caja100)
caja100
con50 <- filter(df, df$c_agua == 50)
con50
## c_agua molusco cons_o
## 1 50 A 11.11
## 2 50 A 10.50
## 3 50 A 9.74
## 4 50 A 14.60
## 5 50 A 18.80
## 6 50 A 11.11
## 7 50 A 9.74
## 8 50 A 11.80
## 9 50 B 9.63
## 10 50 B 14.50
## 11 50 B 6.38
## 12 50 B 10.20
## 13 50 B 13.40
## 14 50 B 17.70
## 15 50 B 14.50
## 16 50 B 12.30
describeBy(con50$cons_o,con50$molusco)
##
## Descriptive statistics by group
## group: A
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 12.18 3.09 11.11 12.18 1.53 9.74 18.8 9.06 1.14 -0.2 1.09
## ------------------------------------------------------------
## group: B
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 12.33 3.52 12.85 12.33 3.19 6.38 17.7 11.32 -0.18 -1.2 1.24
caja50<-ggplot(con100, aes(x = molusco, y = cons_o, fill = molusco)) +
stat_boxplot(geom = "errorbar", width = 0.25) + geom_boxplot() + scale_fill_hue(labels = c("Moslusco A", "Molusco B")) +
stat_boxplot(geom = "errorbar", width = 0.25) + geom_boxplot() + scale_fill_hue(labels = c("Moslusco A", "Molusco B"))+ theme (text = element_text(size=8)) + ggtitle ("Consumo de oxigeno de los diferentes moluscos \n con una concentración de agua de 50%") + theme (plot.title = element_text(family="Comic Sans MS",
size=rel(2), vjust=2, face="bold", color="black",lineheight=1.5)) + labs(x = "Tipo de molusco",y = "Consumo de Oxigeno") + theme(axis.title = element_text(face="italic", colour="black", size=rel(1.5))) + theme(axis.title.x = element_text(face="bold", vjust=-0.5, colour="black", size=rel(1.5))) +
theme(axis.title.y = element_text(face="bold", vjust=1.5, colour="black", size=rel(1.5)))
caja50<-ggplotly(caja100)
caja50
con75 <- filter(df, df$c_agua == 75)
con75
## c_agua molusco cons_o
## 1 75 A 5.20
## 2 75 A 13.20
## 3 75 A 5.20
## 4 75 A 8.39
## 5 75 A 7.18
## 6 75 A 10.40
## 7 75 A 6.37
## 8 75 A 7.18
## 9 75 B 4.47
## 10 75 B 4.95
## 11 75 B 9.96
## 12 75 B 6.49
## 13 75 B 5.75
## 14 75 B 5.44
## 15 75 B 1.80
## 16 75 B 9.90
describeBy(con75$cons_o,con75$molusco)
##
## Descriptive statistics by group
## group: A
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 7.89 2.74 7.18 7.89 2.36 5.2 13.2 8 0.74 -0.9 0.97
## ------------------------------------------------------------
## group: B
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 8 6.1 2.74 5.6 6.1 1.5 1.8 9.96 8.16 0.17 -1.26 0.97
caja25<-ggplot(con100, aes(x = molusco, y = cons_o, fill = molusco)) +
stat_boxplot(geom = "errorbar", width = 0.25) + geom_boxplot() + scale_fill_hue(labels = c("Moslusco A", "Molusco B")) +
stat_boxplot(geom = "errorbar", width = 0.25) + geom_boxplot() + scale_fill_hue(labels = c("Moslusco A", "Molusco B"))+ theme (text = element_text(size=8)) + ggtitle ("Consumo de oxigeno de los diferentes moluscos \n con una concentración de agua de 25%") + theme (plot.title = element_text(family="Comic Sans MS",
size=rel(2), vjust=2, face="bold", color="black",lineheight=1.5)) + labs(x = "Tipo de molusco",y = "Consumo de Oxigeno") + theme(axis.title = element_text(face="italic", colour="black", size=rel(1.5))) + theme(axis.title.x = element_text(face="bold", vjust=-0.5, colour="black", size=rel(1.5))) +
theme(axis.title.y = element_text(face="bold", vjust=1.5, colour="black", size=rel(1.5)))
caja25<-ggplotly(caja100)
caja25
ggplot(df, aes(c_agua, cons_o)) + geom_point()+scale_x_continuous("Concentración del agua de mar") + scale_y_continuous("Consumo de Oxigeno") + geom_smooth() + ggtitle ("Comportamiento del Oxigeno en función de la concentración del agua de mar")
ggplot(df, aes(c_agua, cons_o,colour=molusco))+ geom_point()+scale_x_continuous("Concentración del agua de mar") + scale_y_continuous("Consumo de Oxigeno") + geom_smooth() + ggtitle ("Comportamiento del oxigeno en función de la concentración \n del agua de mar segun las especies de moluscos")
#### Es interesante ver que cuando la concentración de agua de mar es
mas baja, el consumo de oxigeno es mas elevado, el mínimo consumo se
obtiene cuando la concentración esta alrededor del 75%. Sin embargo,
existe un aumento en el consumo de oxigeno al tener una mayor
concentración, dicho aumento puede deberse al tipo de molusco, ambos no
crecen igual.
#Es importante definir la variable molusco y concentración del agua como variables categoricas antes de realizar la regresion lineal.
df$molusco=as.factor(df$molusco)
df$c_agua=as.factor(df$c_agua)
#Ahora si hacemos la regresión lineal.
modelo=lm(cons_o~c_agua:molusco, data=df)
summary(modelo)
##
## Call:
## lm(formula = cons_o ~ c_agua:molusco, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.946 -1.736 -0.710 2.237 6.625
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.4063 1.0466 7.077 1.13e-08 ***
## c_agua50:moluscoA 4.7687 1.4800 3.222 0.00246 **
## c_agua75:moluscoA 0.4837 1.4800 0.327 0.74541
## c_agua100:moluscoA 2.5300 1.4800 1.709 0.09476 .
## c_agua50:moluscoB 4.9200 1.4800 3.324 0.00185 **
## c_agua75:moluscoB -1.3113 1.4800 -0.886 0.38069
## c_agua100:moluscoB NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.96 on 42 degrees of freedom
## Multiple R-squared: 0.4226, Adjusted R-squared: 0.3539
## F-statistic: 6.149 on 5 and 42 DF, p-value: 0.0002324
ei=modelo$residuals
par(mfrow=c(2,2))
plot(modelo)
shapiro.test(ei)
##
## Shapiro-Wilk normality test
##
## data: ei
## W = 0.95824, p-value = 0.08571
require(agricolae)
comparacion=LSD.test(modelo,c("molusco","c_agua"))
comparacion
## $statistics
## MSerror Df Mean CV t.value LSD
## 8.762171 42 9.304792 31.8126 2.018082 2.986858
##
## $parameters
## test p.ajusted name.t ntr alpha
## Fisher-LSD none molusco:c_agua 6 0.05
##
## $means
## cons_o std r LCL UCL Min Max Q25 Q50 Q75
## A:100 9.93625 2.747976 8 7.824222 12.048278 6.78 14.00 7.9850 9.295 11.7250
## A:50 12.17500 3.090178 8 10.062972 14.287028 9.74 18.80 10.3100 11.110 12.5000
## A:75 7.89000 2.739578 8 5.777972 10.002028 5.20 13.20 6.0775 7.180 8.8925
## B:100 7.40625 2.844076 8 5.294222 9.518278 3.68 11.60 5.7225 6.140 10.1000
## B:50 12.32625 3.517909 8 10.214222 14.438278 6.38 17.70 10.0575 12.850 14.5000
## B:75 6.09500 2.739108 8 3.982972 8.207028 1.80 9.96 4.8300 5.595 7.3425
##
## $comparison
## NULL
##
## $groups
## cons_o groups
## B:50 12.32625 a
## A:50 12.17500 a
## A:100 9.93625 ab
## A:75 7.89000 bc
## B:100 7.40625 bc
## B:75 6.09500 c
##
## attr(,"class")
## [1] "group"
modelo=lm(cons_o~c_agua+molusco+c_agua:molusco, data=df)
anova(modelo)
## Analysis of Variance Table
##
## Response: cons_o
## Df Sum Sq Mean Sq F value Pr(>F)
## c_agua 2 230.82 115.408 13.1712 3.629e-05 ***
## molusco 1 23.23 23.227 2.6508 0.1110
## c_agua:molusco 2 15.36 7.678 0.8763 0.4238
## Residuals 42 368.01 8.762
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
load("~/Unidad2/RegresionLinealMultiple/Salinidad.RData")
dfsalinidad<-data.frame(Salinidad)
dfsalinidad
## Biomasa pH Salinidad Zinc Potasio
## 1 765.280 5.00 33 16.4524 1441.67
## 2 954.017 4.70 35 13.9852 1299.19
## 3 827.686 4.20 32 15.3276 1154.27
## 4 755.072 4.40 30 17.3128 1045.15
## 5 896.176 5.55 33 22.3312 521.62
## 6 1422.836 5.50 33 12.2778 1273.02
## 7 821.069 4.25 36 17.8225 1346.35
## 8 1008.804 4.45 30 14.3516 1253.88
## 9 1306.494 4.75 38 13.6826 1242.65
## 10 1039.637 4.60 30 11.7566 1282.95
## 11 1193.223 4.10 30 9.8820 553.69
## 12 777.474 3.45 37 16.6752 494.74
## 13 818.127 3.45 33 12.3730 526.97
## 14 1203.568 4.10 36 9.4058 571.14
## 15 977.515 3.50 30 14.9302 408.64
## 16 369.823 3.25 30 31.2865 646.65
## 17 509.872 3.25 27 30.1652 514.03
## 18 448.315 3.20 29 28.5901 350.73
## 19 615.091 3.35 34 17.8795 496.29
## 20 545.538 3.30 36 18.5056 580.92
## 21 436.552 3.25 30 22.1344 535.82
## 22 465.907 3.25 28 28.6101 490.34
## 23 664.601 3.20 31 23.1908 552.39
## 24 502.466 3.20 31 24.6917 661.32
## 25 496.797 3.35 35 22.6758 672.12
## 26 2270.294 7.10 29 0.3729 525.65
## 27 2332.220 7.35 35 0.2703 563.13
## 28 2162.531 7.45 35 0.3205 497.96
## 29 2222.588 7.45 30 0.2648 458.38
## 30 2337.326 7.40 30 0.2105 498.25
## 31 1349.192 4.85 26 18.9875 936.26
## 32 1058.976 4.60 29 20.9687 894.79
## 33 1408.206 5.20 25 23.9841 941.36
## 34 1491.276 4.75 26 19.9727 1038.79
## 35 1254.872 5.20 26 21.3864 898.05
## 36 1152.341 4.55 25 23.7063 989.87
## 37 568.455 3.95 26 30.5589 951.28
## 38 612.447 3.70 26 26.8415 929.83
## 39 654.825 3.75 27 27.7292 925.42
## 40 991.829 4.15 27 21.5699 954.11
## 41 1895.942 5.60 24 19.6531 720.72
## 42 1346.880 5.35 27 20.3295 782.09
## 43 1482.793 5.50 26 19.5880 773.30
## 44 1145.643 5.50 28 20.1328 829.26
## 45 1137.193 5.40 28 19.2420 856.96
cor(dfsalinidad)
## Biomasa pH Salinidad Zinc Potasio
## Biomasa 1.00000000 0.92810235 -0.06657756 -0.78146249 -0.07319518
## pH 0.92810235 1.00000000 -0.04458851 -0.72046995 0.03236212
## Salinidad -0.06657756 -0.04458851 1.00000000 -0.42663388 -0.01963288
## Zinc -0.78146249 -0.72046995 -0.42663388 1.00000000 0.07877268
## Potasio -0.07319518 0.03236212 -0.01963288 0.07877268 1.00000000
ggpairs(select_if(dfsalinidad, is.numeric), lower = list(continuous = "smooth"),diag = list(continuous = "barDiag"), axisLabels = "none")+theme_economist()
ga=ggplot(dfsalinidad, aes(pH, Biomasa))+ geom_point()+scale_x_continuous("pH") + scale_y_continuous("Biomasa") + geom_smooth()
gb=ggplot(dfsalinidad, aes(Salinidad, Biomasa))+ geom_point()+scale_x_continuous("Salinidad") + scale_y_continuous("Biomasa") + geom_smooth()
gc=ggplot(dfsalinidad, aes(Zinc, Biomasa))+ geom_point()+scale_x_continuous("Zinc") + scale_y_continuous("Biomasa") + geom_smooth()
gd=ggplot(dfsalinidad, aes(Potasio, Biomasa))+ geom_point()+scale_x_continuous("Potasio") + scale_y_continuous("Biomasa") + geom_smooth()
ggarrange(ga , gb, gc, gd, labels = c("Biomasa vs pH", "Biomasa vs Salinidad", "Biomasa vs Zinc", "Biomasa vs Potasio" ), ncol = 2, nrow = 2)
#### Se encontro una fuerte correlación positivaentre la biomasa y el
pH, una correlación fuerte negativa con el Zinc y a su vez una ligera
correlación con la Salinidad. Por otra parte no hay una correlación
definida con el potasio.
modelo_biomasa=lm(Biomasa~.,data=dfsalinidad)
summary(modelo_biomasa)
##
## Call:
## lm(formula = Biomasa ~ ., data = dfsalinidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -293.98 -88.83 -9.48 88.20 387.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1492.8076 453.6013 3.291 0.002091 **
## pH 262.8829 33.7304 7.794 1.51e-09 ***
## Salinidad -33.4997 8.6525 -3.872 0.000391 ***
## Zinc -28.9727 5.6643 -5.115 8.20e-06 ***
## Potasio -0.1150 0.0819 -1.404 0.167979
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158.9 on 40 degrees of freedom
## Multiple R-squared: 0.9231, Adjusted R-squared: 0.9154
## F-statistic: 120 on 4 and 40 DF, p-value: < 2.2e-16
modelo_biomasa_final=step(modelo_biomasa,direction = "both")
## Start: AIC=460.84
## Biomasa ~ pH + Salinidad + Zinc + Potasio
##
## Df Sum of Sq RSS AIC
## <none> 1009974 460.84
## - Potasio 1 49785 1059759 461.01
## - Salinidad 1 378486 1388460 473.17
## - Zinc 1 660588 1670562 481.49
## - pH 1 1533665 2543639 500.41
summary(modelo_biomasa_final)
##
## Call:
## lm(formula = Biomasa ~ pH + Salinidad + Zinc + Potasio, data = dfsalinidad)
##
## Residuals:
## Min 1Q Median 3Q Max
## -293.98 -88.83 -9.48 88.20 387.27
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1492.8076 453.6013 3.291 0.002091 **
## pH 262.8829 33.7304 7.794 1.51e-09 ***
## Salinidad -33.4997 8.6525 -3.872 0.000391 ***
## Zinc -28.9727 5.6643 -5.115 8.20e-06 ***
## Potasio -0.1150 0.0819 -1.404 0.167979
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 158.9 on 40 degrees of freedom
## Multiple R-squared: 0.9231, Adjusted R-squared: 0.9154
## F-statistic: 120 on 4 and 40 DF, p-value: < 2.2e-16
ei1=modelo_biomasa_final$residuals
par(mfrow=c(2,2))
plot(modelo_biomasa_final)
shapiro.test(ei1)
##
## Shapiro-Wilk normality test
##
## data: ei1
## W = 0.96586, p-value = 0.2036