# Librerías empleadas
library(readr)
library(stargazer)
library(lmtest)
library(car)
library(ggplot2)
# Load the data set for the first locality (RM). Per the original note, the
# values stored in this CSV were already log-transformed.
RM <- read_csv(file = "/cloud/project/Trabajo/RM.csv")
# Descriptive statistics for every column of the data set.
summary(object = RM)
## Longitud (mm) Peso (gr)
## Min. :0.5444 Min. :-2.6737
## 1st Qu.:0.7465 1st Qu.:-2.2772
## Median :0.9571 Median :-1.7746
## Mean :1.0167 Mean :-1.6044
## 3rd Qu.:1.3434 3rd Qu.:-0.9316
## Max. :1.5646 Max. :-0.3826
# Keep a working copy of RM under the name Localidad1, then print the
# correlation matrix of its columns (cor() uses Pearson by default).
Localidad1 <- RM
cor(x = Localidad1)
## Longitud (mm) Peso (gr)
## Longitud (mm) 1.0000000 0.9648949
## Peso (gr) 0.9648949 1.0000000
# Histogram of the weight column for locality 1.
# No `bins`/`binwidth` is supplied, so ggplot2 falls back to 30 bins and
# emits the `stat_bin()` message recorded below.
H1peso <- ggplot(data = Localidad1, mapping = aes(x = `Peso (gr)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H1peso
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the length column for locality 1 (default 30 bins).
H2longitud <- ggplot(data = Localidad1, mapping = aes(x = `Longitud (mm)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H2longitud
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Shapiro-Wilk normality test on each variable of locality 1.
# Length:
shapiro.test(x = Localidad1$`Longitud (mm)`)
##
## Shapiro-Wilk normality test
##
## data: Localidad1$`Longitud (mm)`
## W = 0.92292, p-value = 0.1456
# Weight:
shapiro.test(x = Localidad1$`Peso (gr)`)
##
## Shapiro-Wilk normality test
##
## data: Localidad1$`Peso (gr)`
## W = 0.90206, p-value = 0.06242
# Simple linear regression for locality 1: length is the response and
# weight is the predictor.
# NOTE(review): length-weight relationships are often fitted with weight as
# the response; confirm that this direction is the intended one.
Modelo1 <- lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad1)
summary(object = Modelo1)
##
## Call:
## lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.16222 -0.05007 0.01406 0.06030 0.16434
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.65766 0.04843 34.23 < 2e-16 ***
## `Peso (gr)` 0.39951 0.02719 14.70 1.04e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.0893 on 16 degrees of freedom
## Multiple R-squared: 0.931, Adjusted R-squared: 0.9267
## F-statistic: 216 on 1 and 16 DF, p-value: 1.039e-10
# Model assumptions for Modelo1
# 1 - Normality of the residuals (Shapiro-Wilk test)
shapiro.test(Modelo1$residuals)
##
## Shapiro-Wilk normality test
##
## data: Modelo1$residuals
## W = 0.97795, p-value = 0.9265
# 2 - Independence of the residuals (Durbin-Watson test)
# NOTE(review): the p-value recorded below (0.01413) is under 0.05, and this
# test depends on the order of the rows; confirm the row order is meaningful
# before interpreting it.
dwtest(Modelo1)
##
## Durbin-Watson test
##
## data: Modelo1
## DW = 1.1008, p-value = 0.01413
## alternative hypothesis: true autocorrelation is greater than 0
# 3 - Constant variance (score test for non-constant error variance)
ncvTest(Modelo1)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 1.312749, Df = 1, p = 0.2519
# Scatter plot of the relationship fitted by Modelo1, with the regression
# line and the fitted equation / R^2 printed in the top-left corner.
#
# Fixes relative to the previous version of this plot:
#  * The predictor (weight) is on the x axis and the response (length) on
#    the y axis, exactly as in Modelo1, so the line drawn by geom_smooth()
#    is the line the printed equation describes. Before, the axes were
#    swapped and the label did not match the drawn line.
#  * annotate() draws the label once; geom_text() with aes(label = ...)
#    drew one overlapping copy per data row.
#  * The equation and R^2 are read from Modelo1 instead of being typed in by
#    hand, so they cannot drift from the fitted model.
lineal <- local({
  # Rows and columns actually used to fit the model.
  datos <- model.frame(Modelo1)
  betas <- coef(Modelo1)
  etiqueta <- sprintf(
    "y = %.5f %s %.5f(x)\nR^2 = %.2f",
    betas[[1]],
    if (betas[[2]] < 0) "-" else "+",
    abs(betas[[2]]),
    summary(Modelo1)$r.squared
  )

  ggplot(datos, aes(x = `Peso (gr)`, y = `Longitud (mm)`)) +
    geom_point() +
    geom_smooth(method = "lm") +
    theme_classic() +
    xlab("LogPeso (gr)") +
    ylab("LogLongitud (mm)") +
    # Anchor the label at the top-left corner of the data range.
    annotate(
      "text",
      x = min(datos[["Peso (gr)"]]),
      y = max(datos[["Longitud (mm)"]]),
      hjust = 0,
      vjust = 1,
      label = etiqueta
    )
})
lineal
## `geom_smooth()` using formula = 'y ~ x'

# Base-R diagnostic plots for the fitted model (residual trend, normal Q-Q
# plot, and the other default plot.lm panels).
plot(x = Modelo1)




# Realizamos el mismo ejercicio para la segunda localidad
# Second locality: Arroyo Pitalito (AP).
AP <- read_csv(file = "/cloud/project/Trabajo/AP.csv")
## Rows: 13 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Longitud (mm), Peso (gr)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Descriptive statistics for every column of the AP data set.
summary(object = AP)
## Longitud (mm) Peso (gr)
## Min. :0.5999 Min. :-2.6615
## 1st Qu.:0.8396 1st Qu.:-1.9996
## Median :1.1021 Median :-1.0626
## Mean :1.0809 Mean :-1.3107
## 3rd Qu.:1.3580 3rd Qu.:-0.5683
## Max. :1.5927 Max. :-0.2813
# Keep a working copy of AP under the name Localidad2, then print the
# correlation matrix of its columns (cor() uses Pearson by default).
Localidad2 <- AP
cor(x = Localidad2)
## Longitud (mm) Peso (gr)
## Longitud (mm) 1.0000000 0.9713897
## Peso (gr) 0.9713897 1.0000000
# Histogram of the weight column for locality 2.
# No `bins`/`binwidth` is supplied, so ggplot2 falls back to 30 bins and
# emits the `stat_bin()` message recorded below.
H3peso <- ggplot(data = Localidad2, mapping = aes(x = `Peso (gr)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H3peso
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Histogram of the length column for locality 2 (default 30 bins).
H4longitud <- ggplot(data = Localidad2, mapping = aes(x = `Longitud (mm)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H4longitud
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Shapiro-Wilk normality test on each variable of locality 2.
# Length:
shapiro.test(x = Localidad2$`Longitud (mm)`)
##
## Shapiro-Wilk normality test
##
## data: Localidad2$`Longitud (mm)`
## W = 0.94265, p-value = 0.4921
# Weight:
shapiro.test(x = Localidad2$`Peso (gr)`)
##
## Shapiro-Wilk normality test
##
## data: Localidad2$`Peso (gr)`
## W = 0.90064, p-value = 0.1364
# Simple linear regression for locality 2: length is the response and
# weight is the predictor.
# NOTE(review): length-weight relationships are often fitted with weight as
# the response; confirm that this direction is the intended one.
Modelo2 <- lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad2)
summary(object = Modelo2)
##
## Call:
## lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.11715 -0.06193 0.00755 0.04598 0.13078
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.56602 0.04230 37.02 6.74e-13 ***
## `Peso (gr)` 0.37011 0.02728 13.57 3.27e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08149 on 11 degrees of freedom
## Multiple R-squared: 0.9436, Adjusted R-squared: 0.9385
## F-statistic: 184 on 1 and 11 DF, p-value: 3.267e-08
# Model assumptions for Modelo2
# 1 - Normality of the residuals (Shapiro-Wilk test)
shapiro.test(Modelo2$residuals)
##
## Shapiro-Wilk normality test
##
## data: Modelo2$residuals
## W = 0.95923, p-value = 0.7417
# 2 - Independence of the residuals (Durbin-Watson test)
# NOTE(review): this test depends on the order of the rows; confirm the row
# order is meaningful before interpreting it.
dwtest(Modelo2)
##
## Durbin-Watson test
##
## data: Modelo2
## DW = 1.9728, p-value = 0.4452
## alternative hypothesis: true autocorrelation is greater than 0
# 3 - Constant variance (score test for non-constant error variance)
ncvTest(Modelo2)
## Non-constant Variance Score Test
## Variance formula: ~ fitted.values
## Chisquare = 1.910682, Df = 1, p = 0.16689
# Scatter plot of the relationship fitted by Modelo2, with the regression
# line and the fitted equation / R^2 printed in the top-left corner.
#
# Fixes relative to the previous version of this plot:
#  * The predictor (weight) is on the x axis and the response (length) on
#    the y axis, exactly as in Modelo2, so the line drawn by geom_smooth()
#    is the line the printed equation describes. Before, the axes were
#    swapped and the label did not match the drawn line.
#  * annotate() draws the label once; geom_text() with aes(label = ...)
#    drew one overlapping copy per data row.
#  * The equation and R^2 are read from Modelo2 instead of being typed in by
#    hand. The hand-typed "R^2=0.93" did not match the 0.9436 reported by
#    summary(Modelo2).
lineal <- local({
  # Rows and columns actually used to fit the model.
  datos <- model.frame(Modelo2)
  betas <- coef(Modelo2)
  etiqueta <- sprintf(
    "y = %.5f %s %.5f(x)\nR^2 = %.2f",
    betas[[1]],
    if (betas[[2]] < 0) "-" else "+",
    abs(betas[[2]]),
    summary(Modelo2)$r.squared
  )

  ggplot(datos, aes(x = `Peso (gr)`, y = `Longitud (mm)`)) +
    geom_point() +
    geom_smooth(method = "lm") +
    theme_classic() +
    xlab("LogPeso (gr)") +
    ylab("LogLongitud (mm)") +
    # Anchor the label at the top-left corner of the data range.
    annotate(
      "text",
      x = min(datos[["Peso (gr)"]]),
      y = max(datos[["Longitud (mm)"]]),
      hjust = 0,
      vjust = 1,
      label = etiqueta
    )
})
lineal
## `geom_smooth()` using formula = 'y ~ x'

# Base-R diagnostic plots for the fitted model (residual trend, normal Q-Q
# plot, and the other default plot.lm panels).
plot(x = Modelo2)




# Additional library
# NOTE(review): no line visible in this script calls a plyr function, and
# attaching it this late can mask functions from other packages; confirm it
# is still needed.
library(plyr)
# Load the LL data set; the reader output below reports two numeric columns,
# AP and RM.
LL <- read_csv("/cloud/project/Trabajo/LL.csv")
## Rows: 18 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): AP, RM
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Shapiro-Wilk normality test on the AP column of LL.
shapiro.test(x = LL$AP)
##
## Shapiro-Wilk normality test
##
## data: LL$AP
## W = 0.66059, p-value = 2.879e-05
# Test for significant differences between the estimated Y values of the two
# localities (columns AP and RM of LL) with a Kruskal-Wallis rank-sum test.
#
# The columns are passed as a list so that each column is one sample.
# The former call, kruskal.test(AP ~ RM, data = LL), used the numeric RM
# column as the grouping factor. Every distinct RM value became its own
# group (hence the df = 17 it reported for 18 rows), so the two localities
# were never compared.
# Missing values inside each sample are dropped by kruskal.test().
kruskal.test(list(AP = LL$AP, RM = LL$RM))
# NOTE: the output previously recorded here (chi-squared = 17, df = 17,
# p-value = 0.4544) came from the mis-specified call; re-run this line to
# obtain the result of the corrected test.