# Proyecto final

# Librerías empleadas
library(readr)
library(stargazer)
library(lmtest)
library(car)
library(ggplot2)

# Load the locality-1 data set; per the original note, the values in this
# file are already log-transformed.
RM <- read_csv(file = "/cloud/project/Trabajo/RM.csv")

# Descriptive summary of every column in the data set.
summary(object = RM)

##  Longitud (mm)      Peso (gr)      
##  Min.   :0.5444   Min.   :-2.6737  
##  1st Qu.:0.7465   1st Qu.:-2.2772  
##  Median :0.9571   Median :-1.7746  
##  Mean   :1.0167   Mean   :-1.6044  
##  3rd Qu.:1.3434   3rd Qu.:-0.9316  
##  Max.   :1.5646   Max.   :-0.3826

# Keep the RM data under the name Localidad1 and print the correlation
# matrix of its columns.
Localidad1 <- RM
cor(x = Localidad1)

##               Longitud (mm) Peso (gr)
## Longitud (mm)     1.0000000 0.9648949
## Peso (gr)         0.9648949 1.0000000

# Histograms of both variables for locality 1 (default binning).

# Distribution of `Peso (gr)`.
H1peso <- ggplot(data = Localidad1, mapping = aes(x = `Peso (gr)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H1peso

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution of `Longitud (mm)`.
H2longitud <- ggplot(data = Localidad1, mapping = aes(x = `Longitud (mm)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H2longitud

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normality of the variables: Shapiro-Wilk test on each column of Localidad1.
# First column tested: `Longitud (mm)`.
shapiro.test(Localidad1$`Longitud (mm)`)

## 
##  Shapiro-Wilk normality test
## 
## data:  Localidad1$`Longitud (mm)`
## W = 0.92292, p-value = 0.1456

# Second column tested: `Peso (gr)`.
shapiro.test(Localidad1$`Peso (gr)`)

## 
##  Shapiro-Wilk normality test
## 
## data:  Localidad1$`Peso (gr)`
## W = 0.90206, p-value = 0.06242

# Simple linear regression for locality 1: `Longitud (mm)` is the response
# and `Peso (gr)` the predictor. The coefficient table is printed afterwards.
Modelo1 <- lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad1)
summary(Modelo1)

## 
## Call:
## lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.16222 -0.05007  0.01406  0.06030  0.16434 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.65766    0.04843   34.23  < 2e-16 ***
## `Peso (gr)`  0.39951    0.02719   14.70 1.04e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0893 on 16 degrees of freedom
## Multiple R-squared:  0.931,  Adjusted R-squared:  0.9267 
## F-statistic:   216 on 1 and 16 DF,  p-value: 1.039e-10

# Model assumptions for Modelo1
# 1 - Normality: Shapiro-Wilk test on the residuals of Modelo1.
shapiro.test(Modelo1$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  Modelo1$residuals
## W = 0.97795, p-value = 0.9265

# 2 - Independence: Durbin-Watson test on Modelo1.
# NOTE(review): the recorded p-value below (0.01413) is under 0.05, i.e. the
# test flags positive autocorrelation. The statistic depends on the order of
# the rows - confirm the rows have a meaningful order before interpreting it.
dwtest(Modelo1)

## 
##  Durbin-Watson test
## 
## data:  Modelo1
## DW = 1.1008, p-value = 0.01413
## alternative hypothesis: true autocorrelation is greater than 0

# 3 - Constant variance: score test for non-constant error variance.
ncvTest(Modelo1)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.312749, Df = 1, p = 0.2519

# Scatter plot of the data used by Modelo1 together with its regression line.
#
# The axes follow the model formula (`Longitud (mm)` ~ `Peso (gr)`): the
# predictor goes on x and the response on y, so the line drawn by
# geom_smooth() is the same fit whose equation is printed on the plot.
# Previously the axes were swapped, so the annotated equation did not describe
# the drawn line.
#
# The equation and R^2 are read from the fitted model instead of being typed
# in by hand, and are drawn once with annotate() (geom_text() with aes()
# redraws the same label for every data row).
coef_m1 <- coef(Modelo1)
label_m1 <- paste(
  sprintf("R^2 = %.2f", summary(Modelo1)$r.squared),
  sprintf(
    "y = %.5f %s %.5f(x)",
    coef_m1[[1]],
    if (coef_m1[[2]] < 0) "-" else "+",
    abs(coef_m1[[2]])
  ),
  sep = "\n"
)

# model.frame() returns exactly the rows and columns the model was fitted on.
lineal <- ggplot(
  model.frame(Modelo1),
  aes(x = `Peso (gr)`, y = `Longitud (mm)`)
) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_classic() +
  xlab("LogPeso (gr)") +
  ylab("LogLongitud (mm)") +
  # Anchor the label to the top-left corner regardless of the data range.
  annotate(
    "text",
    x = -Inf, y = Inf, hjust = -0.05, vjust = 1.2,
    label = label_m1
  )

lineal

## `geom_smooth()` using formula = 'y ~ x'

# Standard lm diagnostic plots (residual trend, normal Q-Q, ...).
plot(Modelo1)

# Realizamos el mismo ejercicio para la segunda localidad

# Load the data set for the second locality, Arroyo Pitalito (AP).
AP <- read_csv(file = "/cloud/project/Trabajo/AP.csv")

## Rows: 13 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): Longitud (mm), Peso (gr)
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Descriptive summary of every column in the locality-2 data set.
summary(object = AP)

##  Longitud (mm)      Peso (gr)      
##  Min.   :0.5999   Min.   :-2.6615  
##  1st Qu.:0.8396   1st Qu.:-1.9996  
##  Median :1.1021   Median :-1.0626  
##  Mean   :1.0809   Mean   :-1.3107  
##  3rd Qu.:1.3580   3rd Qu.:-0.5683  
##  Max.   :1.5927   Max.   :-0.2813

# Keep the AP data under the name Localidad2 and print the correlation
# matrix of its columns.
Localidad2 <- AP
cor(x = Localidad2)

##               Longitud (mm) Peso (gr)
## Longitud (mm)     1.0000000 0.9713897
## Peso (gr)         0.9713897 1.0000000

# Histograms of both variables for locality 2 (default binning).

# Distribution of `Peso (gr)`.
H3peso <- ggplot(data = Localidad2, mapping = aes(x = `Peso (gr)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H3peso

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Distribution of `Longitud (mm)`.
H4longitud <- ggplot(data = Localidad2, mapping = aes(x = `Longitud (mm)`)) +
  geom_histogram(fill = "tomato") +
  theme_classic()
H4longitud

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Normality of the variables: Shapiro-Wilk test on each column of Localidad2.
# First column tested: `Longitud (mm)`.
shapiro.test(Localidad2$`Longitud (mm)`)

## 
##  Shapiro-Wilk normality test
## 
## data:  Localidad2$`Longitud (mm)`
## W = 0.94265, p-value = 0.4921

# Second column tested: `Peso (gr)`.
shapiro.test(Localidad2$`Peso (gr)`)

## 
##  Shapiro-Wilk normality test
## 
## data:  Localidad2$`Peso (gr)`
## W = 0.90064, p-value = 0.1364

# Simple linear regression for locality 2: `Longitud (mm)` is the response
# and `Peso (gr)` the predictor. The coefficient table is printed afterwards.
Modelo2 <- lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad2)
summary(Modelo2)

## 
## Call:
## lm(formula = `Longitud (mm)` ~ `Peso (gr)`, data = Localidad2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.11715 -0.06193  0.00755  0.04598  0.13078 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.56602    0.04230   37.02 6.74e-13 ***
## `Peso (gr)`  0.37011    0.02728   13.57 3.27e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08149 on 11 degrees of freedom
## Multiple R-squared:  0.9436, Adjusted R-squared:  0.9385 
## F-statistic:   184 on 1 and 11 DF,  p-value: 3.267e-08

# Model assumptions for Modelo2
# 1 - Normality: Shapiro-Wilk test on the residuals of Modelo2.
shapiro.test(Modelo2$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  Modelo2$residuals
## W = 0.95923, p-value = 0.7417

# 2 - Independence: Durbin-Watson test on Modelo2.
# NOTE(review): the statistic depends on the order of the rows - confirm the
# rows have a meaningful order before interpreting it.
dwtest(Modelo2)

## 
##  Durbin-Watson test
## 
## data:  Modelo2
## DW = 1.9728, p-value = 0.4452
## alternative hypothesis: true autocorrelation is greater than 0

# 3 - Constant variance: score test for non-constant error variance.
ncvTest(Modelo2)

## Non-constant Variance Score Test 
## Variance formula: ~ fitted.values 
## Chisquare = 1.910682, Df = 1, p = 0.16689

# Scatter plot of the data used by Modelo2 together with its regression line.
#
# The axes follow the model formula (`Longitud (mm)` ~ `Peso (gr)`): the
# predictor goes on x and the response on y, so the line drawn by
# geom_smooth() is the same fit whose equation is printed on the plot.
# Previously the axes were swapped, so the annotated equation did not describe
# the drawn line.
#
# The equation and R^2 are read from the fitted model instead of being typed
# in by hand. The hand-typed label said "R^2=0.93", while the recorded
# summary of Modelo2 reports 0.9436. The label is drawn once with annotate()
# (geom_text() with aes() redraws the same label for every data row).
coef_m2 <- coef(Modelo2)
label_m2 <- paste(
  sprintf("R^2 = %.2f", summary(Modelo2)$r.squared),
  sprintf(
    "y = %.5f %s %.5f(x)",
    coef_m2[[1]],
    if (coef_m2[[2]] < 0) "-" else "+",
    abs(coef_m2[[2]])
  ),
  sep = "\n"
)

# model.frame() returns exactly the rows and columns the model was fitted on.
lineal <- ggplot(
  model.frame(Modelo2),
  aes(x = `Peso (gr)`, y = `Longitud (mm)`)
) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_classic() +
  xlab("LogPeso (gr)") +
  ylab("LogLongitud (mm)") +
  # Anchor the label to the top-left corner regardless of the data range.
  annotate(
    "text",
    x = -Inf, y = Inf, hjust = -0.05, vjust = 1.2,
    label = label_m2
  )

lineal

## `geom_smooth()` using formula = 'y ~ x'

# Standard lm diagnostic plots (residual trend, normal Q-Q, ...).
plot(Modelo2)

# Additional package.
# NOTE(review): nothing in the visible part of this script calls a plyr
# function - confirm whether this dependency is still needed.
library(plyr)

# Load the LL table; per the recorded column specification it has two numeric
# columns, AP and RM.
LL <- read_csv(file = "/cloud/project/Trabajo/LL.csv")

## Rows: 18 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): AP, RM
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Normality: Shapiro-Wilk test on the AP column of LL.
# NOTE(review): only the AP column is tested here; the RM column is not.
shapiro.test(LL$AP)

## 
##  Shapiro-Wilk normality test
## 
## data:  LL$AP
## W = 0.66059, p-value = 2.879e-05

# Test for a significant difference between the estimated Y values of the two
# localities, stored as the columns AP and RM of LL.
#
# The two columns are passed as two separate samples. The earlier formula
# call `AP ~ RM` used the numeric RM column as the grouping factor, so every
# row became its own group (hence the recorded df = 17 for 18 rows) and the
# test could not compare the localities. Any console output recorded from
# that earlier call must be regenerated.
#
# Missing values inside each sample are dropped by kruskal.test() itself.
kruskal.test(list(AP = LL$AP, RM = LL$RM))

## 
##  Kruskal-Wallis rank sum test
## 
## data:  AP by RM
## Kruskal-Wallis chi-squared = 17, df = 17, p-value = 0.4544

# Proyecto final

# Edwin Bottia, Carlos Taboada, Mario Pinzón

# 2023-06-17

# Realizamos el mismo ejercicio para la segunda localidad