library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(dslabs)
data("murders")

# Scatter plots: log10 of total murders against log10 of population (in
# millions), drawn in a separate panel for each region
ggplot(
  data = murders,
  mapping = aes(
    x = log10(population / 10^6),
    y = log10(total),
    color = region,
    shape = region
  )
) +
  geom_point(show.legend = FALSE) +
  facet_wrap(~ region, nrow = 2)

# Pearson correlation coefficient between log10 population (in millions) and
# log10 total murders, computed over all rows of `murders`
murders %>%
  summarise(
    cor(
      log10(population / 10^6),
      log10(total),
      method = "pearson"
    )
  )
##   cor(log10(population/10^6), log10(total), method = "pearson")
## 1                                                     0.8886064
# Northeast subset: keep the raw population and murder counts, add their log10
# transforms (population expressed in millions), and order the columns as
# population, log10pop, total, log10tot
northeast <- murders %>%
  filter(region == "Northeast") %>%
  dplyr::select(population, total) %>%
  mutate(
    log10pop = log10(population / 10^6),
    log10tot = log10(total)
  ) %>%
  dplyr::relocate(log10pop, .after = population)
northeast
##   population    log10pop total log10tot
## 1    3574097  0.55316633    97 1.986772
## 2    1328361  0.12331612    11 1.041393
## 3    6547629  0.81608406   118 2.071882
## 4    1316470  0.11941097     5 0.698970
## 5    8791894  0.94408244   246 2.390935
## 6   19378102  1.28731124   517 2.713491
## 7   12702379  1.10388507   457 2.659916
## 8    1052567  0.02224975    16 1.204120
## 9     625741 -0.20360539     2 0.301030
# Fit a simple linear regression of log10tot on log10pop for the Northeast data.
# The data frame is passed to lm() by name instead of being piped in as `.`:
# the call stored inside the model then reads `data = northeast`, so it can be
# re-evaluated later (e.g. by update()). lm() only uses the variables named in
# the formula, so no column selection is needed beforehand.
modelo <- lm(log10tot ~ log10pop, data = northeast)

# Keep the model summary in `sumary`; it is assigned here, not printed
# (evaluate `sumary` to display coefficients, p-values and R-squared)
sumary <- summary(modelo)

# Extract the estimated intercept and slope by coefficient name
intercept <- coef(modelo)[["(Intercept)"]]
slope <- coef(modelo)[["log10pop"]]

¿Cómo interpretar los coeficientes? ¿Considera que el modelo es significativo y por qué? ¿Cómo es el ajuste del modelo?

# Scatter plot of the Northeast data with the least-squares line and the fitted
# equation as a text label at (0.1, 3).
# The label is added with annotate() rather than geom_text(aes(...)): a
# geom_text layer inherits the 9-row data, so a constant label is drawn once
# per row (overplotted) and ggplot2 warns that all aesthetics have length 1.
# annotate() draws the label exactly once.
northeast %>%
  ggplot(aes(x = log10pop, y = log10tot)) +
  geom_point() +
  ggtitle("Regresion linear model") +
  geom_smooth(method = "lm", color = "blue") +
  annotate(
    "text",
    x = 0.1,
    y = 3,
    label = paste("y = ", round(slope, 2), "x + ", round(intercept, 2))
  )
## `geom_smooth()` using formula = 'y ~ x'

# Residuals of the fitted model: store them, print them, then plot them
# against their observation index
ei <- resid(modelo)
ei
##           1           2           3           4           5           6 
##  0.27453592  0.01986875 -0.06282747 -0.31627887  0.05054933 -0.17841806 
##           7           8           9 
##  0.06274895  0.34499620 -0.19517475
plot(ei)

# Fitted values in ascending order; the names keep each value's original
# observation index
pred <- sort(fitted.values(modelo), decreasing = FALSE)
pred
##         9         8         4         2         1         3         5         7 
## 0.4962047 0.8591238 1.0152489 1.0215239 1.7122358 2.1347095 2.3403858 2.5971672 
##         6 
## 2.8919086
# Index plot of Cook's distance for every observation
plot(cooks.distance(modelo))

# Model validation
# One-sample t-test of H0: the residuals have mean zero.
# NOTE(review): residuals from a least-squares fit with an intercept average
# to zero by construction (see t ~ -1.4e-16 below), so this test carries
# little information about model adequacy.
t.test(x = ei, mu = 0)
## 
##  One Sample t-test
## 
## data:  ei
## t = -1.3876e-16, df = 8, p-value = 1
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -0.1664627  0.1664627
## sample estimates:
##     mean of x 
## -1.001682e-17
# The p-value is greater than the significance level, so the null hypothesis
# is not rejected
# Shapiro-Wilk test of normality on the residuals
shapiro.test(x = ei)
## 
##  Shapiro-Wilk normality test
## 
## data:  ei
## W = 0.96004, p-value = 0.7988
# Install the lmtest package if it is not already installed
# install.packages("lmtest")

# Load the lmtest package
library(lmtest)
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
# Studentized Breusch-Pagan test on the fitted model
bptest(modelo, studentize = TRUE)
## 
##  studentized Breusch-Pagan test
## 
## data:  modelo
## BP = 2.3144, df = 1, p-value = 0.1282
# Two-sided Durbin-Watson test (alternative: autocorrelation is not 0)
dwtest(
  modelo,
  alternative = "two.sided"
)
## 
##  Durbin-Watson test
## 
## data:  modelo
## DW = 2.0058, p-value = 0.864
## alternative hypothesis: true autocorrelation is not 0