==========================================================================================================================================

Tamaulipas

==========================================================================================================================================

# Librerías necesarias
library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(magrittr)
library(ggplot2)
library(BSDA)
## Loading required package: lattice
## 
## Attaching package: 'BSDA'
## The following object is masked from 'package:datasets':
## 
##     Orange
# Load the workbook; the relative path is resolved against the working directory.
database_norte <- read_excel(path = "noreste.xlsx")
## New names:
## • `` -> `...1`

==========================================================================================================================================

Pregunta #1

==========================================================================================================================================

# Keep only the Tamaulipas rows, then split them by locality code ("U" / "R")
# so each zone can be analysed on its own.
Tamaulipas <- dplyr::filter(database_norte, Estado == "Tamaulipas")
Tamaulipas_urbano <- dplyr::filter(Tamaulipas, localidad == "U")
Tamaulipas_rural <- dplyr::filter(Tamaulipas, localidad == "R")
# Z tests and confidence intervals for current income (ing_cor)

# Z test for the urban subset; the sample standard deviation is passed as
# sigma.x. mu is not supplied, so the reported z statistic is against a mean
# of 0 -- NOTE(review): presumably only the confidence interval is of interest.
z.test(Tamaulipas_urbano$ing_cor, sigma.x = sd(Tamaulipas_urbano$ing_cor))
## 
##  One-sample z-Test
## 
## data:  Tamaulipas_urbano$ing_cor
## z = 32.383, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  61171.56 69053.39
## sample estimates:
## mean of x 
##  65112.47
# Z test for the rural subset; the sample standard deviation is passed as
# sigma.x. mu is not supplied, so the reported z statistic is against a mean
# of 0 -- NOTE(review): presumably only the confidence interval is of interest.
z.test(Tamaulipas_rural$ing_cor, sigma.x = sd(Tamaulipas_rural$ing_cor))
## 
##  One-sample z-Test
## 
## data:  Tamaulipas_rural$ing_cor
## z = 17.338, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  34958.62 43869.73
## sample estimates:
## mean of x 
##  39414.18
# Table of confidence intervals for current income (ing_cor), one per zone.
# Because the function returns a length-2 vector (lower, upper), tapply()
# yields a list with one conf.int entry per level of localidad.
IC <- tapply(Tamaulipas$ing_cor, list(Tamaulipas$localidad),
             # na.rm = TRUE keeps a missing value from turning sigma.x into NA
             function(x) z.test(x, sigma.x = sd(x, na.rm = TRUE))$conf.int)

# Take the zone labels from the tapply() result instead of hard-coding them,
# so every interval stays paired with the group it was computed from.
zonas <- names(IC)

IC_df <- data.frame(
  inferior = vapply(IC, function(ci) ci[1], numeric(1)),
  superior = vapply(IC, function(ci) ci[2], numeric(1)),
  names = zonas,
  row.names = zonas
)

# The plot below draws exactly these two zones; fail early if one is missing.
stopifnot(all(c("R", "U") %in% zonas))

# Confidence-interval plot
# scipen = 999 makes R prefer fixed over scientific notation; the option stays
# set for the rest of the session, so later printed output is affected too.
options(scipen = 999)
plot(NA, xlim = c(0, 100000), ylim = c(1, 7),
     ylab = "localidad", xlab = "ingresos")

# Rural zone (rows are looked up by name, not by position)
arrows(IC_df["R", "inferior"], 2, IC_df["R", "superior"], 2,
       code = 3, angle = 90, col = "blue", lwd = 1, cex = 0.7)

# Urban zone
arrows(IC_df["U", "inferior"], 5, IC_df["U", "superior"], 5,
       code = 3, angle = 90, col = "green", lwd = 1, cex = 0.7)

# Labels at the left edge of each interval's row
text(1, 2, "R", col = "blue", cex = 0.7)
text(1, 5, "U", col = "green", cex = 0.7)

==========================================================================================================================================

Pregunta #2

==========================================================================================================================================

# Shapiro-Wilk normality test on the gasto_mon column of the rural subset
shapiro.test(Tamaulipas_rural$gasto_mon)
## 
##  Shapiro-Wilk normality test
## 
## data:  Tamaulipas_rural$gasto_mon
## W = 0.75596, p-value = 0.00000000000004382
# Shapiro-Wilk normality test on the gasto_mon column of the urban subset
shapiro.test(Tamaulipas_urbano$gasto_mon)
## 
##  Shapiro-Wilk normality test
## 
## data:  Tamaulipas_urbano$gasto_mon
## W = 0.87097, p-value < 0.00000000000000022
# Z test for the urban zone on gasto_mon, using the sample standard deviation
# as sigma.x
z.test(Tamaulipas_urbano$gasto_mon, sigma.x=sd(Tamaulipas_urbano$gasto_mon))
## 
##  One-sample z-Test
## 
## data:  Tamaulipas_urbano$gasto_mon
## z = 31.489, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  38037.40 43086.76
## sample estimates:
## mean of x 
##  40562.08
# Z test for the rural zone on gasto_mon, using the sample standard deviation
# as sigma.x
z.test(Tamaulipas_rural$gasto_mon, sigma.x=sd(Tamaulipas_rural$gasto_mon))
## 
##  One-sample z-Test
## 
## data:  Tamaulipas_rural$gasto_mon
## z = 15.321, p-value < 0.00000000000000022
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  21969.64 28415.07
## sample estimates:
## mean of x 
##  25192.36

==========================================================================================================================================

Pregunta #3

==========================================================================================================================================

# One-sided, one-sample z test of mean current income (ing_cor) in Tamaulipas
# against the quarterly minimum wage.
#   H0: mu = 15766.92  (mean income equals the quarterly minimum wage)
#   H1: mu > 15766.92  (mean income is above the quarterly minimum wage)

# The population standard deviation is not known, so the sample standard
# deviation of current income is used in its place.
sTam <- sd(Tamaulipas$ing_cor)
miu <- 15766.92  # quarterly minimum wage in 2022

z.test(
  x = Tamaulipas$ing_cor,
  sigma.x = sTam,
  mu = miu,
  alternative = "greater"
)
## 
##  One-sample z-Test
## 
## data:  Tamaulipas$ing_cor
## z = 25.625, p-value < 0.00000000000000022
## alternative hypothesis: true mean is greater than 15766.92
## 95 percent confidence interval:
##  56067.39       NA
## sample estimates:
## mean of x 
##  58831.65