This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
#Estadistica Inferencial
#22/01/2026
#Cargar Datos
# =========================
# ESTADÍSTICA INFERENCIAL
# Fecha: 22/01/2026
# =========================
library(gt)
library(dplyr)
# -------------------------
# Cargar datos
# -------------------------
setwd("C:/Users/Alexander/Downloads")
datos <- read.csv(
"soil_pollution_diseases.csv",
sep = ",",
stringsAsFactors = FALSE
)
datos$Date_Reported <- as.Date(datos$Date_Reported, format = "%Y-%m-%d")
# Convertir la columna a fecha (día/mes/año)
datos$Date_Reported <- as.Date(datos$Date_Reported, format = "%d/%m/%Y")
# Extraemos la variable discreta (Dia)
Dia <- as.integer(format(datos$Date_Reported, "%d"))
# TABLA DE DISTRIBUCIÓN DE FRECUENCIAS DE MESES
TDF_dia <- table(Dia)
Tabla_dia <- as.data.frame(TDF_dia)
# Frecuencia absoluta y relativa
ni <- Tabla_dia$Freq
hi <- round((ni / sum(ni)) * 100, 2)
Tabla_dia_final <- data.frame(
Dia = as.character(Tabla_dia$Dia),
ni = ni,
`hi (%)` = hi
)
# Fila TOTAL
Total <- data.frame(
Dia = "Total",
ni = sum(ni),
`hi (%)` = 100
)
TDF_dia_Total <- rbind(Tabla_dia_final, Total)
# =========================
# FORMATO gt
# =========================
tabla_dia_gt <- TDF_dia_Total %>%
gt() %>%
tab_header(
title = md("*Tabla N° 3*"),
subtitle = md("**Tabla de distribución de frecuencias de los dias del estudio**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(rows = Dia == "Total")
)
tabla_dia_gt
| Tabla N° 3 | ||
| Tabla de distribución de frecuencias de los dias del estudio | ||
| Dia | ni | hi.... |
|---|---|---|
| 1 | 109 | 3.63 |
| 2 | 99 | 3.30 |
| 3 | 107 | 3.57 |
| 4 | 92 | 3.07 |
| 5 | 91 | 3.03 |
| 6 | 96 | 3.20 |
| 7 | 97 | 3.23 |
| 8 | 99 | 3.30 |
| 9 | 84 | 2.80 |
| 10 | 103 | 3.43 |
| 11 | 86 | 2.87 |
| 12 | 103 | 3.43 |
| 13 | 100 | 3.33 |
| 14 | 101 | 3.37 |
| 15 | 86 | 2.87 |
| 16 | 115 | 3.83 |
| 17 | 102 | 3.40 |
| 18 | 92 | 3.07 |
| 19 | 87 | 2.90 |
| 20 | 107 | 3.57 |
| 21 | 89 | 2.97 |
| 22 | 104 | 3.47 |
| 23 | 81 | 2.70 |
| 24 | 115 | 3.83 |
| 25 | 114 | 3.80 |
| 26 | 101 | 3.37 |
| 27 | 114 | 3.80 |
| 28 | 86 | 2.87 |
| 29 | 95 | 3.17 |
| 30 | 98 | 3.27 |
| 31 | 47 | 1.57 |
| Total | 3000 | 100.00 |
| Autor: Grupo 3 | ||
# Criterio de agrupación por semanas (1, 2, 3, 4)
Semana <- sapply(Dia, function(d) {
if (d >= 1 & d <= 7) {
1
} else if (d >= 8 & d <= 14) {
2
} else if (d >= 15 & d <= 21) {
3
} else {
4
}
})
Semana <- factor(Semana, levels = c(1, 2, 3, 4))
# Tabla de frecuencias por semanas numéricas
TDF_Semana <- table(Semana)
Tabla_Semana <- as.data.frame(TDF_Semana)
# Frecuencias relativas (porcentaje)
hi <- (Tabla_Semana$Freq / sum(Tabla_Semana$Freq)) * 100
ni <- Tabla_Semana$Freq
Tabla_Semana <- data.frame(Tabla_Semana, hi)
# Acumuladas
Niasc <- cumsum(ni)
Hiasc <- cumsum(hi)
Nidsc <- rev(cumsum(rev(ni)))
Hidsc <- rev(cumsum(rev(hi)))
Tabla_semana_final <- data.frame(
Semana = Tabla_Semana$Semana,
ni = Tabla_Semana$Freq,
`hi (%)` = round(Tabla_Semana$hi, 2),
Ni_asc = Niasc,
`Hi_asc(%)` = round(Hiasc, 2),
Ni_dsc = Nidsc,
`Hi_dsc(%)` = round(Hidsc, 2)
)
# Totales
totales <- data.frame(
Semana = "Total",
ni = sum(ni),
`hi (%)` = 100,
Ni_asc = "-",
`Hi_asc(%)` = "-",
Ni_dsc = "-",
`Hi_dsc(%)` = "-",
stringsAsFactors = FALSE
)
TDF_semanas_total <- rbind(Tabla_semana_final, totales)
# Formato gt para semanas
tabla_semanas_gt <- TDF_semanas_total %>%
gt() %>%
tab_header(
title = md("*Tabla N°6*"),
subtitle = md("**Distribución de frecuencias de los días agrupados por semanas**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 3")
) %>%
tab_style(
style = cell_text(weight = "bold"),
locations = cells_body(
rows = Semana == "Total"
)
)
tabla_semanas_gt
| Tabla N°6 | ||||||
| Distribución de frecuencias de los días agrupados por semanas | ||||||
| Semana | ni | hi.... | Ni_asc | Hi_asc... | Ni_dsc | Hi_dsc... |
|---|---|---|---|---|---|---|
| 1 | 691 | 23.03 | 691 | 23.03 | 3000 | 100 |
| 2 | 676 | 22.53 | 1367 | 45.57 | 2309 | 76.97 |
| 3 | 678 | 22.60 | 2045 | 68.17 | 1633 | 54.43 |
| 4 | 955 | 31.83 | 3000 | 100 | 955 | 31.83 |
| Total | 3000 | 100.00 | - | - | - | - |
| Autor: Grupo 3 | ||||||
# Gráfica — Porcentaje por Mes
barplot(hi,
main = "Gráfica N°9: Distribución porcentual de la contaminación
del suelo según las semanas",
xlab = "Semana ",
ylab = "%",
col = "lightgreen",
names.arg = Tabla_Semana$Semana,
las = 1,
cex.names = 0.8)
#Conjetura del modelo: mi variable y sus barras se comportan como el modelo Uniforme
# =========================
# MODELO Geometrico (1–4)
# =========================
x <- ni
hi_1_4 <- x / sum(x)
X <- 1:length(hi_1_4)
P_uniforme <- rep(1 / length(X), length(X))
barplot(
rbind(hi_1_4, P_uniforme),
beside = TRUE,
col = c("lightgreen", "gray"),
names.arg = 1:4,
main = "Gráfica N°XX: Comparación modelo vs la realidad,
de las semanas de la contaminación del suelo
y sus efectos en la salud (Semanas 1–4)",
ylab = "Probabilidad",
xlab = "Semana",
ylim = c(0, max(hi_1_4,1))
)
legend("topright",
legend = c("Real", "Modelo uniforme"),
fill = c("lightgreen", "gray"),
cex = 0.8)
# Tests de bondad
# Test Pearson:La correlación de Pearson no puede calcularse debido a que el modelo uniforme presenta varianza nula.
# Test Chi-Cuadrado :
# Frecuencias observadas y esperadas
Fo <- hi_1_4
Fe <- P_uniforme
# Estadístico Chi-cuadrado
x2 <- sum(((Fo - Fe)^2) / Fe)
x2
## [1] 0.02496267
# Valor crítico
vc <- qchisq(0.95, length(Fo) - 1)
vc
## [1] 7.814728
x2 < vc
## [1] TRUE