# Data loading: switch to the project folder and read the water-pollution CSV.
setwd("C:/Users/LENOVO/OneDrive/Escritorio/ESTADISTICA")
datos <- read.csv("china_water_pollution_data.csv")

# Qualitative variable under study: the monitoring station of each record.
Estacion <- datos$Monitoring_Station

# Frequency table: one row per station, absolute frequency stored in `ni`.
TDF_Estacion <- setNames(
  as.data.frame(table(Estacion)),
  c("Estacion", "ni")
)

# Relative frequency, expressed as a percentage of all records.
TDF_Estacion[["hi"]] <- with(TDF_Estacion, (ni / sum(ni)) * 100)

# Totals row holding the column sums of `ni` and `hi`.
Sumatoria <- with(
  TDF_Estacion,
  data.frame(Estacion = "Sumatoria", ni = sum(ni), hi = sum(hi))
)

# Append the totals row below the per-station rows.
TDF_Estacion_suma <- rbind(TDF_Estacion, Sumatoria)
#librerias
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.2
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.5.2
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(stringr)
## Warning: package 'stringr' was built under R version 4.5.2
# Final table: render the station frequency table (totals row included) with
# centred columns, then apply compact striped/hover bootstrap styling.
kable_styling(
  kable(
    TDF_Estacion_suma,
    align = "c",
    caption = "Tabla:1 Distribución de Frecuencias de las Estaciones de
Monitoreo del agua en China"
  ),
  full_width = FALSE,
  position = "center",
  bootstrap_options = c("striped", "hover", "condensed")
)
Tabla:1 Distribución de Frecuencias de las Estaciones de Monitoreo del
agua en China
|
Estacion
|
ni
|
hi
|
|
Beijing_Station_1
|
26
|
0.8666667
|
|
Beijing_Station_10
|
29
|
0.9666667
|
|
Beijing_Station_2
|
34
|
1.1333333
|
|
Beijing_Station_3
|
29
|
0.9666667
|
|
Beijing_Station_4
|
27
|
0.9000000
|
|
Beijing_Station_5
|
33
|
1.1000000
|
|
Beijing_Station_6
|
26
|
0.8666667
|
|
Beijing_Station_7
|
42
|
1.4000000
|
|
Beijing_Station_8
|
15
|
0.5000000
|
|
Beijing_Station_9
|
38
|
1.2666667
|
|
Chengdu_Station_1
|
17
|
0.5666667
|
|
Chengdu_Station_10
|
17
|
0.5666667
|
|
Chengdu_Station_2
|
20
|
0.6666667
|
|
Chengdu_Station_3
|
15
|
0.5000000
|
|
Chengdu_Station_4
|
16
|
0.5333333
|
|
Chengdu_Station_5
|
18
|
0.6000000
|
|
Chengdu_Station_6
|
14
|
0.4666667
|
|
Chengdu_Station_7
|
11
|
0.3666667
|
|
Chengdu_Station_8
|
18
|
0.6000000
|
|
Chengdu_Station_9
|
19
|
0.6333333
|
|
Dali_Station_1
|
13
|
0.4333333
|
|
Dali_Station_10
|
12
|
0.4000000
|
|
Dali_Station_2
|
20
|
0.6666667
|
|
Dali_Station_3
|
14
|
0.4666667
|
|
Dali_Station_4
|
12
|
0.4000000
|
|
Dali_Station_5
|
11
|
0.3666667
|
|
Dali_Station_6
|
15
|
0.5000000
|
|
Dali_Station_7
|
20
|
0.6666667
|
|
Dali_Station_8
|
14
|
0.4666667
|
|
Dali_Station_9
|
13
|
0.4333333
|
|
Guangzhou_Station_1
|
20
|
0.6666667
|
|
Guangzhou_Station_10
|
11
|
0.3666667
|
|
Guangzhou_Station_2
|
13
|
0.4333333
|
|
Guangzhou_Station_3
|
13
|
0.4333333
|
|
Guangzhou_Station_4
|
12
|
0.4000000
|
|
Guangzhou_Station_5
|
17
|
0.5666667
|
|
Guangzhou_Station_6
|
14
|
0.4666667
|
|
Guangzhou_Station_7
|
19
|
0.6333333
|
|
Guangzhou_Station_8
|
17
|
0.5666667
|
|
Guangzhou_Station_9
|
10
|
0.3333333
|
|
Hangzhou_Station_1
|
15
|
0.5000000
|
|
Hangzhou_Station_10
|
12
|
0.4000000
|
|
Hangzhou_Station_2
|
16
|
0.5333333
|
|
Hangzhou_Station_3
|
10
|
0.3333333
|
|
Hangzhou_Station_4
|
14
|
0.4666667
|
|
Hangzhou_Station_5
|
20
|
0.6666667
|
|
Hangzhou_Station_6
|
21
|
0.7000000
|
|
Hangzhou_Station_7
|
13
|
0.4333333
|
|
Hangzhou_Station_8
|
9
|
0.3000000
|
|
Hangzhou_Station_9
|
18
|
0.6000000
|
|
Jinan_Station_1
|
16
|
0.5333333
|
|
Jinan_Station_10
|
21
|
0.7000000
|
|
Jinan_Station_2
|
17
|
0.5666667
|
|
Jinan_Station_3
|
13
|
0.4333333
|
|
Jinan_Station_4
|
25
|
0.8333333
|
|
Jinan_Station_5
|
10
|
0.3333333
|
|
Jinan_Station_6
|
19
|
0.6333333
|
|
Jinan_Station_7
|
16
|
0.5333333
|
|
Jinan_Station_8
|
14
|
0.4666667
|
|
Jinan_Station_9
|
9
|
0.3000000
|
|
Kunming_Station_1
|
20
|
0.6666667
|
|
Kunming_Station_10
|
15
|
0.5000000
|
|
Kunming_Station_2
|
16
|
0.5333333
|
|
Kunming_Station_3
|
12
|
0.4000000
|
|
Kunming_Station_4
|
9
|
0.3000000
|
|
Kunming_Station_5
|
14
|
0.4666667
|
|
Kunming_Station_6
|
19
|
0.6333333
|
|
Kunming_Station_7
|
15
|
0.5000000
|
|
Kunming_Station_8
|
15
|
0.5000000
|
|
Kunming_Station_9
|
17
|
0.5666667
|
|
Luoyang_Station_1
|
18
|
0.6000000
|
|
Luoyang_Station_10
|
11
|
0.3666667
|
|
Luoyang_Station_2
|
12
|
0.4000000
|
|
Luoyang_Station_3
|
12
|
0.4000000
|
|
Luoyang_Station_4
|
11
|
0.3666667
|
|
Luoyang_Station_5
|
10
|
0.3333333
|
|
Luoyang_Station_6
|
19
|
0.6333333
|
|
Luoyang_Station_7
|
13
|
0.4333333
|
|
Luoyang_Station_8
|
13
|
0.4333333
|
|
Luoyang_Station_9
|
19
|
0.6333333
|
|
Mianyang_Station_1
|
15
|
0.5000000
|
|
Mianyang_Station_10
|
9
|
0.3000000
|
|
Mianyang_Station_2
|
10
|
0.3333333
|
|
Mianyang_Station_3
|
16
|
0.5333333
|
|
Mianyang_Station_4
|
18
|
0.6000000
|
|
Mianyang_Station_5
|
21
|
0.7000000
|
|
Mianyang_Station_6
|
18
|
0.6000000
|
|
Mianyang_Station_7
|
20
|
0.6666667
|
|
Mianyang_Station_8
|
10
|
0.3333333
|
|
Mianyang_Station_9
|
9
|
0.3000000
|
|
Nanjing_Station_1
|
8
|
0.2666667
|
|
Nanjing_Station_10
|
25
|
0.8333333
|
|
Nanjing_Station_2
|
11
|
0.3666667
|
|
Nanjing_Station_3
|
17
|
0.5666667
|
|
Nanjing_Station_4
|
20
|
0.6666667
|
|
Nanjing_Station_5
|
18
|
0.6000000
|
|
Nanjing_Station_6
|
14
|
0.4666667
|
|
Nanjing_Station_7
|
16
|
0.5333333
|
|
Nanjing_Station_8
|
12
|
0.4000000
|
|
Nanjing_Station_9
|
12
|
0.4000000
|
|
Ningbo_Station_1
|
16
|
0.5333333
|
|
Ningbo_Station_10
|
12
|
0.4000000
|
|
Ningbo_Station_2
|
14
|
0.4666667
|
|
Ningbo_Station_3
|
13
|
0.4333333
|
|
Ningbo_Station_4
|
18
|
0.6000000
|
|
Ningbo_Station_5
|
12
|
0.4000000
|
|
Ningbo_Station_6
|
19
|
0.6333333
|
|
Ningbo_Station_7
|
14
|
0.4666667
|
|
Ningbo_Station_8
|
20
|
0.6666667
|
|
Ningbo_Station_9
|
18
|
0.6000000
|
|
Qingdao_Station_1
|
15
|
0.5000000
|
|
Qingdao_Station_10
|
15
|
0.5000000
|
|
Qingdao_Station_2
|
13
|
0.4333333
|
|
Qingdao_Station_3
|
13
|
0.4333333
|
|
Qingdao_Station_4
|
9
|
0.3000000
|
|
Qingdao_Station_5
|
18
|
0.6000000
|
|
Qingdao_Station_6
|
15
|
0.5000000
|
|
Qingdao_Station_7
|
18
|
0.6000000
|
|
Qingdao_Station_8
|
13
|
0.4333333
|
|
Qingdao_Station_9
|
11
|
0.3666667
|
|
Shanghai_Station_1
|
34
|
1.1333333
|
|
Shanghai_Station_10
|
28
|
0.9333333
|
|
Shanghai_Station_2
|
39
|
1.3000000
|
|
Shanghai_Station_3
|
29
|
0.9666667
|
|
Shanghai_Station_4
|
39
|
1.3000000
|
|
Shanghai_Station_5
|
30
|
1.0000000
|
|
Shanghai_Station_6
|
28
|
0.9333333
|
|
Shanghai_Station_7
|
33
|
1.1000000
|
|
Shanghai_Station_8
|
26
|
0.8666667
|
|
Shanghai_Station_9
|
26
|
0.8666667
|
|
Shenzhen_Station_1
|
20
|
0.6666667
|
|
Shenzhen_Station_10
|
10
|
0.3333333
|
|
Shenzhen_Station_2
|
13
|
0.4333333
|
|
Shenzhen_Station_3
|
16
|
0.5333333
|
|
Shenzhen_Station_4
|
21
|
0.7000000
|
|
Shenzhen_Station_5
|
18
|
0.6000000
|
|
Shenzhen_Station_6
|
9
|
0.3000000
|
|
Shenzhen_Station_7
|
15
|
0.5000000
|
|
Shenzhen_Station_8
|
13
|
0.4333333
|
|
Shenzhen_Station_9
|
20
|
0.6666667
|
|
Suzhou_Station_1
|
11
|
0.3666667
|
|
Suzhou_Station_10
|
12
|
0.4000000
|
|
Suzhou_Station_2
|
12
|
0.4000000
|
|
Suzhou_Station_3
|
17
|
0.5666667
|
|
Suzhou_Station_4
|
11
|
0.3666667
|
|
Suzhou_Station_5
|
9
|
0.3000000
|
|
Suzhou_Station_6
|
15
|
0.5000000
|
|
Suzhou_Station_7
|
21
|
0.7000000
|
|
Suzhou_Station_8
|
16
|
0.5333333
|
|
Suzhou_Station_9
|
16
|
0.5333333
|
|
Wuhan_Station_1
|
16
|
0.5333333
|
|
Wuhan_Station_10
|
18
|
0.6000000
|
|
Wuhan_Station_2
|
13
|
0.4333333
|
|
Wuhan_Station_3
|
16
|
0.5333333
|
|
Wuhan_Station_4
|
15
|
0.5000000
|
|
Wuhan_Station_5
|
11
|
0.3666667
|
|
Wuhan_Station_6
|
19
|
0.6333333
|
|
Wuhan_Station_7
|
18
|
0.6000000
|
|
Wuhan_Station_8
|
11
|
0.3666667
|
|
Wuhan_Station_9
|
17
|
0.5666667
|
|
Yichang_Station_1
|
14
|
0.4666667
|
|
Yichang_Station_10
|
8
|
0.2666667
|
|
Yichang_Station_2
|
16
|
0.5333333
|
|
Yichang_Station_3
|
11
|
0.3666667
|
|
Yichang_Station_4
|
14
|
0.4666667
|
|
Yichang_Station_5
|
17
|
0.5666667
|
|
Yichang_Station_6
|
19
|
0.6333333
|
|
Yichang_Station_7
|
19
|
0.6333333
|
|
Yichang_Station_8
|
5
|
0.1666667
|
|
Yichang_Station_9
|
15
|
0.5000000
|
|
Zhengzhou_Station_1
|
13
|
0.4333333
|
|
Zhengzhou_Station_10
|
17
|
0.5666667
|
|
Zhengzhou_Station_2
|
19
|
0.6333333
|
|
Zhengzhou_Station_3
|
17
|
0.5666667
|
|
Zhengzhou_Station_4
|
20
|
0.6666667
|
|
Zhengzhou_Station_5
|
12
|
0.4000000
|
|
Zhengzhou_Station_6
|
12
|
0.4000000
|
|
Zhengzhou_Station_7
|
17
|
0.5666667
|
|
Zhengzhou_Station_8
|
15
|
0.5000000
|
|
Zhengzhou_Station_9
|
12
|
0.4000000
|
|
Sumatoria
|
3000
|
100.0000000
|
# Because there are so many monitoring stations, they are grouped by region.
# Both derived columns are built in a single mutate() call:
#   * Ciudad: the leading run of ASCII letters in the station name.
#   * Region: the region assigned to that city; any city not listed below
#     falls into "Otra".
datos <- mutate(
  datos,
  Ciudad = str_extract(Monitoring_Station, "^[A-Za-z]+"),
  Region = case_when(
    Ciudad %in% c("Beijing", "Jinan", "Qingdao") ~ "Norte",
    Ciudad %in% c("Guangzhou", "Shenzhen", "Dali", "Kunming") ~ "Sur",
    Ciudad %in% c("Wuhan", "Zhengzhou", "Luoyang", "Yichang", "Mianyang") ~ "Centro",
    Ciudad %in% c("Shanghai", "Suzhou", "Hangzhou", "Ningbo", "Nanjing") ~ "Este",
    Ciudad %in% "Chengdu" ~ "Oeste",
    TRUE ~ "Otra"
  )
)
# Frequency table by region: absolute frequency `ni` and percentage `hi`
# (rounded to two decimals).
TDF_Region <- datos %>%
  count(Region, name = "ni") %>%
  mutate(
    hi = round((ni / sum(ni)) * 100, 2)
  )
# Row numbering. seq_len() is used instead of 1:nrow() so that an empty table
# yields an empty index rather than the sequence c(1, 0).
TDF_Region$N <- seq_len(nrow(TDF_Region))
# Put the row number first, followed by the region and its frequencies.
TDF_Region <- TDF_Region[, c("N", "Region", "ni", "hi")]
# Totals row. `N` is left as an empty string so the row shows no number; as a
# consequence rbind() stores the N column of the combined table as character.
# `hi` is the sum of the already-rounded percentages, so it can differ
# slightly from 100 because of rounding.
Sumatoria <- data.frame(
  N = "",
  Region = "Sumatoria",
  ni = sum(TDF_Region$ni),
  hi = sum(TDF_Region$hi)
)
# Display table: per-region rows plus the totals row. TDF_Region itself is
# kept without the totals row.
TDF_Region_suma <- rbind(TDF_Region, Sumatoria)
# Final table: render the regional frequency table (totals row included) with
# centred columns, then apply compact striped/hover bootstrap styling.
kable_styling(
  kable(
    TDF_Region_suma,
    align = "c",
    caption = "Tabla Nº 4:Distribución de frecuencias de las estaciones de
monitoreo por región en el estudio de contaminación del agua en China,
año 2023"
  ),
  full_width = FALSE,
  bootstrap_options = c("striped", "hover", "condensed"),
  position = "center"
)
Tabla Nº 4:Distribución de frecuencias de las estaciones de monitoreo
por región en el estudio de contaminación del agua en China, año 2023
|
N
|
Region
|
ni
|
hi
|
|
1
|
Centro
|
730
|
24.33
|
|
2
|
Este
|
909
|
30.30
|
|
3
|
Norte
|
599
|
19.97
|
|
4
|
Oeste
|
165
|
5.50
|
|
5
|
Sur
|
597
|
19.90
|
|
|
Sumatoria
|
3000
|
100.00
|
# Absolute frequencies per region, used as the bar heights.
ni <- TDF_Region$ni
# Charts
# Local-scale bar chart: counts per region on a 0-1000 axis, with the region
# names as bar labels drawn perpendicular to the axis (las = 2).
barplot(
  height = ni,
  names.arg = TDF_Region$Region,
  las = 2,
  col = "skyblue",
  ylim = c(0, 1000),
  xlab = "Región",
  ylab = "Cantidad",
  main = "Gráfica N°1: Distribución de las estaciones de
monitoreo por región en el estudio de contaminación del agua en China,
año 2023"
)

# Relative frequencies (percentages) per region, used as the bar heights.
hi <- TDF_Region$hi
# Local-scale percentage bar chart.
# The upper axis limit is at least 30 and otherwise the largest percentage
# rounded up to the next multiple of 5, so the tallest bar always stays inside
# the axis (a fixed limit of 30 is exceeded when a region is above 30 %).
# The duplicated word "Distribución" in the title has been removed.
barplot(
  hi,
  main = "Gráfica N°2: Distribución de frecuencias de las
estaciones de monitoreo por región en el estudio de contaminación del agua en
China, año 2023",
  xlab = "Región",
  ylab = "Porcentaje",
  col = "green",
  ylim = c(0, max(30, ceiling(max(hi) / 5) * 5)),
  names.arg = TDF_Region$Region,
  las = 2
)

# Global-scale bar chart: counts per region drawn on a 0-3000 axis.
# The x-axis label is "Región" because the bars are labelled with
# TDF_Region$Region; the previous label "Fuentes de Agua" did not match the
# plotted variable.
barplot(
  ni,
  main = "Gráfica N°3: Distribución de frecuencias de las estaciones de
monitoreo por región en el estudio de contaminación del agua en China,
año 2023",
  xlab = "Región",
  ylab = "Cantidad",
  col = "pink",
  ylim = c(0, 3000),
  names.arg = TDF_Region$Region,
  las = 2
)

# Global-scale percentage bar chart: percentages per region on the full
# 0-100 axis, with region names drawn perpendicular to the axis (las = 2).
barplot(
  height = hi,
  names.arg = TDF_Region$Region,
  las = 2,
  col = "green",
  ylim = c(0, 100),
  xlab = "Región",
  ylab = "Porcentaje",
  main = "Gráfica N°4: Distribución de frecuencias de las estaciones de
monitoreo por región en el estudio de contaminación del agua en China, 2023"
)

# One colour per region: the heat palette in reversed order, sized to the
# number of percentages being plotted.
colores <- rev(heat.colors(length(hi)))
# Pie chart of the regional percentages; each slice is labelled with its
# percentage value followed by "%".
pie(
  x = hi,
  labels = paste0(hi, "%"),
  col = colores,
  radius = 1,
  cex = 1,
  cex.main = 1,
  main = "Gráfica N°5: Distribución de frecuencias de las estaciones de
monitoreo por región en el estudio de contaminación del agua en China, 2023"
)
# Legend mapping each slice colour to its region name.
legend(
  x = "bottomright",
  title = "Regiones",
  legend = TDF_Region$Region,
  fill = colores,
  cex = 0.9
)
