knitr::opts_knit$set(root.dir = "~/Desktop/Semestre 4/Ciencia de Datos para la Toma de Decisiones/Reto/Reto_CienciaDatosI")
# install.packages('tidyverse')
library('tidyverse')
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# install.packages('ggalluvial')
library('ggalluvial')
# install.packages('ggplot2')
library('ggplot2')
# install.packages('dplyr')
library('dplyr')
# install.packages('tidygraph')
library('tidygraph')
##
## Attaching package: 'tidygraph'
##
## The following object is masked from 'package:stats':
##
## filter
# install.packages('igraph')
library('igraph')
##
## Attaching package: 'igraph'
##
## The following object is masked from 'package:tidygraph':
##
## groups
##
## The following objects are masked from 'package:lubridate':
##
## %--%, union
##
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
##
## The following objects are masked from 'package:purrr':
##
## compose, simplify
##
## The following object is masked from 'package:tidyr':
##
## crossing
##
## The following object is masked from 'package:tibble':
##
## as_data_frame
##
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
##
## The following object is masked from 'package:base':
##
## union
# install.packages('ggraph')
library('ggraph')
# install.packages('ggrepel')
library('ggrepel')
# install.packages('GGally')
library('GGally')
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# install.packages('janitor')
library('janitor')
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# install.packages('FactoMineR')
library('FactoMineR')
# install.packages('factoextra')
library('factoextra')
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# install.packages('readxl')
library('readxl')
# install.packages('p2distance')
library('p2distance')
# install.packages('stratification')
library('stratification')
# install.packages('sf')
library('sf')
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
# install.packages('RColorBrewer')
library('RColorBrewer')
# install.packages('dendextend')
library('dendextend')
##
## ---------------------
## Welcome to dendextend version 1.19.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags:
## https://stackoverflow.com/questions/tagged/dendextend
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
##
## Attaching package: 'dendextend'
##
## The following object is masked from 'package:stats':
##
## cutree
# install.packages("viridis")
library('viridis')
## Loading required package: viridisLite
# install.packages(ggdendro)
library('ggdendro')
##
## Attaching package: 'ggdendro'
##
## The following object is masked from 'package:dendextend':
##
## theme_dendro
# install.packages('sysfonts')
library('sysfonts')
# install.packages('showtext')
library('showtext')
## Loading required package: showtextdb
df <- read.csv('data/economy_foreign_trade_nat_2025-05-28T00_31_14.973Z.csv') %>%
clean_names() %>%
select(-hs4) %>%
mutate(hs4_id = as.character(hs4_id -160000))
df_2 <- read.csv('data/economy_foreign_trade_nat_2025-05-28T15_38_42.698Z.csv') %>%
clean_names()
df_3 <- read.csv('data/economy_foreign_trade_ent_2025-05-28T16_19_50.381Z.csv') %>%
clean_names()
df_4 <- read.csv('data/economy_foreign_trade_ent_2025-05-31T08_49_02.497Z.csv') %>%
clean_names() %>%
select(-hs4) %>%
mutate(hs4_id = as.character(hs4_id -160000))
mapa <- st_read('data/ne_10m_admin_0_countries')
## Reading layer `ne_10m_admin_0_countries' from data source
## `/Users/Diego1/Desktop/Semestre 4/Ciencia de Datos para la Toma de Decisiones/Reto/Reto_CienciaDatosI/data/ne_10m_admin_0_countries'
## using driver `ESRI Shapefile'
## Simple feature collection with 258 features and 168 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -180 ymin: -90 xmax: 180 ymax: 83.6341
## Geodetic CRS: WGS 84
mexico <- st_read('data/entidades') %>%
clean_names() %>%
select(-cvegeo, -cve_ent)
## Reading layer `00ent' from data source
## `/Users/Diego1/Desktop/Semestre 4/Ciencia de Datos para la Toma de Decisiones/Reto/Reto_CienciaDatosI/data/entidades'
## using driver `ESRI Shapefile'
## Simple feature collection with 32 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 911292 ymin: 319149.1 xmax: 4082997 ymax: 2349615
## Projected CRS: MEXICO_ITRF_2008_LCC
font_add_google("Montserrat", "Montserrat")
¿Qué oportunidades comerciales están siendo desaprovechadas en la industria mexicana de semiconductores en función de los patrones de exportación actuales?
La industria mexicana de semiconductores presenta oportunidades comerciales no aprovechadas hacia países fuera de sus socios tradicionales, las cuales pueden identificarse mediante análisis jerárquico y visualización geográfica de flujos de exportación.
Para encontrar un grupo de productos que representan una oportunidad, se hace una estratificación jerárquica con ward.D2, que se centra en minimizar la varianza dentro de cada cluster. Se resaltan los productos que se encuentran en el mismo cluster. La lógica es que los productos con volúmen de exportaciones similares presentan relaciones ya existentes pero no fortalecidas, oportunidades para la industria de Semiconductores.
distancias = df %>%
column_to_rownames(var = 'hs4_id') %>%
dist(method = "euclidean")
complete_fit = hclust(distancias, method = "ward.D2") # Se crea una estratificación
# jerárquica con método ward.D2 (que busca minimizar las varianzas dentro de los
# clusters)
dendograma = as.dendrogram(complete_fit) # Se crea el dendrograma
clusters = cutree(complete_fit, k = 5) %>% # Se establecen cinco
# clusters a partir de los datos
as_tibble(rownames = 'hs4_id') %>%
rename(cluster = value) %>%
mutate(cluster = factor(cluster))
labels_in_cluster <- clusters %>%
filter(cluster == 1) %>%
pull(hs4_id)
all_labels_colors <- rep("#C69C6D", length(labels(dendograma)))
names(all_labels_colors) <- labels(dendograma)
all_labels_colors[labels_in_cluster] <- "#B21B40"
all_labels_colors["8541"] <- "#5E1224"
dendograma_coloreado <- dendograma %>%
set("labels_colors", all_labels_colors[labels(dendograma)]) %>%
set("labels", ifelse(labels(dendograma) == "8541",
expression(bold("8541")),
labels(dendograma)))
colores_para_bordes_rect <- c("#C69C6D", "#C69C6D", "#C69C6D", "#C69C6D", "#B21B40")
plot( # Se grafica el dendograma con las configuraciones visuales
x = dendograma_coloreado,
family = 'Montserrat',
main = 'Aglomeración por volumen de exportación totales',
ylab = 'Distancia entre cluster',
cex.axis = 0.8,
cex.lab = 0.9,
cex.main = 1,
mar = c(12, 4, 4, 2) + 0.1,
sub = 'Elaboración propia con datos de Data México'
)
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in axis(side = side, at = at, labels = labels, ...): no font could be
## found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
## Warning in title(...): no font could be found for family "Montserrat"
rect.hclust(complete_fit, k = 5, border = colores_para_bordes_rect) # Se
# grafican los clusters con las configuraciones visuales
Bajo la misma lógica, se buscan identificar los destinos de las rutas que representan una oportunidad para los semiconductores. Para esto, se seleccionan los productos en el cluster del dendograma pasado y se observa el volúmen de exportaciones delcluster entero, para ver qué mercados convendría priorizar. Se eliminan E.E.U.U.y China para resaltar oportunidades externas a posibles tensiones con el T-MEC.
mapa_mundial_limpio <- mapa %>% # Se crea el mapa con datos de los productos
# seleccionados en el clustering
clean_names() %>%
select(iso_a3, geometry) %>%
mutate(iso_a3 = tolower(iso_a3))
robinson_proj_string <- "+proj=robin +lon_0=0 +x_0=0 +y_0=0 +ellps=WGS84 +datum=WGS84 +units=m +no_defs"
mapa_mundial_proyectado <- mapa_mundial_limpio %>%
st_transform(crs = robinson_proj_string)
mapa_mundial_disuelto <- mapa_mundial_proyectado %>%
mutate(geometry = st_make_valid(geometry)) %>%
group_by(iso_a3) %>%
summarise(geometry = st_union(geometry), .groups = 'drop') %>%
ungroup()
mapa_final <- mapa_mundial_disuelto %>%
left_join(df_2, by = c("iso_a3" = "country_id")) %>%
mutate(
trade_value = ifelse(is.na(trade_value), 0, trade_value),
trade_value = case_when(
iso_a3 == "mex" ~ NA,
iso_a3 == "chn" ~ NA,
iso_a3 == "usa" ~NA,
TRUE ~ trade_value
)
)
showtext_auto()
ggplot(data = mapa_final) + # Se grafica el mapa con las configuraciones visuales
geom_sf(aes(fill = log(trade_value)), color = "gray40") +
scale_fill_gradientn(
colors = c('antiquewhite', '#E6C79C', '#23675A'),
na.value = "white",
name = "Valor de Exportaciones \n(Escala logarítmica)"
) +
labs(
caption = 'Elaboración propia con datos de Data México y geometría de Natural Earth Data'
) +
theme_minimal(base_family = "Montserrat") +
theme(
plot.background = element_rect(fill = "antiquewhite", color = NA),
panel.background = element_rect(fill = "antiquewhite", color = NA),
plot.caption = element_text(face = "italic", hjust = 1),
legend.text = element_text(face = "bold"),
legend.title = element_text(face = "bold")
) +
guides(fill = guide_colorbar(
frame.colour = "black",
frame.linewidth = 0.1
))
Bajo la misma lógica, se buscan identificar los destinos de las rutas que representan una oportunidad para los semiconductores. Para esto, se seleccionan los productos en el cluster del dendograma pasado y se observa el volúmen de exportaciones del cluster entero, para ver qué mercados convendría priorizar. Se eliminan E.E.U.U.y China para resaltar oportunidades externas a posibles tensiones con el T-MEC.
total_estado <- df_3 %>%
group_by(state) %>%
summarise(total_trade_value_estado = sum(trade_value, na.rm = TRUE)) %>%
ungroup()
map_mex <- mexico %>% # Se crea el mapa con datos de los productos seleccionados
# en el clustering
left_join(total_estado, by = c('nomgeo' = 'state'))
setwd("~/Desktop/Semestre 4/Ciencia de Datos para la Toma de Decisiones/Reto/Reto_CienciaDatosI")
ggplot(data = map_mex) + # Se grafica el mapa con las configuraciones visuales
geom_sf(aes(fill = log(total_trade_value_estado)), color = "gray40", lwd = 0.2) +
scale_fill_gradientn(
colors = c('antiquewhite', '#E6C79C', '#23675A'),
na.value = "antiquewhite",
name = "Valor de Exportaciones\n(Escala logarítmica)"
) +
theme_minimal(base_family = "Montserrat") +
theme(
plot.background = element_rect(fill = "antiquewhite", color = NA),
panel.background = element_rect(fill = "antiquewhite", color = NA),
plot.caption = element_text(face = "italic", hjust = 1),
axis.text = element_text(face = "bold"),
legend.text = element_text(face = "bold"),
legend.title = element_text(face = "bold")
) +
labs(
caption = 'Elaboración propia con datos de Data México y geometría de INEGI'
) +
guides(fill = guide_colorbar(
frame.colour = "black",
frame.linewidth = 0.1
))
Para identificar los flujos comerciales de la industria, notando las tendencias de el volúmen de exportaciones de los productos, se seleccionan los cinco productos más exportados. De aquí, se seleccionan los cinco estados con mayor volúmen de exportaciones de la sección HS85, así como los cinco países que más importan a México. Se dejan los demás elementos como “Otros”.
top_5_states_names <- df_4 %>% # Se toman los cinco estados con mayor volúmen de
# exportaciones
group_by(state) %>%
summarise(total_trade_value = sum(trade_value, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(total_trade_value)) %>%
head(5) %>%
pull(state)
top_5_hs4_ids <- df_4 %>% # Se toman los cinco productos con mayor volúmen de
# exportaciones
group_by(hs4_id) %>%
summarise(total_trade_value = sum(trade_value, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(total_trade_value)) %>%
head(5) %>%
pull(hs4_id)
top_5_countries <- df_4 %>% # # Se toman los cinco países con mayor volúmen de
# importaciones a México
group_by(country) %>%
summarise(total_trade_value = sum(trade_value, na.rm = TRUE)) %>%
ungroup() %>%
arrange(desc(total_trade_value)) %>%
head(5) %>%
pull(country)
df_clasificado_base <- df_4 %>% # Se juntan todas las categorías pasadas y se deja
# como "Otros" a todo registro no incluido en ellas
mutate(
state_category = ifelse(
state %in% top_5_states_names,
as.character(state),
"Otras Entidades \nFederativas"
),
hs4_category = ifelse(
hs4_id %in% top_5_hs4_ids,
as.character(hs4_id),
"Otros Subsectores \nde HS85"
),
country_category = ifelse(
country %in% top_5_countries,
as.character(country),
"Otros Países"
)
)
state_order <- df_clasificado_base %>%
group_by(state_category) %>%
summarise(total_val = sum(trade_value, na.rm = TRUE)) %>%
arrange(total_val) %>%
pull(state_category)
hs4_order <- df_clasificado_base %>%
group_by(hs4_category) %>%
summarise(total_val = sum(trade_value, na.rm = TRUE)) %>%
arrange(total_val) %>%
pull(hs4_category)
country_order <- df_clasificado_base %>%
group_by(country_category) %>%
summarise(total_val = sum(trade_value, na.rm = TRUE)) %>%
arrange(total_val) %>%
pull(country_category)
df_alluvial_data <- df_clasificado_base %>%
group_by(state_category, hs4_category, country_category) %>%
summarise(trade_value_flow = sum(trade_value, na.rm = TRUE)) %>%
ungroup() %>%
mutate(
state_category = factor(state_category, levels = state_order),
hs4_category = factor(hs4_category, levels = hs4_order),
country_category = factor(country_category, levels = country_order),
trade_value_flow = (trade_value_flow / sum(trade_value_flow, na.rm = TRUE)) * 100
) %>%
group_by(state_category, hs4_category, country_category)
## `summarise()` has grouped output by 'state_category', 'hs4_category'. You can
## override using the `.groups` argument.
my_all_strata_colors <- c(
# Estados
"Ciudad de México" = "gray20",
"Tamaulipas" = "gray30",
"Jalisco" = "gray40",
"Baja California" = "gray50",
"Chihuahua" = "gray60",
"Otras Entidades \nFederativas" = "gray70",
# Códigos HS4
"8536" = "#E6C79C",
"8544" = "#C69C6D",
"8528" = "#B21B40",
"8542" = "#5E1224",
"8517" = "#23675A",
"Otros Subsectores \nde HS85" = "#00332A",
# Países
"Japón" = "gray20",
"Corea del Sur" = "gray30",
"Malasia" = "gray40",
"China" = "gray50",
"Otros Países" = "gray60",
"Estados Unidos" = "gray70"
)
ggplot(data = df_alluvial_data,
aes(axis1 = state_category,
axis2 = hs4_category,
axis3 = country_category,
y = trade_value_flow)) +
geom_alluvium(aes(fill = hs4_category),
curve_type = "quintic",
alpha = 0.70
) +
geom_stratum(aes(fill = after_stat(stratum)), color = NA) +
geom_text(stat = "stratum",
aes(label = after_stat(stratum)), color = 'white',
fontface = 'bold',
family = 'Montserrat',
size = 2
) +
theme_minimal() +
scale_fill_manual(values = my_all_strata_colors) +
scale_x_discrete(limits = c("Entidades Federativas", "Subsector", "Países"),
expand = c(0.15, 0.05)) +
labs(
title = '\nFlujo Comercial por Entidad Federativa',
y = 'Porcentaje del flujo comercial agregado del sector HS85',
caption = 'Elaboración propia con datos de Data México'
) +
theme_minimal(base_family = "Montserrat") +
theme(
plot.background = element_rect(fill = "white", color = NA),
panel.background = element_rect(fill = "white", color = NA),
plot.title = element_text(face = "bold", hjust = 0.5,
family = 'Montserrat'),
legend.position = 'none',
axis.text = element_text(face = "bold"),
axis.title.y = element_text(face = "bold"),
axis.title.x = element_blank(),
plot.caption = element_text(face = "italic", hjust = 1)
)
ggsave("graf_alluvial.jpg", width = 14, height = 8, dpi = 300)