library(dplyr)
library(readxl)
library(forcats)
library(ggplot2)
library(scales)
library(plotly)
library(countrycode)
library(rmarkdown)
# Load data ----
# Read the workbook into `TB`. The path is relative, so it is resolved against
# the current working directory.
TB <- read_excel("Documents/Maestria UNAM/Metodos /TB.xlsx")

# View() opens a data viewer, which is only useful in an interactive session;
# skip it when the script is run non-interactively.
if (interactive()) {
  View(TB)
}

# Structure and overview ----
str(TB)
summary(TB)
names(TB)
head(TB)
head(TB, 10)
tail(TB)
ncol(TB)
nrow(TB)

# Countries present in the data ----
unique(TB$Country)
length(unique(TB$Country))
table(TB$Country)

# Summaries of the main count columns ----
summary(TB$Deaths)
summary(TB$Cases)
# `Cases HIV` contains a space, so the name must be quoted with backticks.
summary(TB$`Cases HIV`)
# Mexico, 2024 ----
# Rows of TB for Mexico in 2024; the single-column extracts below reuse this
# subset instead of repeating the same filter.
Mexico_2024 <- TB %>%
  filter(Country == "Mexico", Year == 2024)
Mexico_2024

# `Estimated population` contains a space, so the column name must be quoted
# with backticks inside select().
population_mexico_2024 <- Mexico_2024 %>%
  select(`Estimated population`)
population_mexico_2024

cases_mexico_2024 <- Mexico_2024 %>%
  select(Cases)
cases_mexico_2024

deaths_mexico_2024 <- Mexico_2024 %>%
  select(Deaths)
deaths_mexico_2024
# Top 10 countries by cases, 2024 ----
# Sum Cases per country for 2024 (ignoring NA), sort descending and keep the
# first ten rows.
top10_2024 <- TB %>%
  filter(Year == 2024) %>%
  group_by(Country) %>%
  summarize(total_cases = sum(Cases, na.rm = TRUE)) %>%
  arrange(desc(total_cases)) %>%
  slice(1:10)
top10_2024

# Horizontal bar chart; reorder() sorts the bars by total_cases.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  top10_2024,
  aes(x = reorder(Country, total_cases), y = total_cases)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 countries by number of cases 2024",
    x = "Country",
    y = "Cases"
  )
# 10 countries with the fewest cases, 2024 ----
# Same aggregation as the top-10 ranking but sorted ascending, so the first
# ten rows are the smallest totals.
bottom10_2024 <- TB %>%
  filter(Year == 2024) %>%
  group_by(Country) %>%
  summarize(total_cases = sum(Cases, na.rm = TRUE)) %>%
  arrange(total_cases) %>%
  slice(1:10)
bottom10_2024

# Horizontal bar chart; reorder() sorts the bars by total_cases.
# geom_col() is the direct equivalent of geom_bar(stat = "identity").
ggplot(
  bottom10_2024,
  aes(x = reorder(Country, total_cases), y = total_cases)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 countries with the fewest cases",
    x = "Country",
    y = "Cases"
  )
# Top 10 countries by incidence, 2024 ----
# Mean of the Incidence column per country for 2024 (ignoring NA), sorted
# descending, first ten rows. The pipe must end the line it continues from;
# a line that begins with %>% is a parse error.
top10_incidence_2024 <- TB %>%
  filter(Year == 2024) %>%
  group_by(Country) %>%
  summarize(total_incidence = mean(Incidence, na.rm = TRUE)) %>%
  arrange(desc(total_incidence)) %>%
  slice(1:10)
top10_incidence_2024

# Basic horizontal bar chart.
ggplot(
  top10_incidence_2024,
  aes(x = reorder(Country, total_incidence), y = total_incidence)
) +
  geom_col() +
  coord_flip() +
  labs(
    title = "Top 10 incidence",
    x = "Country",
    y = "Incidence per 100,000 inhabitants"
  )

# Styled version of the same chart, built from the same summary table:
# grey-to-black fill gradient, value labels at the end of each bar, no legend.
ggplot(
  top10_incidence_2024,
  aes(
    x = reorder(Country, total_incidence),
    y = total_incidence,
    fill = total_incidence
  )
) +
  geom_col(width = 0.7, alpha = 0.9) +
  coord_flip() +
  geom_text(aes(label = round(total_incidence, 1)), hjust = -0.2, size = 4.5) +
  # Extra room past the longest bar so the value labels are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_gradient(low = "lightgray", high = "black") +
  labs(
    title = "Top 10 incidence TB",
    subtitle = "Countries",
    x = "",
    y = "Incidence per 100,000 inhabitants",
    fill = "Incidence"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13, color = "gray30"),
    axis.text = element_text(color = "gray10"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  )
# Top 10 countries by deaths, 2024 ----
# Per-country sum of Deaths for 2024 (NA ignored), largest totals first,
# keeping the first ten rows.
top10_mortality_2024 <- TB %>%
  filter(Year == 2024) %>%
  group_by(Country) %>%
  summarise(total_mortality = sum(Deaths, na.rm = TRUE)) %>%
  arrange(desc(total_mortality)) %>%
  slice(seq_len(10))

top10_mortality_2024
# Top 10 countries by TB/HIV coinfection cases, 2024 ----
# `Cases HIV` contains a space, so the column name must be quoted with
# backticks. Totals are summed per country (ignoring NA), sorted descending,
# and the first ten rows are kept.
top10_coinfection_2024 <- TB %>%
  filter(Year == 2024) %>%
  group_by(Country) %>%
  summarize(total_coinfection = sum(`Cases HIV`, na.rm = TRUE)) %>%
  arrange(desc(total_coinfection)) %>%
  slice(1:10)
top10_coinfection_2024

# Horizontal bar chart with a grey-to-black fill gradient, comma-formatted
# value labels at the end of each bar, and no legend.
ggplot(
  top10_coinfection_2024,
  aes(
    x = reorder(Country, total_coinfection),
    y = total_coinfection,
    fill = total_coinfection
  )
) +
  geom_col(width = 0.7, alpha = 0.9) +
  coord_flip() +
  geom_text(aes(label = comma(total_coinfection)), hjust = -0.2, size = 4.5) +
  # Extra room past the longest bar so the value labels are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  scale_fill_gradient(low = "lightgray", high = "black") +
  labs(
    title = "Top 10 coninfection TB/HIV by country in 2024)",
    subtitle = "Coinfection",
    x = "",
    y = "Coinfección cases TB/VIH",
    fill = "Cases"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", size = 18),
    plot.subtitle = element_text(size = 13, color = "gray30"),
    axis.text = element_text(color = "gray10"),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    legend.position = "none"
  )
# Deaths among TB/HIV cases in the top coinfection countries, 2024 ----
# Restrict to the countries listed in top10_coinfection_2024.
coinfection_countries <- top10_coinfection_2024$Country

# `Deaths in HIV/Tb` contains spaces and a slash, so the column name must be
# quoted with backticks; unquoted it does not parse as a single name.
mortality_coinfection_group <- TB %>%
  filter(Year == 2024, Country %in% coinfection_countries) %>%
  group_by(Country) %>%
  summarize(total_mortality = sum(`Deaths in HIV/Tb`, na.rm = TRUE)) %>%
  arrange(desc(total_mortality))
mortality_coinfection_group
# t-test: Mexico vs. the country with the most cases (2000-2025) ----
# Keep rows whose Year lies in [2000, 2025].
data_filtered <- TB %>%
  filter(Year >= 2000 & Year <= 2025)

# Country with the largest sum of Cases over the filtered period.
top_country <- data_filtered %>%
  group_by(Country) %>%
  summarise(total_cases = sum(Cases, na.rm = TRUE)) %>%
  arrange(desc(total_cases)) %>%
  slice(1) %>%
  pull(Country)
top_country

# Case counts for each of the two countries being compared.
mexico_cases <- data_filtered %>%
  filter(Country == "Mexico") %>%
  pull(Cases)
top_cases <- data_filtered %>%
  filter(Country == top_country) %>%
  pull(Cases)

# Two-sample t-test with t.test() defaults.
t_test_result <- t.test(mexico_cases, top_cases)
t_test_result
# t-test: the two countries with the most cases (2000-2025) ----
# Rows of TB whose Year lies in [2000, 2025].
data_filtered <- TB %>%
  filter(Year >= 2000, Year <= 2025)

# Names of the two countries with the largest summed Cases in that period.
top_2_countries <- data_filtered %>%
  group_by(Country) %>%
  summarise(total_cases = sum(Cases, na.rm = TRUE)) %>%
  arrange(desc(total_cases)) %>%
  slice(1:2) %>%
  pull(Country)

top_2_countries

# Case counts for the first- and second-ranked country.
country1_cases <- data_filtered %>%
  filter(Country == top_2_countries[1]) %>%
  pull(Cases)

country2_cases <- data_filtered %>%
  filter(Country == top_2_countries[2]) %>%
  pull(Cases)

# Two-sample t-test with t.test() defaults; overwrites any earlier
# t_test_result.
t_test_result <- t.test(country1_cases, country2_cases)

t_test_result
# Line chart: cases over time for the top two countries ----
plot_data <- data_filtered %>%
  filter(Country %in% top_2_countries)

# NOTE(review): the title hard-codes "India vs China" while the plotted
# countries come from top_2_countries; confirm they agree for this data.
ggplot(
  plot_data,
  aes(x = Year, y = Cases, color = Country, group = Country)
) +
  # `linewidth` replaces the deprecated `size` argument for lines
  # (ggplot2 >= 3.4.0).
  geom_line(linewidth = 1.8) +
  geom_point(size = 3.5, alpha = 0.9) +
  # Label only the years divisible by 5 to keep the chart readable.
  geom_text(
    data = plot_data %>% filter(Year %% 5 == 0),
    aes(label = comma(Cases)),
    vjust = -0.6,
    size = 4
  ) +
  scale_y_continuous(
    labels = comma,
    expand = expansion(mult = c(0.05, 0.15))
  ) +
  scale_color_manual(values = c("#0072B2", "#D55E00")) +
  labs(
    title = "Cases in India vs China from 2000 to 2025",
    subtitle = paste("Cases", paste(top_2_countries, collapse = " y ")),
    x = "Year",
    y = "Cases",
    color = "Country"
  ) +
  theme_minimal(base_size = 15) +
  theme(
    plot.title = element_text(size = 19, face = "bold", color = "#222222"),
    plot.subtitle = element_text(size = 14, color = "gray30"),
    legend.position = "top",
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text = element_text(color = "#333333"),
    axis.title = element_text(color = "#222222", face = "bold")
  )
# Interactive line chart: cases over time for the top two countries ----
plot_data <- data_filtered %>%
  filter(Country %in% top_2_countries)

# NOTE(review): the title hard-codes "India vs China" while the plotted
# countries come from top_2_countries; confirm they agree for this data.
interactive_plot <- plot_ly(
  data = plot_data,
  x = ~Year,
  y = ~Cases,
  color = ~Country,
  colors = c("#0072B2", "#D55E00"),
  type = "scatter",
  mode = "lines+markers",
  marker = list(size = 9),
  line = list(width = 4)
) %>%
  layout(
    title = list(
      # Two-line title: headline, then "Cases <country> y <country>".
      # The line break is written as <br>, the markup plotly uses for
      # line breaks in text; the second line is built with paste() so that
      # "Cases" and the country names are separated by a space.
      # NOTE(review): the original string had a raw line break at this
      # point; confirm <br> gives the intended layout.
      text = paste0(
        "📈 Cases in India vs China from 2000 to 2025<br>",
        paste("Cases", paste(top_2_countries, collapse = " y "))
      )
    ),
    xaxis = list(title = "Year"),
    yaxis = list(title = "Cases", tickformat = ","),
    legend = list(orientation = "h", y = -0.2)
  )
interactive_plot
# Analysis by continent (2000-2025) ----
# Add a `continent` column derived from the country name.
TB$continent <- countrycode(
  TB$Country,
  origin = "country.name",
  destination = "continent"
)

# Countries for which no continent was assigned (NA in the new column).
unique(TB$Country[is.na(TB$continent)])

# Total cases per continent and year within the period.
continent_Cases <- TB %>%
  filter(Year >= 2000 & Year <= 2025) %>%
  group_by(continent, Year) %>%
  summarise(total_Cases = sum(Cases, na.rm = TRUE)) %>%
  ungroup()

# One-way ANOVA of yearly totals across continents, followed by Tukey's HSD
# pairwise comparisons on the fitted model.
model <- aov(total_Cases ~ continent, data = continent_Cases)
summary(model)
TukeyHSD(model)

# Interactive box plot of the yearly totals, one box per continent.
interactive_box <- plot_ly(
  continent_Cases,
  x = ~continent,
  y = ~total_Cases,
  type = "box",
  color = ~continent,
  colors = "Set2",
  boxmean = TRUE
) %>%
  layout(
    title = "Analysis by continent (2000–2025)",
    xaxis = list(title = "Continent"),
    yaxis = list(title = "Cases", tickformat = ","),
    showlegend = FALSE
  )
interactive_box