# Load Packages  ----------------------------------------------------------

pacman::p_load(
  rio,          # for importing data
  RODBC,        # loads package into the R environment
  here,         # for locating files
  janitor,      # for data cleaning
  skimr,        # for reviewing the data
  lubridate,    # for date cleaning
  epikit,       # creating age categories
  gtsummary,    # creating tables
  knitr,        # neat tables
  scales,       # percents in tables
  flextable,    # for making pretty tables
  ggExtra,      # add additional plotting functions
  gghighlight,  # highlight proportions of the plots
  RODBC,        #
  pacman,
  dplyr,
  visdat,
  naniar,
  kableExtra,
  shiny,
  visdat,       #
  tidyverse,     # for data management and visualization
  pixiedust,
  kableExtra,
  formattable, 
  kableExtra,
  viridis,
  sf
)
DRIVERINFO <-  "Driver={Microsoft Access Driver (*.mdb, *.accdb)};"
MDBPATH <- "C:/eFHSIS/Database/eFHSIS_be.mdb"
PATH <- paste0(DRIVERINFO, "DBQ=", MDBPATH)

## Establish connection in the database
region_fhsis_db <- odbcConnectAccess2007(PATH)

# Fetch data
morb_2024 <- sqlFetch(region_fhsis_db, "M2 BHS")

## Importing icd and history data
icd10_code <- import(here("data", "ICD10_Reference.xlsx"))
morb_2023 <- import(here("data", "MORB_2023.xlsx"))
population <- import(here("data", "POPULATION.xlsx"))
# Function to pivot and clean the data
pivot_and_clean <- function(data) {
  data %>%
    pivot_longer(
      cols = "UNDER1_M":"29DAYS_11MOS_F",
      names_to = "AGE_GROUP",
      values_to = "CASES"
    )
}

# Function to filter out rows with 0 cases
filter_zero_cases <- function(data) {
  data %>%
    filter_if(is.numeric, all_vars(. != 0))
}

# List of original DataFrames
morb_dataframes <- list(
  morb_2024,
  morb_2023
)

# Apply pivot function to each DataFrame and store results
pivoted_dataframes <- lapply(morb_dataframes, pivot_and_clean)

# Apply filter function to each pivoted DataFrame and store results
cleaned_dataframes <- lapply(pivoted_dataframes, filter_zero_cases)

# Naming the cleaned DataFrames
names(cleaned_dataframes) <- c("morb_2024_cleaned", "morb_2023_cleaned")

# Output cleaned DataFrames into separate variables
morb_2024_cleaned <- cleaned_dataframes[[1]]
morb_2023_cleaned <- cleaned_dataframes[[2]]
# Function to clean morbidity data
clean_morbidity_data <- function(data) {
  data %>%
    mutate(
      SEX = recode(substr(AGE_GROUP, nchar(AGE_GROUP), nchar(AGE_GROUP)), "M" = "MALE", "F" = "FEMALE"),
      MONTH = format(DATE, "%B"),
      YEAR = format(DATE, "%Y"),
      
      # Extract age group
      AGE_GROUP_CL = gsub("_", "-", substr(AGE_GROUP, 1, nchar(AGE_GROUP) - 2)),
      
      # Create age categories using case_when
      AGE_CATEGORIES = case_when(
        AGE_GROUP_CL %in% c("UNDER1", "1-4", "5-9", "10-14", "0-6DAYS", "7-28DAYS", "29DAYS-11MOS") ~ "CHILDREN",
        AGE_GROUP_CL %in% c("10_14", "15-19") ~ "ADOLESCENT",
        AGE_GROUP_CL %in% c("20-24", "25-29", "30-34", "35-39", "40-44", "45-49", "50-54", "55-59") ~ "ADULT",
        AGE_GROUP_CL %in% c("60-64", "65ABOVE", "65-69", "70ABOVE") ~ "SENIOR",
        TRUE ~ "NO AGE"
      )
    )
}

cleaned_dataframes <- list(
  morb_2024_cleaned,
  morb_2023_cleaned
)

# Apply the cleaning function to each DataFrame and store results
final_cleaned_dataframes <- lapply(cleaned_dataframes, clean_morbidity_data)

# Naming the final cleaned DataFrames
names(final_cleaned_dataframes) <- c("morb_2024_final", "morb_2023_final")

# Function to add ICD10_CODE to each dataframe in the list
final_cleaned_dataframes <- lapply(final_cleaned_dataframes, function(df) {
  df %>%
    mutate(ICD_CODE = substr(DISEASE, 1, 3))
})


# Output cleaned DataFrames into separate variables
morb_2024_final <- final_cleaned_dataframes[[1]]
morb_2023_final <- final_cleaned_dataframes[[2]]
icd10_code <- icd10_code %>%
  mutate(extracted_category = str_extract(CATEGORY_NAME, "\\(([^)]+)\\)")) %>%
  mutate(extracted_category = str_replace_all(extracted_category, "[()]", ""))


icd10_code_unique <- icd10_code %>% 
  distinct(ICD_CODE, .keep_all = TRUE)
morbidity_all <- bind_rows(final_cleaned_dataframes)

# Putting ICD Code in the morbidity database
morbidity_all_icdcode <- morbidity_all %>%
  left_join(icd10_code_unique %>%
              select(ICD_CODE, ICD10_DESC, ICD10_CAT, ICD_CAT_CODE, CATEGORY_NAME, extracted_category), 
            by = "ICD_CODE")

Rationale


In recent years, the morbidity rate in CALABARZON, has shown a noticeable increase. This trend has raised concerns about the public health status and the effectiveness of the healthcare infrastructure.

Understanding the factors that contribute to the rising morbidity is crucial for health policymakers and professionals. By analyzing morbidity data for the years 2023 and 2024, we aim to identify key patterns and insights that may inform better healthcare strategies.

# Filter population data for 2024 and get the total population for the region
total_population_2024 <- population %>%
  filter(Year == "2024") %>%
  summarise(total_population = sum(Total, na.rm = TRUE))

# Get the total morbidity cases for 2024 in the region
total_cases_2024 <- morbidity_all_icdcode %>%
  filter(YEAR == "2024") %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE))

# Format total cases without scientific notation
total_cases_2024_formatted <- format(total_cases_2024$total_cases, big.mark = ",", scientific = FALSE)

# Compute the percentage of morbidity cases over the total population
percentage_cases <- comma(round((total_cases_2024$total_cases / total_population_2024$total_population) * 100000, 2))
# Calculate morbidity cases by AGE_CATEGORIES for 2024
morbidity_by_age <- morbidity_all_icdcode %>%
  filter(YEAR == "2024") %>%
  group_by(AGE_CATEGORIES) %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE)) %>%
  ungroup()

# Get the total cases for 2024
total_cases_2024 <- sum(morbidity_by_age$total_cases)

# Calculate percentage distribution for each AGE_CATEGORIES
morbidity_by_age <- morbidity_by_age %>%
  mutate(percentage = (total_cases / total_cases_2024) * 100)

# Format numbers for output with comma and rounding
morbidity_by_age$total_cases <- scales::comma(morbidity_by_age$total_cases)
morbidity_by_age$percentage <- round(morbidity_by_age$percentage, 2)

As of December 2024, the total number of morbidity cases in CALABARZON was 1,532,640, where the morbidity rate was 9,190.79 per 100,000 of the population. In comparison to 2023, the total number of cases in 2024 represents a 17.79% increase (see Table 1).


Table 1. Morbidity Rate in CALABARZON, 2024

morbidity_counts <- morbidity_all_icdcode %>%
  group_by(PROV_CODE, YEAR) %>%
  summarise(count = n(), .groups = 'drop') %>%
  filter(YEAR %in% c("2023", "2024")) %>%
  pivot_wider(names_from = YEAR, values_from = count, values_fill = list(count = 0)) %>%
  arrange(PROV_CODE)

# Add a total row at the bottom
total_row <- morbidity_counts %>%
  summarise(PROV_CODE = "TOTAL", 
            `2023` = sum(`2023`), 
            `2024` = sum(`2024`))

# Combine the original table with the total row
morbidity_counts_with_total <- bind_rows(morbidity_counts, total_row)

# Rename the 'PROV_CODE' column to 'Province'
morbidity_counts_with_total <- morbidity_counts_with_total %>%
  rename(PROVINCE = PROV_CODE)

# Ensure the '2023' and '2024' columns are numeric for percentage calculation
morbidity_counts_with_total$`2023` <- as.numeric(morbidity_counts_with_total$`2023`)
morbidity_counts_with_total$`2024` <- as.numeric(morbidity_counts_with_total$`2024`)

# Calculate the percentage change between 2023 and 2024 for each province (before formatting)
morbidity_counts_with_total <- morbidity_counts_with_total %>%
  mutate(percentage_change = ifelse(`2023` != 0, 
                                   (`2024` - `2023`) / `2023` * 100, 
                                   NA))

# Format the columns with commas after calculating percentage change
morbidity_counts_with_total$`2023` <- scales::comma(morbidity_counts_with_total$`2023`)
morbidity_counts_with_total$`2024` <- scales::comma(morbidity_counts_with_total$`2024`)
morbidity_counts_with_total$percentage_change <- scales::comma(morbidity_counts_with_total$percentage_change, accuracy = 0.01)

# Append '%' sign to percentage_change values
morbidity_counts_with_total$percentage_change <- paste0(morbidity_counts_with_total$percentage_change, "%")

# Rename the percentage_change column to PERCENTAGE CHANGE
colnames(morbidity_counts_with_total)[which(names(morbidity_counts_with_total) == "percentage_change")] <- "PERCENTAGE CHANGE"

# Create and format the kable table
kable(morbidity_counts_with_total, 
      format = "html", 
      caption = "",
      digits = 0,
      align = c("l", "c", "c", "c")) %>%  # Align columns: left for the first (PROVINCE), center for the rest
  kable_styling(full_width = F, position = "center", 
                bootstrap_options = c("striped", "hover", "responsive", "bordered")) %>%
  column_spec(1, bold = FALSE, color = "black", width = "5cm") %>%   # Province column style
  column_spec(2, color = "black", width = "3cm") %>%  # Year 2023 column style
  column_spec(3, color = "black", width = "3cm" ) %>%  # Year 2024 column style
  row_spec(nrow(morbidity_counts_with_total), bold = TRUE, background = "#dcdcdc") %>%   # Bold the last row (total row)
  row_spec(0, bold = TRUE, color = "white", background = "black", align = "center")  # Center the column headers
PROVINCE 2023 2024 PERCENTAGE CHANGE
BATANGAS 134,827 169,314 25.58%
CAVITE 159,075 190,460 19.73%
LAGUNA 178,067 201,815 13.34%
QUEZON 188,665 228,365 21.04%
RIZAL 106,176 113,248 6.66%
TOTAL 766,810 903,202 17.79%


Over the course of the months, January and October have recorded the highest peaks in morbidity cases. Additionally, when comparing the reported cases for the year 2023, January and August exhibit the highest peaks (see Figure 1).

Figure 1. Distribution of Morbidity Cases in CALABARZON for 2023 and 2024 by Month

# Summarize data for 2023 and 2024
morbidity_by_year_month <- morbidity_all_icdcode %>%
  filter(YEAR %in% c("2023", "2024")) %>%
  group_by(YEAR, MONTH) %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE), .groups = 'drop')

# Create the line plot with adjusted width and lighter grid lines
p <- ggplot(morbidity_by_year_month, aes(x = MONTH, y = total_cases, color = YEAR, group = YEAR)) +
  geom_line(size = 1.2) + 
  geom_point(size = 3) +  # Adding points on the line
  theme_minimal() +
  labs(title = "",
       x = "Month",
       y = "Total Cases",
       color = "Year") +
  scale_color_viridis_d() +  # Using viridis color scale for the lines
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_x_discrete(limits = c("January", "February", "March", "April", "May", 
                              "June", "July", "August", "September", "October", 
                              "November", "December")) +
  scale_y_continuous(labels = scales::comma) +  # Add comma formatting to the y-axis
  theme(
    panel.grid.major = element_line(color = "white", size = 0.5),  # Lighten major grid lines
    panel.grid.minor = element_line(color = "white", size = 0.25), # Lighten minor grid lines
    plot.margin = unit(c(0.1, 0.1, 0.1, 0.1), "cm") # Adjust margins for a better fit
  )

# Save the plot as a PNG file
ggsave("morbidity_cases_trend_2023_2024.png", plot = p, width = 20, height = 15, dpi = 600)

p

# Summarize morbidity cases for 2024 by PROV_CODE
morbidity_2024_summary <- morbidity_all %>%
  filter(YEAR == "2024") %>%
  group_by(PROV_CODE) %>%
  summarise(Total_Cases_2024 = sum(CASES), .groups = 'drop')

# Filter the population data for 2024
population_2024 <- population %>%
  filter(Year == 2024) %>%
  group_by(Province) %>%
  summarise(Total_Population = sum(Total), .groups = 'drop')

# Join the morbidity cases with the population data
morbidity_with_population_2024 <- morbidity_2024_summary %>%
  left_join(population_2024, by = c("PROV_CODE" = "Province"))

# Ensure that Total_Cases_2024 and Total_Population are numeric
morbidity_with_population_2024$Total_Cases_2024 <- as.numeric(morbidity_with_population_2024$Total_Cases_2024)
morbidity_with_population_2024$Total_Population <- as.numeric(morbidity_with_population_2024$Total_Population)

# Calculate the morbidity rate per 100,000 population for 2024
morbidity_with_population_2024 <- morbidity_with_population_2024 %>%
  mutate(morbidity_rate_2024 = (Total_Cases_2024 / Total_Population) * 100000)

# Calculate the total row for summary (sum of cases and population)
total_row <- morbidity_with_population_2024 %>%
  summarise(PROV_CODE = "TOTAL", 
            Total_Cases_2024 = sum(Total_Cases_2024, na.rm = TRUE), 
            Total_Population = sum(Total_Population, na.rm = TRUE)) %>%
  mutate(morbidity_rate_2024 = (Total_Cases_2024 / Total_Population) * 100000)

# Add the total row to the original dataframe
morbidity_with_population_2024 <- bind_rows(morbidity_with_population_2024, total_row)

# Rename the last row's PROV_CODE to "TOTAL"
morbidity_with_population_2024$PROV_CODE[nrow(morbidity_with_population_2024)] <- "TOTAL"

# Remove the extra column (if it was introduced as NA or redundant)
morbidity_with_population_2024 <- morbidity_with_population_2024 %>%
  select(PROV_CODE, Total_Population, Total_Cases_2024, morbidity_rate_2024)

# Format the columns with commas for better readability (after calculations)
morbidity_with_population_2024$Total_Cases_2024 <- scales::comma(morbidity_with_population_2024$Total_Cases_2024)
morbidity_with_population_2024$Total_Population <- scales::comma(morbidity_with_population_2024$Total_Population)
morbidity_with_population_2024$morbidity_rate_2024 <- scales::comma(morbidity_with_population_2024$morbidity_rate_2024)

morbidity_with_population_2024 <- morbidity_with_population_2024 %>%
  rename(PROVINCE = PROV_CODE, 
         CASES = Total_Cases_2024, 
         POPULATION = Total_Population, 
         RATE = morbidity_rate_2024)


In 2024, CALABARZON’s total population was approximately 16.68 million, with 1.53 million morbidity cases reported, resulting in an average morbidity rate of 9,191 cases per 100,000.

Quezon had the highest morbidity rate at 15,081 per 100,000, significantly exceeding the regional average, while Rizal recorded the lowest at 7,197 per 100,000. Laguna reported the highest total number of cases (374,801), followed by Cavite with 330,091 cases. Despite its large population, Cavite’s morbidity rate was lower than Laguna’s, which indicates that factors beyond population size, such as healthcare access or disease prevalence, contribute to the disparities.

The findings highlight the need for targeted public health strategies in high-burden provinces like Quezon while maintaining prevention efforts in Rizal and other areas (see Table 2).

Table 2. Morbidity Cases and Rate Per Province, 2024

kable(morbidity_with_population_2024, 
      format = "html", 
      caption = "",
      digits = 0, 
      align = c("l", "c", "c", "c")) %>%  # Align columns: left for the first (PROVINCE), center for the rest
  kable_styling(full_width = F, position = "center", 
                bootstrap_options = c("striped", "hover", "responsive", "bordered")) %>%
  column_spec(1, bold = TRUE, color = "black", 
              extra_css = ifelse(morbidity_with_population_2024$PROVINCE == "Rizal", "font-weight: bold;", "")) %>% 
  column_spec(2, color = "black", width = "3cm") %>%  # Population column style
  column_spec(3, color = "black", width = "3cm") %>%  # Total Cases 2024 column style
  column_spec(4, color = "black", width = "3cm") %>%  # Morbidity Rate column style
  row_spec(nrow(morbidity_with_population_2024), bold = TRUE, background = "#f0f0f0") %>%  # Bold the last row (total row)
  row_spec(0, bold = TRUE, color = "white", background = "black", align = "center")  # Center the column headers
PROVINCE POPULATION CASES RATE
BATANGAS 2,976,731 234,818 7,888
CAVITE 4,482,531 330,091 7,364
LAGUNA 3,480,921 374,801 10,767
QUEZON 2,285,009 344,601 15,081
RIZAL 3,450,627 248,329 7,197
TOTAL 16,675,819 1,532,640 9,191

The morbidity cases in the region, 2024, reveals a significant concentration of cases among children, followed by adults, seniors, and adolescents. Specifically, children aged 0-9 years exhibit the highest total morbidity cases across all provinces, with Laguna reporting the highest number of cases at 170,254, followed by Cavite and Quezon. The total morbidity in children was notably higher compared to other age groups in all provinces.

Following children, adults show the second highest morbidity burden, with Cavite (120,519 cases) and Laguna (130,666 cases) leading the other provinces. Seniors also contribute significantly to the total morbidity burden, with Laguna recording the highest morbidity among seniors at 54,627 cases, and Rizal presenting the lowest at 30,751 cases.

Meanwhile, adolescents recorded the lowest number of morbidity cases across the region, with the highest recorded in Laguna (19,254 cases) and the lowest in Rizal (13,400 cases).

These findings highlight the varied morbidity impact across age categories, with children consistently presenting the highest number of cases. For a detailed breakdown by province, refer to Table 1 and Figure 2.


Table 3. Morbidity Case Distribution by Age Group and Province, 2024

# Filter the data for the year 2024
data_2024 <- morbidity_all_icdcode %>%
  filter(YEAR == "2024")

# Aggregate cases by AGE_CATEGORIES and PROV_CODE
cases_by_age_province <- data_2024 %>%
  group_by(AGE_CATEGORIES, PROV_CODE) %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE), .groups = "drop")

# Pivot the data to have PROV_CODE as columns
cases_table <- cases_by_age_province %>%
  spread(key = PROV_CODE, value = total_cases, fill = 0)

# Apply comma formatting and remove decimals
cases_table[] <- lapply(cases_table, function(x) if(is.numeric(x)) formatC(x, format = "f", big.mark = ",", digits = 0) else x)

# Create the kable table with styling
kable_table <- kable(cases_table, 
                     format = "html", 
                     caption = "") %>%  # No need for 'digits' argument
  kable_styling(bootstrap_options = c("striped", "hover", "responsive", "bordered"),
                full_width = F, 
                position = "center") %>%
  column_spec(1, bold = TRUE, color = "black") %>%  # Bold AGE_CATEGORIES column
  column_spec(2:ncol(cases_table), color = "black") %>%  # Style the PROV_CODE columns
  row_spec(0, bold = TRUE, color = "white", background = "black")  # Header styling

kable_table
AGE_CATEGORIES BATANGAS CAVITE LAGUNA QUEZON RIZAL
ADOLESCENT 12,456 16,403 19,254 14,963 13,400
ADULT 83,225 120,519 130,666 117,721 80,454
CHILDREN 106,120 148,513 170,254 152,423 123,724
SENIOR 33,017 44,656 54,627 59,494 30,751


Among children, the highest incidence of cases occurred in January and October. In contrast, the adult age group experienced the greatest number of morbidity cases in December 2024, indicating a seasonal trend or potential outbreak during the final month of the year, best timing for health interventions and resource allocation for different age groups throughout the year.

Figure 2. Distribution of Morbidity Cases per Age Group by Month, 2024

# Assuming your original dataset is called `cases_all_data`
cases_by_age_month <- morbidity_all %>%
  group_by(AGE_CATEGORIES, MONTH) %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE), .groups = "drop")

cases_by_age_month$MONTH <- factor(cases_by_age_month$MONTH, 
                                   levels = c("January", "February", "March", "April", "May", 
                                              "June", "July", "August", "September", "October", 
                                              "November", "December"))
# Plot using ggplot
ggplot(cases_by_age_month, aes(x = MONTH, y = total_cases, fill = AGE_CATEGORIES)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(title = "",
       x = "",
       y = "Total Cases") +
  scale_fill_viridis(discrete = TRUE, option = "plasma") +  # Discrete scale for AGE_CATEGORIES
  scale_y_continuous(labels = comma) +  # Add comma formatting to the y-axis
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +  # Rotate x-axis labels
  theme(legend.title = element_blank())  # Optionally remove the legend title


In terms of the leading morbidity causes in Calabarzon were dominated by respiratory diseases (3,562 cases per 100,000 population), accounting for the highest incidence at 1,184,484 cases. Following closely were injuries and poisonings (939.8 rate), and infectious diseases (803.7 rate), indicating significant public health concerns. Other notable conditions included circulatory diseases (670.5 rate), genitourinary disorders (511.4 rate), and digestive system diseases (269.8 rate). The data highlights a substantial burden from both infectious diseases and chronic conditions like cardiovascular and musculoskeletal disorders, emphasizing the need for focused public health initiatives targeting prevention, early diagnosis, and management of these prevalent health issues.

Table 4. Distribution of Morbidity Cases per Age Group by Month, 2024

library(tidyverse)
library(dplyr)
library(magrittr)
library(kableExtra)

# Summary of morbidity cases
morbidity_summary <- morbidity_all_icdcode %>% 
  group_by(CATEGORY_NAME) %>%
  summarise(total_cases = sum(CASES, na.rm = TRUE), .groups = "drop")

# Get total population
population_total <- population %>%
  summarise(total_population = sum(Total, na.rm = TRUE)) %>%
  pull(total_population)

# Calculate morbidity rate per 100,000 population
morbidity_summary <- morbidity_summary %>%
  mutate(morbidity_rate = (total_cases / population_total) * 100000)

# Sort the data by morbidity rate in descending order and get the top 10
top_10_morbidity <- morbidity_summary %>%
  arrange(desc(morbidity_rate)) %>%
  head(10)

# Add Rank column and format values
top_10_morbidity <- top_10_morbidity %>%
  mutate(Rank = row_number()) %>%  # Add Rank column
  rename(
    `ICD CATEGORIES` = CATEGORY_NAME,
    CASES = total_cases,
    RATE = morbidity_rate
  ) %>% 
  mutate(
    CASES = formatC(CASES, format = "d", big.mark = ","),
    RATE = formatC(round(RATE, 2), big.mark = ",")
  ) %>%
  select(Rank, `ICD CATEGORIES`, CASES, RATE)  # Move Rank to the first column

# Create the kable table with styling
kable_table <- kable(top_10_morbidity, 
                     format = "html", 
                     caption = "", 
                     digits = c(0, 0, 2)) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "responsive", "bordered"),
                full_width = F, 
                position = "center") %>%
  column_spec(1, bold = TRUE, color = "black") %>%  # Bold Rank
  column_spec(2, color = "black") %>% 
  column_spec(3, color = "black") %>% 
  column_spec(4, color = "black") %>%  
  row_spec(0, bold = TRUE, color = "white", background = "black") 

kable_table
Rank ICD CATEGORIES CASES RATE
1 Diseases of the respiratory system (J00-J99) 1,184,484 3,562
2 Injury, poisoning and certain other consequences of external causes (S00-T98) 312,538 939.8
3 Certain Infectious and parasitic diseases (A00-B99) 267,261 803.7
4 Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99) 232,926 700.4
5 Diseases of the circulatory system (I00-I99) 222,958 670.5
6 Diseases of the genitourinary system (N00-N99) 170,064 511.4
7 Diseases of the digestive system (K00-K93) 89,710 269.8
8 Endocrine, nutritional and metabolic diseases (E00-E90) 88,738 266.8
9 Diseases of the skin and subcutaneous tissue (L00-L99) 84,978 255.5
10 Diseases of the musculoskeletal system and connective tissue (M00-M99) 59,742 179.7


In examining the distribution of morbidity cases across different age groups for various ICD categories, we can observe key trends that highlight the health challenges faced by specific demographics in Calabarzon. The N00-N99 category, which includes diseases of the genitourinary system, shows the highest total cases, with adults (84,066 cases) and children (53,461 cases) being most affected. This suggests that genitourinary disorders are prevalent across all age groups, though adults experience the highest burden. Diseases of the digestive system (K00-K93) also show significant prevalence, especially in adults (43,127 cases), but children (29,669 cases) and adolescents (5,956 cases) are notably affected as well, indicating concerns such as gastrointestinal infections or chronic conditions affecting younger populations (see Figure 3).

Figure 3. Distribution of Top 5 Morbidity Cases Across Age Categories, 2024

library(scales) 

# Function to filter by ICD categories and return age distribution
get_age_distribution_by_icd <- function(icd_range, icd_label) {
  morbidity_all_icdcode %>%
    filter(grepl(icd_range, ICD_CODE)) %>%
    group_by(AGE_CATEGORIES) %>%
    summarise(total_cases = sum(CASES, na.rm = TRUE)) %>%
    mutate(ICD_category = icd_label)  # Add the ICD category label
}

# Create datasets for each of the specified ICD ranges
data_J00_J99 <- get_age_distribution_by_icd("^J[0-9]{2}$", "J00-J99")
data_S00_T98 <- get_age_distribution_by_icd("^S[0-9]{2}$", "S00-T98")
data_A00_B99 <- get_age_distribution_by_icd("^A[0-9]{2}$", "A00-B99")
data_R00_R99 <- get_age_distribution_by_icd("^R[0-9]{2}$", "R00-R99")
data_I00_I99 <- get_age_distribution_by_icd("^I[0-9]{2}$", "I00-I99")

# Combine all datasets into one
all_data <- bind_rows(data_J00_J99, data_S00_T98, data_A00_B99, 
                      data_R00_R99, data_I00_I99)

# Reorder ICD_category factor levels to control the order of the heatmap rows
all_data$ICD_category <- factor(all_data$ICD_category, 
                                levels = c("J00-J99", "S00-T98", "A00-B99", 
                                           "R00-R99", "I00-I99"))

# Create the heatmap with formatted legend values
ggplot(all_data, aes(x = AGE_CATEGORIES, y = ICD_category, fill = total_cases)) +
  geom_tile() +  # This creates the heatmap
  scale_fill_viridis(name = "Total Cases", option = "C", trans = "log", 
                     labels = label_comma(big.mark = ",", accuracy = 0.01)) +  # Format legend with commas and 2 decimal places
  labs(
    title = "",
    x = "Age Categories",
    y = "ICD Categories"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0),  # Rotate x-axis labels for better readability
    plot.title = element_text(hjust = 0.5),
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),  
    axis.title.x = element_text(margin = margin(t = 10)),  # Spacing for x-axis title
    axis.title.y = element_text(margin = margin(r = 10))   
  )


For endocrine, nutritional, and metabolic diseases (E00-E90), seniors (42,267 cases) represent the highest incidence, reflecting the growing health challenges of aging populations, such as diabetes and thyroid conditions. Adolescents and children also experience some cases, although at much lower rates. The diseases of the skin and subcutaneous tissue (L00-L99) and musculoskeletal system (M00-M99) primarily affect children (46,709 cases in L00-L99) and adults (35,568 cases in M00-M99), with musculoskeletal disorders particularly impacting adults, likely due to lifestyle and aging-related factors. These findings emphasize the need for age-specific health strategies, with a focus on managing chronic conditions in adults and seniors, while also addressing preventable health issues in children and adolescents.

Figure 4. Distribution of Top Six to Ten Morbidity Cases Across Age Categories, 2024

# Filter by ICD categories and return age distribution
get_age_distribution_by_icd <- function(icd_range, icd_label) {
  morbidity_all_icdcode %>%
    filter(grepl(icd_range, ICD_CODE)) %>%
    group_by(AGE_CATEGORIES) %>%
    summarise(total_cases = sum(CASES, na.rm = TRUE)) %>%
    mutate(ICD_category = icd_label)  # Add the ICD category label
}


data_N00_N99 <- get_age_distribution_by_icd("^N[0-9]{2}$", "N00-N99")
data_K00_K93 <- get_age_distribution_by_icd("^K[0-9]{2}$", "K00-K93")
data_E00_E90 <- get_age_distribution_by_icd("^E[0-9]{2}$", "E00-E90")
data_L00_L99 <- get_age_distribution_by_icd("^L[0-9]{2}$", "L00-L99")
data_M00_M99 <- get_age_distribution_by_icd("^M[0-9]{2}$", "M00-M99")

# Combine all datasets into one
all_data <- bind_rows(data_N00_N99, 
                      data_K00_K93, data_E00_E90, data_L00_L99, 
                      data_M00_M99)

# Reorder ICD_category factor levels to control the order of the heatmap rows
all_data$ICD_category <- factor(all_data$ICD_category, 
                                levels = c("N00-N99", "K00-K93", "E00-E90", "L00-L99", "M00-M99"))

# Create the heatmap with formatted legend values
ggplot(all_data, aes(x = AGE_CATEGORIES, y = ICD_category, fill = total_cases)) +
  geom_tile() +  # This creates the heatmap
  scale_fill_viridis(name = "Total Cases", option = "C", trans = "log", 
                     labels = label_comma(big.mark = ",", accuracy = 0.01)) +  # Format legend with commas and 2 decimal places
  labs(
    title = "",
    x = "Age Categories",
    y = "ICD Categories"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 0),
    plot.title = element_text(hjust = 0.5),
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    axis.title.x = element_text(margin = margin(t = 10)),  # Spacing for x-axis title
    axis.title.y = element_text(margin = margin(r = 10))
  )