Import Packages

library(readxl)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(scales)

library(ggplot2)
library(shiny)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(gapminder)
library(RColorBrewer)

###Do this again with the population estimate data

# Create a function to process each dataset
process_NHS_data <- function(file_path, sheet_name = NULL, range, year, col_names) {
  data <- read_excel(file_path, sheet = sheet_name, range = range) %>%
    mutate(Year = year) %>%
    rename_with(~ col_names, everything()) %>%
    mutate(across(where(is.character), ~ gsub("-", "NA", .)),  # Replace '-' with 'NA'
           across(where(is.character), ~ na_if(., "np")),      # Replace 'np' with NA
           across(-Condition, as.numeric)) %>%                # Convert columns to numeric
    # Standardizing 'Condition' variable
    mutate(Condition = recode(Condition,
                              "Short sighted / myopia" = "Short sighted/myopia",
                              "Long sighted / hyperopia" = "Long sighted/hyperopia",
                              "Blindness (complete and partial)(h)" = "Blindness (complete and partial)",
                              "Blindness (complete and partial)(f)" = "Blindness (complete and partial)",
                              "Blindness" = "Blindness (complete and partial)",
                              "Other diseases of the eye and adnexa(i)" = "Other diseases of the eye and adnexa",
                              "Other diseases of eye and adnexa" = "Other diseases of the eye and adnexa",
                              "Total diseases of the eye and  adnexa" = "Total diseases of the eye and adnexa",
                              "Total diseases of the eye and adnexa" = "Total diseases of the eye and adnexa")) %>%
    # Apply Classification after standardizing 'Condition'
    mutate(Classification = case_when(
      Condition %in% c("Cataract", "Glaucoma", "Astigmatism", "Macular degeneration", 
                       "Presbyopia",
                       "Other disorders of ocular muscles binocular", 
                       "Other disorders of choroid and retina", "Other visual disturbances or loss of vision",
                       "Other diseases of the eye and adnexa") ~ "Low Vision",
      Condition %in% c("Short sighted/myopia", "Long sighted/hyperopia") ~ "Long/Short sighted",
      Condition == "Blindness (complete and partial)" ~ "Blindness",
      Condition == "Total diseases of the eye and adnexa" ~ "Total",
      Condition == "Colour blind" ~ "Colour Blind",
      TRUE ~ NA_character_  # This handles any other conditions not explicitly classified
    )) %>%
    pivot_longer(cols = -c(Condition, Year, Classification),
                 names_to = "Age_group", 
                 values_to = "ESTIMATE ('000)")
  
  return(data)
}


# Define file paths, ranges, and column names for each dataset
datasets <- list(
  list(file = "National Health Survey 2022/NHS 2022 Vision Data.xlsx", range = "A1:H12", year = 2022, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+", "Year")),
  list(file = "National Health Survey 2017-18/NHS 2017-18 Vision Data.xlsx", range = "A1:H15", year = 2018, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+", "Year")),
  list(file = "National Health Survey 2014-15/NHS 2014-15 Vision Data.xlsx", range = "A1:I15", year = 2015, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year")),
  list(file = "National Health Survey 2011-12/NHS 2011-12 Vision Data.xlsx", range = "A1:I15", year = 2012, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year")),
  list(file = "National Health Survey 2007-08/NHS 2007-08 Vision Data.xlsx", range = "A1:I14", year = 2008, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year"))
)

# Loop through each dataset and apply the processing function
NHS_Age_Est <- lapply(datasets, function(ds) {
  process_NHS_data(ds$file, range = ds$range, year = ds$year, col_names = ds$col_names)
}) %>%
  bind_rows()  # Combine all datasets

## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(-Condition, as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.

rm(datasets)

colSums(is.na(NHS_Age_Est))

##       Condition            Year  Classification       Age_group ESTIMATE ('000) 
##               0               0               0               0              33

Import population data

# Create a function to process each dataset
process_pop_data <- function(file_path, sheet_name, range, year, col_names) {
  data <- read_excel(file_path, sheet = sheet_name, range = range, col_names = col_names) %>%
    mutate(Year = year) %>%
    pivot_longer(cols = -Year,                # Exclude 'Year' column from pivoting
                 names_to = "Age_group",
                 values_to = "ESTIMATE ('000)")

  return(data)
}

# Define file paths, ranges, and column names for each dataset
datasets <- list(
  list(file = "National Health Survey 2022/NHSDC03.xlsx", sheet_name = "Table 3.1_Estimates Persons", range = "B151:H151", year = 2022, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+")),
  list(file = "National Health Survey 2017-18/4364055001do003_20172018.xls", sheet_name = "Table 3.1_Estimate, persons", range = "B160:H160", year = 2018, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+")),
  list(file = "National Health Survey 2014-15/4364055001do003_20142015.xls", sheet_name = "Table_3_1 Estimates, persons", range = "B159:I159", year = 2015, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+")),
  list(file = "National Health Survey 2011-12/43640do003_20112012v2.xls", sheet_name = "Table_3_1", range = "B140:I140", year = 2012, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+"))
)

# Loop through each dataset and apply the processing function
NHS_pop <- lapply(datasets, function(ds) {
  process_pop_data(ds$file, sheet_name = ds$sheet_name, range = ds$range,
                   year = ds$year, col_names = ds$col_names)
}) %>%
  bind_rows()  # Combine all datasets

rm(datasets)

colSums(is.na(NHS_pop))

##            Year       Age_group ESTIMATE ('000) 
##               0               0               0

#Create Data Visualisations

# Filter for the 65–74 and 75+ age groups and sum them into a new "65+" variable
NHS_Age_Est_65plus <- NHS_Age_Est %>%
  filter(Age_group %in% c("65–74", "75+")) %>%
  group_by(Condition, Year, Classification) %>%
  summarise(`ESTIMATE ('000)` = sum(`ESTIMATE ('000)`, na.rm = TRUE)) %>%
  mutate(Age_group = "65+") %>%
  ungroup()

## `summarise()` has grouped output by 'Condition', 'Year'. You can override using
## the `.groups` argument.

# Combine the new 65+ group with the rest of the data
NHS_Age_Est_All <- NHS_Age_Est %>%
  filter(!Age_group %in% c("65–74", "75+")) %>%  # Remove the original 65–74 and 75+ age groups
  bind_rows(NHS_Age_Est_65plus)  # Add the new 65+ group

# Convert Year to a factor if it should be treated as categorical
NHS_Age_Est_All$Year <- as.factor(NHS_Age_Est_All$Year)

# Change Estimate ('000) to it's real value
NHS_Age_Est_All$`ESTIMATE ('000)` <- NHS_Age_Est_All$`ESTIMATE ('000)` * 1000

# Filter out the "Total" classification
NHS_Age_Est_filtered <- NHS_Age_Est_All %>% 
  filter(Classification == "Total")

all_impairment <- ggplot(NHS_Age_Est_filtered, aes(x = Year, y = `ESTIMATE ('000)`, fill = Age_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment",
       x = "Year",
       y = "Number of Australains",
       fill = "Age Group") +
  scale_fill_viridis_d(option = "B", direction = -1) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  theme(panel.background = element_rect(fill = "white", color = NA),  # White panel background
        plot.background = element_rect(fill = "white", color = NA))  # White plot background

# Increase the size of the plot by saving it with specific dimensions
ggsave("NHS_Age_Est_plot_total.png", plot = all_impairment, width = 12, height = 8)

all_impairment

# Preparing data for the three plotly graphs

# Faceted plots for each dataset
impairment_age <- ggplot(NHS_Age_Est_All %>% filter(Classification == "Total"), aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Age_group)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ Year, nrow = 1, scales = "fixed") +
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment",
       x = "Age Group",
       y = "Number of Australains",
       fill = "Age Group") +
  scale_fill_viridis_d(option = "D", end = 0.9) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        panel.background = element_rect(fill = "white", color = NA),  # White panel background
        plot.background = element_rect(fill = "white", color = NA),  # White plot background
        legend.position = "top",  # Position legend at the top
        legend.justification = "center",  # Centre the legend
        legend.key.size = unit(0.5, "cm"),  # Adjust size of legend keys
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

impairment_type <- ggplot(NHS_Age_Est_All %>% filter(Classification != "Total"), aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Classification)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ Year, nrow = 1, scales = "fixed") +
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment by Type",
       x = "Age Group",
       y = "Number of Australains",
       fill = "Condition") +
  scale_fill_viridis_d(option = "D", end = 0.9) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        panel.background = element_rect(fill = "white", color = NA),  # White panel background
        plot.background = element_rect(fill = "white", color = NA),  # White plot background
        legend.position = "top",  # Position legend at the top
        legend.justification = "center",  # Center the legend
        legend.key.size = unit(0.5, "cm"),  # Adjust size of legend keys
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

    

impairment_age <- ggplotly(impairment_age, width = 800, height = 500)
impairment_age

htmlwidgets::saveWidget(impairment_age, "impairment_age.html")

impairment_type <- ggplotly(impairment_type, width = 800, height = 500)
impairment_type

htmlwidgets::saveWidget(impairment_type, "impairment_type.html")

# Faceted plots for each dataset
impairment <- ggplot(NHS_Age_Est_All, aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Classification)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ Year, nrow = 1, scales = "fixed") +
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment by Condition",
       x = "Age Group",
       y = "Number of Australains",
       fill = "Condition") +
  scale_fill_viridis_d(option = "D", end = 0.9) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  theme(axis.text.x = element_text(angle = 45, hjust = 1),
        panel.background = element_rect(fill = "white", color = NA),  # White panel background
        plot.background = element_rect(fill = "white", color = NA),  # White plot background
        legend.position = "top",  # Position legend at the top
        legend.justification = "center",  # Centre the legend
        legend.key.size = unit(0.5, "cm"),  # Adjust size of legend keys
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

impairment <- ggplotly(impairment, width = 800, height = 500)
impairment

htmlwidgets::saveWidget(impairment, "impairment_all.html")

library(viridis)

## Loading required package: viridisLite

## 
## Attaching package: 'viridis'

## The following object is masked from 'package:scales':
## 
##     viridis_pal

library(unikn)

## Welcome to unikn (v1.0.0)!

## demopal() demonstrates a color palette.

vir_5 <- viridis(n = 5)
seecol(vir_5, col_brd = "white", lwd_brd = 4, hex = TRUE, 
       title = "Example of a viridis color palette (n = 5)",
       pal_names = c("Blindness", "Unlabeled A", "Low vision", "Unlabeled B", "Colour blindness/Other"))

# Ensure Year is numeric
NHS_Age_Est_All$Year <- as.numeric(as.character(NHS_Age_Est_All$Year))

# Get unique years from the data to use as breaks
unique_years <- sort(unique(NHS_Age_Est_All$Year))

# Custom function to format years
year_suffix <- function(x) {
  paste0("'", substr(x, 3, 4))  # Extract last two digits and add an apostrophe
}

# Preparing data for slope chart
impairment_age <- ggplot(NHS_Age_Est_All %>% filter(Classification == "Total"), 
                          aes(x = Year, y = `ESTIMATE ('000)`, group = Age_group, color = Age_group)) +
  geom_line(size = 1) +  # Add lines for each age group
  geom_point(size = 3) +  # Add points at each estimate
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment",
       x = "Year",
       y = "Number of Australians",
       color = "Age Group") +
  scale_color_viridis_d(option = "B", direction = -1) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  scale_x_continuous(breaks = unique_years, labels = year_suffix) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

impairment_age <- ggplotly(impairment_age, width = 800, height = 500)
impairment_age

htmlwidgets::saveWidget(impairment_age, "impairment_age_ts.html")

# Summing estimates by Year and Classification
impairment_type_sum <- NHS_Age_Est_All %>%
  filter(Classification != "Total") %>%
  group_by(Year, Classification) %>%
  summarise(Estimate = sum(`ESTIMATE ('000)`, na.rm = TRUE), .groups = "drop")  # Summing estimates and dropping grouping

# Preparing the slope chart for impairment type summed by age
impairment_type <- ggplot(impairment_type_sum, 
                           aes(x = Year, y = Estimate, group = Classification, color = Classification)) +
  geom_line(size = 1) +  # Add lines for each classification
  geom_point(size = 3) +  # Add points at each estimate
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment by Type",
       x = "Year",
       y = "Number of Australians",
       color = "Condition") +
  scale_color_viridis_d(option = "D", end = 0.9) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  scale_x_continuous(breaks = unique_years, labels = year_suffix) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

impairment_type <- ggplotly(impairment_type, width = 800, height = 500)
impairment_type

htmlwidgets::saveWidget(impairment_type, "impairment_type_ts.html")

# Summing estimates by Year and Classification
impairment_type2_data <- NHS_Age_Est_All %>%
  filter(Classification != "Total") %>%
  group_by(Year, Classification, Age_group) %>%
  summarise(Estimate = sum(`ESTIMATE ('000)`, na.rm = TRUE), .groups = "drop")  # Summing estimates and dropping grouping

# Preparing the slope chart for impairment type summed by age
impairment_type2 <- ggplot(impairment_type2_data, 
                           aes(x = Year, y = Estimate, group = Classification, color = Classification)) +
  facet_wrap(~Age_group, nrow = 1)+
  geom_line(size = 0.5) +  # Add lines for each classification
  geom_point(size = 1) +  # Add points at each estimate
  theme_minimal() +
  labs(title = "Estimate of Australians with Vision Impairment by Condition",
       x = "Year",
       y = "Number of Australians",
       color = "Condition") +
  scale_color_viridis_d(option = "D", end = 0.9) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  scale_x_continuous(breaks = unique_years, labels = year_suffix) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

impairment_type2 <- ggplotly(impairment_type2, width = 1000, height = 500)
impairment_type2

htmlwidgets::saveWidget(impairment_type2, "impairment_all_ts.html")

NHS_pop <- rename(NHS_pop, Estimate = `ESTIMATE ('000)`)

# Filter for the 65–74 and 75+ age groups and sum them into a new "65+" variable
NHS_pop_65plus <- NHS_pop %>%
  filter(Age_group %in% c("65–74", "75+")) %>%
  group_by(Year) %>%
  summarise(Estimate = sum(Estimate, na.rm = TRUE)) %>%
  mutate(Age_group = "65+") %>%
  ungroup()

# Combine the new 65+ group with the rest of the data
NHS_pop <- NHS_pop %>%
  filter(!Age_group %in% c("65–74", "75+")) %>%  # Remove the original 65–74 and 75+ age groups
  bind_rows(NHS_pop_65plus)  # Add the new 65+ group

# Get unique years from the data to use as breaks
unique_years <- sort(unique(NHS_pop$Year))

# Change Estimate ('000) to it's real value
NHS_pop$Estimate <- NHS_pop$Estimate * 1000

# Preparing the slope chart for impairment type summed by age
Australian_population <- ggplot(NHS_pop, 
                           aes(x = Year, y = Estimate, group = Age_group, color = Age_group)) +
  facet_wrap(~Age_group, nrow = 1)+
  geom_line(size = 0.5) +  # Add lines for each classification
  geom_point(size = 1) +  # Add points at each estimate
  theme_minimal() +
  labs(title = "Estimate number of Australians by Age Group and Year",
       x = "Year",
       y = "Number of Australians",
       color = "Age_group") +
  scale_color_viridis_d(option = "B", direction = -1) +  # Colorblind-friendly palette
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) +  # Format y-axis in millions
  scale_x_continuous(breaks = unique_years, labels = year_suffix) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), 
        legend.title = element_text(size = 10, face = "bold"),  # Adjust legend title size
        legend.text = element_text(size = 9)  # Adjust legend text size
  )

Australian_population <- ggplotly(Australian_population, width = 800, height = 500)
Australian_population

htmlwidgets::saveWidget(Australian_population, "Australian_population.html")

Data Visualisations Assignment 3

2024-11-03

Import Packages

Import population data