library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(scales)
library(ggplot2)
library(shiny)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gapminder)
library(RColorBrewer)
###Do this again with the population estimate data
# Create a function to process each dataset
process_NHS_data <- function(file_path, sheet_name = NULL, range, year, col_names) {
data <- read_excel(file_path, sheet = sheet_name, range = range) %>%
mutate(Year = year) %>%
rename_with(~ col_names, everything()) %>%
mutate(across(where(is.character), ~ gsub("-", "NA", .)), # Replace '-' with 'NA'
across(where(is.character), ~ na_if(., "np")), # Replace 'np' with NA
across(-Condition, as.numeric)) %>% # Convert columns to numeric
# Standardizing 'Condition' variable
mutate(Condition = recode(Condition,
"Short sighted / myopia" = "Short sighted/myopia",
"Long sighted / hyperopia" = "Long sighted/hyperopia",
"Blindness (complete and partial)(h)" = "Blindness (complete and partial)",
"Blindness (complete and partial)(f)" = "Blindness (complete and partial)",
"Blindness" = "Blindness (complete and partial)",
"Other diseases of the eye and adnexa(i)" = "Other diseases of the eye and adnexa",
"Other diseases of eye and adnexa" = "Other diseases of the eye and adnexa",
"Total diseases of the eye and adnexa" = "Total diseases of the eye and adnexa",
"Total diseases of the eye and adnexa" = "Total diseases of the eye and adnexa")) %>%
# Apply Classification after standardizing 'Condition'
mutate(Classification = case_when(
Condition %in% c("Cataract", "Glaucoma", "Astigmatism", "Macular degeneration",
"Presbyopia",
"Other disorders of ocular muscles binocular",
"Other disorders of choroid and retina", "Other visual disturbances or loss of vision",
"Other diseases of the eye and adnexa") ~ "Low Vision",
Condition %in% c("Short sighted/myopia", "Long sighted/hyperopia") ~ "Long/Short sighted",
Condition == "Blindness (complete and partial)" ~ "Blindness",
Condition == "Total diseases of the eye and adnexa" ~ "Total",
Condition == "Colour blind" ~ "Colour Blind",
TRUE ~ NA_character_ # This handles any other conditions not explicitly classified
)) %>%
pivot_longer(cols = -c(Condition, Year, Classification),
names_to = "Age_group",
values_to = "ESTIMATE ('000)")
return(data)
}
# Define file paths, ranges, and column names for each dataset
datasets <- list(
list(file = "National Health Survey 2022/NHS 2022 Vision Data.xlsx", range = "A1:H12", year = 2022, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+", "Year")),
list(file = "National Health Survey 2017-18/NHS 2017-18 Vision Data.xlsx", range = "A1:H15", year = 2018, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+", "Year")),
list(file = "National Health Survey 2014-15/NHS 2014-15 Vision Data.xlsx", range = "A1:I15", year = 2015, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year")),
list(file = "National Health Survey 2011-12/NHS 2011-12 Vision Data.xlsx", range = "A1:I15", year = 2012, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year")),
list(file = "National Health Survey 2007-08/NHS 2007-08 Vision Data.xlsx", range = "A1:I14", year = 2008, col_names = c("Condition", "0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+", "Year"))
)
# Loop through each dataset and apply the processing function
NHS_Age_Est <- lapply(datasets, function(ds) {
process_NHS_data(ds$file, range = ds$range, year = ds$year, col_names = ds$col_names)
}) %>%
bind_rows() # Combine all datasets
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(-Condition, as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
rm(datasets)
colSums(is.na(NHS_Age_Est))
## Condition Year Classification Age_group ESTIMATE ('000)
## 0 0 0 0 33
# Create a function to process each dataset
process_pop_data <- function(file_path, sheet_name, range, year, col_names) {
data <- read_excel(file_path, sheet = sheet_name, range = range, col_names = col_names) %>%
mutate(Year = year) %>%
pivot_longer(cols = -Year, # Exclude 'Year' column from pivoting
names_to = "Age_group",
values_to = "ESTIMATE ('000)")
return(data)
}
# Define file paths, ranges, and column names for each dataset
datasets <- list(
list(file = "National Health Survey 2022/NHSDC03.xlsx", sheet_name = "Table 3.1_Estimates Persons", range = "B151:H151", year = 2022, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+")),
list(file = "National Health Survey 2017-18/4364055001do003_20172018.xls", sheet_name = "Table 3.1_Estimate, persons", range = "B160:H160", year = 2018, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65+")),
list(file = "National Health Survey 2014-15/4364055001do003_20142015.xls", sheet_name = "Table_3_1 Estimates, persons", range = "B159:I159", year = 2015, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+")),
list(file = "National Health Survey 2011-12/43640do003_20112012v2.xls", sheet_name = "Table_3_1", range = "B140:I140", year = 2012, col_names = c("0–14", "15–24", "25–34", "35–44", "45–54", "55–64", "65–74", "75+"))
)
# Loop through each dataset and apply the processing function
NHS_pop <- lapply(datasets, function(ds) {
process_pop_data(ds$file, sheet_name = ds$sheet_name, range = ds$range,
year = ds$year, col_names = ds$col_names)
}) %>%
bind_rows() # Combine all datasets
rm(datasets)
colSums(is.na(NHS_pop))
## Year Age_group ESTIMATE ('000)
## 0 0 0
#Create Data Visualisations
# Filter for the 65–74 and 75+ age groups and sum them into a new "65+" variable
NHS_Age_Est_65plus <- NHS_Age_Est %>%
filter(Age_group %in% c("65–74", "75+")) %>%
group_by(Condition, Year, Classification) %>%
summarise(`ESTIMATE ('000)` = sum(`ESTIMATE ('000)`, na.rm = TRUE)) %>%
mutate(Age_group = "65+") %>%
ungroup()
## `summarise()` has grouped output by 'Condition', 'Year'. You can override using
## the `.groups` argument.
# Combine the new 65+ group with the rest of the data
NHS_Age_Est_All <- NHS_Age_Est %>%
filter(!Age_group %in% c("65–74", "75+")) %>% # Remove the original 65–74 and 75+ age groups
bind_rows(NHS_Age_Est_65plus) # Add the new 65+ group
# Convert Year to a factor if it should be treated as categorical
NHS_Age_Est_All$Year <- as.factor(NHS_Age_Est_All$Year)
# Change Estimate ('000) to it's real value
NHS_Age_Est_All$`ESTIMATE ('000)` <- NHS_Age_Est_All$`ESTIMATE ('000)` * 1000
# Filter out the "Total" classification
NHS_Age_Est_filtered <- NHS_Age_Est_All %>%
filter(Classification == "Total")
all_impairment <- ggplot(NHS_Age_Est_filtered, aes(x = Year, y = `ESTIMATE ('000)`, fill = Age_group)) +
geom_bar(stat = "identity", position = "dodge") +
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment",
x = "Year",
y = "Number of Australains",
fill = "Age Group") +
scale_fill_viridis_d(option = "B", direction = -1) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
theme(panel.background = element_rect(fill = "white", color = NA), # White panel background
plot.background = element_rect(fill = "white", color = NA)) # White plot background
# Increase the size of the plot by saving it with specific dimensions
ggsave("NHS_Age_Est_plot_total.png", plot = all_impairment, width = 12, height = 8)
all_impairment
# Preparing data for the three plotly graphs
# Faceted plots for each dataset
impairment_age <- ggplot(NHS_Age_Est_All %>% filter(Classification == "Total"), aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Age_group)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Year, nrow = 1, scales = "fixed") +
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment",
x = "Age Group",
y = "Number of Australains",
fill = "Age Group") +
scale_fill_viridis_d(option = "D", end = 0.9) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.background = element_rect(fill = "white", color = NA), # White panel background
plot.background = element_rect(fill = "white", color = NA), # White plot background
legend.position = "top", # Position legend at the top
legend.justification = "center", # Centre the legend
legend.key.size = unit(0.5, "cm"), # Adjust size of legend keys
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
impairment_type <- ggplot(NHS_Age_Est_All %>% filter(Classification != "Total"), aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Classification)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Year, nrow = 1, scales = "fixed") +
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment by Type",
x = "Age Group",
y = "Number of Australains",
fill = "Condition") +
scale_fill_viridis_d(option = "D", end = 0.9) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.background = element_rect(fill = "white", color = NA), # White panel background
plot.background = element_rect(fill = "white", color = NA), # White plot background
legend.position = "top", # Position legend at the top
legend.justification = "center", # Center the legend
legend.key.size = unit(0.5, "cm"), # Adjust size of legend keys
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
impairment_age <- ggplotly(impairment_age, width = 800, height = 500)
impairment_age
htmlwidgets::saveWidget(impairment_age, "impairment_age.html")
impairment_type <- ggplotly(impairment_type, width = 800, height = 500)
impairment_type
htmlwidgets::saveWidget(impairment_type, "impairment_type.html")
# Faceted plots for each dataset
impairment <- ggplot(NHS_Age_Est_All, aes(x = Age_group, y = `ESTIMATE ('000)`, fill = Classification)) +
geom_bar(stat = "identity", position = "dodge") +
facet_wrap(~ Year, nrow = 1, scales = "fixed") +
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment by Condition",
x = "Age Group",
y = "Number of Australains",
fill = "Condition") +
scale_fill_viridis_d(option = "D", end = 0.9) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.background = element_rect(fill = "white", color = NA), # White panel background
plot.background = element_rect(fill = "white", color = NA), # White plot background
legend.position = "top", # Position legend at the top
legend.justification = "center", # Centre the legend
legend.key.size = unit(0.5, "cm"), # Adjust size of legend keys
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
impairment <- ggplotly(impairment, width = 800, height = 500)
impairment
htmlwidgets::saveWidget(impairment, "impairment_all.html")
library(viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
## The following object is masked from 'package:scales':
##
## viridis_pal
library(unikn)
## Welcome to unikn (v1.0.0)!
## demopal() demonstrates a color palette.
vir_5 <- viridis(n = 5)
seecol(vir_5, col_brd = "white", lwd_brd = 4, hex = TRUE,
title = "Example of a viridis color palette (n = 5)",
pal_names = c("Blindness", "Unlabeled A", "Low vision", "Unlabeled B", "Colour blindness/Other"))
# Ensure Year is numeric
NHS_Age_Est_All$Year <- as.numeric(as.character(NHS_Age_Est_All$Year))
# Get unique years from the data to use as breaks
unique_years <- sort(unique(NHS_Age_Est_All$Year))
# Custom function to format years
year_suffix <- function(x) {
paste0("'", substr(x, 3, 4)) # Extract last two digits and add an apostrophe
}
# Preparing data for slope chart
impairment_age <- ggplot(NHS_Age_Est_All %>% filter(Classification == "Total"),
aes(x = Year, y = `ESTIMATE ('000)`, group = Age_group, color = Age_group)) +
geom_line(size = 1) + # Add lines for each age group
geom_point(size = 3) + # Add points at each estimate
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment",
x = "Year",
y = "Number of Australians",
color = "Age Group") +
scale_color_viridis_d(option = "B", direction = -1) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
scale_x_continuous(breaks = unique_years, labels = year_suffix) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
impairment_age <- ggplotly(impairment_age, width = 800, height = 500)
impairment_age
htmlwidgets::saveWidget(impairment_age, "impairment_age_ts.html")
# Summing estimates by Year and Classification
impairment_type_sum <- NHS_Age_Est_All %>%
filter(Classification != "Total") %>%
group_by(Year, Classification) %>%
summarise(Estimate = sum(`ESTIMATE ('000)`, na.rm = TRUE), .groups = "drop") # Summing estimates and dropping grouping
# Preparing the slope chart for impairment type summed by age
impairment_type <- ggplot(impairment_type_sum,
aes(x = Year, y = Estimate, group = Classification, color = Classification)) +
geom_line(size = 1) + # Add lines for each classification
geom_point(size = 3) + # Add points at each estimate
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment by Type",
x = "Year",
y = "Number of Australians",
color = "Condition") +
scale_color_viridis_d(option = "D", end = 0.9) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
scale_x_continuous(breaks = unique_years, labels = year_suffix) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
impairment_type <- ggplotly(impairment_type, width = 800, height = 500)
impairment_type
htmlwidgets::saveWidget(impairment_type, "impairment_type_ts.html")
# Summing estimates by Year and Classification
impairment_type2_data <- NHS_Age_Est_All %>%
filter(Classification != "Total") %>%
group_by(Year, Classification, Age_group) %>%
summarise(Estimate = sum(`ESTIMATE ('000)`, na.rm = TRUE), .groups = "drop") # Summing estimates and dropping grouping
# Preparing the slope chart for impairment type summed by age
impairment_type2 <- ggplot(impairment_type2_data,
aes(x = Year, y = Estimate, group = Classification, color = Classification)) +
facet_wrap(~Age_group, nrow = 1)+
geom_line(size = 0.5) + # Add lines for each classification
geom_point(size = 1) + # Add points at each estimate
theme_minimal() +
labs(title = "Estimate of Australians with Vision Impairment by Condition",
x = "Year",
y = "Number of Australians",
color = "Condition") +
scale_color_viridis_d(option = "D", end = 0.9) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
scale_x_continuous(breaks = unique_years, labels = year_suffix) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
impairment_type2 <- ggplotly(impairment_type2, width = 1000, height = 500)
impairment_type2
htmlwidgets::saveWidget(impairment_type2, "impairment_all_ts.html")
NHS_pop <- rename(NHS_pop, Estimate = `ESTIMATE ('000)`)
# Filter for the 65–74 and 75+ age groups and sum them into a new "65+" variable
NHS_pop_65plus <- NHS_pop %>%
filter(Age_group %in% c("65–74", "75+")) %>%
group_by(Year) %>%
summarise(Estimate = sum(Estimate, na.rm = TRUE)) %>%
mutate(Age_group = "65+") %>%
ungroup()
# Combine the new 65+ group with the rest of the data
NHS_pop <- NHS_pop %>%
filter(!Age_group %in% c("65–74", "75+")) %>% # Remove the original 65–74 and 75+ age groups
bind_rows(NHS_pop_65plus) # Add the new 65+ group
# Get unique years from the data to use as breaks
unique_years <- sort(unique(NHS_pop$Year))
# Change Estimate ('000) to it's real value
NHS_pop$Estimate <- NHS_pop$Estimate * 1000
# Preparing the slope chart for impairment type summed by age
Australian_population <- ggplot(NHS_pop,
aes(x = Year, y = Estimate, group = Age_group, color = Age_group)) +
facet_wrap(~Age_group, nrow = 1)+
geom_line(size = 0.5) + # Add lines for each classification
geom_point(size = 1) + # Add points at each estimate
theme_minimal() +
labs(title = "Estimate number of Australians by Age Group and Year",
x = "Year",
y = "Number of Australians",
color = "Age_group") +
scale_color_viridis_d(option = "B", direction = -1) + # Colorblind-friendly palette
scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M")) + # Format y-axis in millions
scale_x_continuous(breaks = unique_years, labels = year_suffix) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
legend.title = element_text(size = 10, face = "bold"), # Adjust legend title size
legend.text = element_text(size = 9) # Adjust legend text size
)
Australian_population <- ggplotly(Australian_population, width = 800, height = 500)
Australian_population
htmlwidgets::saveWidget(Australian_population, "Australian_population.html")