# Ensure necessary libraries are loaded
library(dplyr)
# Read the poverty data
poverty_by_state <- read.csv("poverty_by_state_usa.csv")
# Read the food insecurity data
food_insecurity_by_state <- read.csv("food_insecurity_by_state.csv")
# Merge the two datasets on state_abrev
merged_data <- merge(poverty_by_state, food_insecurity_by_state, by = "state_abrev")
# View the first few rows of the merged dataset
glimpse(food_insecurity_by_state)
## Rows: 52
## Columns: 8
## $ state_abrev <chr> "U.S.", "AK", "AL", "AR",…
## $ food_insecurity <dbl> 12.2, 10.4, 11.5, 18.9, 1…
## $ very_low_food_insecurity <dbl> 4.7, 5.3, 4.4, 6.7, 4.7, …
## $ diabetes <dbl> 11.6, 8.3, 14.9, 10.6, 12…
## $ Obesity <dbl> 33.7, 32.1, 38.3, 33.2, 3…
## $ Enough.food..but.not.always.the.kinds.wanted <dbl> 0.6, 0.6, 0.8, 0.6, 0.7, …
## $ Sometimes.not.enough.to.eat <dbl> 0.3, 0.3, 0.2, 0.2, 0.2, …
## $ Often.not.enough.to.eat <dbl> 0.1, 0.1, 0.0, 0.2, 0.1, …
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tigris) # For US state shapefiles
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(sf) # For spatial data handling
library(readr) # For reading CSV data
# Step 1: Load the data
file_path <- "food_insecurity_by_state.csv" # Update with your file path
food_insecurity_by_state <- read_csv(file_path)
## Rows: 52 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state_abrev
## dbl (7): food_insecurity, very_low_food_insecurity, diabetes, Obesity, Enoug...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Step 2: Get US States shapefile (excluding territories)
us_states <- states(cb = TRUE, resolution = "20m") %>%
filter(!STATEFP %in% c("02", "15", "72")) # Exclude Alaska, Hawaii, Puerto Rico
## Retrieving data for the year 2021
## | | | 0% | |====== | 8% | |============ | 17% | |============= | 19% | |========================= | 36% | |============================ | 40% | |================================== | 49% | |======================================== | 58% | |=========================================== | 62% | |================================================ | 69% | |====================================================== | 77% | |=============================================================== | 90% | |======================================================================| 100%
# Step 3: Clean and join the data
food_insecurity_by_state_clean <- food_insecurity_by_state %>%
select(state_abrev, food_insecurity) %>%
mutate(state_abrev = ifelse(state_abrev == "U.S.", "DC", state_abrev)) # Adjust U.S. to DC for matching
# Merge shapefile with the food insecurity data
us_states <- us_states %>%
left_join(food_insecurity_by_state_clean, by = c("STUSPS" = "state_abrev"))
# Step 4: Plot the food insecurity map
ggplot(us_states) +
geom_sf(aes(fill = food_insecurity), color = "white", size = 0.1) +
scale_fill_gradient(
name = "Food Insecurity %",
low = "#f0f9e8", high = "#d73027", na.value = "grey90"
) +
labs(
title = "Food Insecurity Across U.S. States"#,
# subtitle = "Percentage of Food Insecure Population",
# caption = "Data Source: food_insecurity_by_state"
) +
theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
plot.subtitle = element_text(size = 12, hjust = 0.5),
axis.text = element_blank(),
axis.ticks = element_blank(),
panel.grid = element_blank(),
legend.position = "right"
)
library(ggplot2)
library(dplyr)
# Ensure necessary columns exist
merged_data <- merged_data %>%
rename(
percent_poverty = percent,
percent_food_insecurity = food_insecurity
)
# Calculate the national averages
us_poverty_percent <- mean(merged_data$percent_poverty, na.rm = TRUE)
us_food_insecurity_percent <- mean(merged_data$percent_food_insecurity, na.rm = TRUE)
# Sort data by poverty percentage for the dot plot
state_data <- merged_data %>% arrange(desc(percent_poverty))
# Dot plot with poverty and food insecurity overlaid
dot_plot <- ggplot(state_data, aes(x = reorder(state_abrev, -percent_poverty))) +
geom_point(aes(y = percent_poverty, color = "Poverty"), size = 3, alpha = 0.8) +
geom_point(aes(y = percent_food_insecurity, color = "Food Insecurity"), size = 3, alpha = 0.8) +
geom_hline(yintercept = us_poverty_percent, color = "red", linetype = "dashed", linewidth = 0.8) +
geom_hline(yintercept = us_food_insecurity_percent, color = "blue", linetype = "dashed", linewidth = 0.8) +
annotate("text", x = 5, y = us_poverty_percent + 0.5, label = "National Poverty Avg", color = "red", size = 4, hjust = 0) +
annotate("text", x = 5, y = us_food_insecurity_percent + 0.5, label = "National Food Insecurity Avg", color = "blue", size = 4, hjust = 0) +
labs(
title = "Poverty and Food Insecurity by State",
x = "State",
y = "Percent",
color = "Metric"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 90, hjust = 0.5),
panel.grid.major = element_line(color = "gray98"),
panel.grid.minor = element_blank()
)
# Scatter plot of poverty vs. food insecurity
scatter_plot <- ggplot(state_data, aes(x = percent_poverty, y = percent_food_insecurity)) +
geom_point(color = "darkgreen", size = 3, alpha = 0.8) +
geom_smooth(method = "lm", color = "blue", linetype = "dashed", se = FALSE) +
labs(
title = "Relationship Between Poverty and Food Insecurity",
x = "Poverty Percentage",
y = "Food Insecurity Percentage"
) +
theme_minimal()
# Print the plots
dot_plot
scatter_plot
## `geom_smooth()` using formula = 'y ~ x'
# Load necessary libraries
library(tidyr)
library(dplyr)
library(ggplot2)
library(readr)
# Read the dataset
poverty_by_state <- read.csv("poverty_by_state_usa.csv")
poverty_by_age_group <- read.csv("poverty_by_age_group_2009_to_2023.csv")
# Filter out 'US' and calculate the average percent for the US
us_percent <- poverty_by_state %>% filter(state_abrev == "US") %>% pull(percent)
state_data <- poverty_by_state %>% filter(state_abrev != "US")
# Convert to long format and ensure all categories are handled correctly
poverty_long <- poverty_by_age_group %>%
pivot_longer(
cols = c("all_peoplePeople", "under_18", "year_18_to_64", "over_65", "male", "female"),
names_to = "age_or_gender",
values_to = "percent"
) %>%
mutate(age_or_gender = case_when(
age_or_gender == "all_peoplePeople" ~ "All People",
age_or_gender == "under_18" ~ "Under 18",
age_or_gender == "year_18_to_64" ~ "18 to 64 Years",
age_or_gender == "over_65" ~ "Over 65",
age_or_gender == "male" ~ "Male",
age_or_gender == "female" ~ "Female",
TRUE ~ age_or_gender
))
# Filter for 2023 only
poverty_2023 <- poverty_long %>%
filter(year == 2023)
# Define custom color spectrum: Highlight "Under 18" and "Female", others in dark grey
group_colors <- c(
"Under 18" = "#66c2a5", # Green
"Female" = "#e78ac3", # Pink
"18 to 64 Years" = "grey30",
"Over 65" = "grey30",
"All People" = "grey30",
"Male" = "grey30"
)
# Ensure the ordering of age_or_gender
poverty_2023 <- poverty_2023 %>%
mutate(
age_or_gender = factor(age_or_gender, levels = c("Under 18", "18 to 64 Years", "Over 65", "All People", "Male", "Female"))
)
# Create a bar chart with highlights for Under 18 and Female
# Create a bar chart with bold annotations
bar_chart <- ggplot(poverty_2023, aes(x = age_or_gender, y = percent, fill = age_or_gender)) +
geom_bar(stat = "identity", alpha = 0.8, show.legend = FALSE) +
geom_text(aes(label = round(percent, 1)),
position = position_stack(vjust = 0.5), # Position at the center of the bar
color = "white", size = 5, fontface = "bold") + # Make annotations bold
scale_fill_manual(values = group_colors) + # Apply custom colors
labs(
title = "Poverty Percentage by Age Group and Gender (2023)",
x = "",
y = ""
) +
theme_minimal() +
theme(
axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
axis.ticks.x = element_blank(), # Remove x-axis ticks
panel.grid.major.x = element_blank(), # Remove x-axis gridlines
axis.text.y = element_blank(), # Remove y-axis tick labels
axis.ticks.y = element_blank() # Remove y-axis tick marks
)
# Display the bar chart
print(bar_chart)
# Create a time series plot with accessible colors and conditional line thickness
time_series <- ggplot(poverty_long, aes(x = year, y = percent, group = age_or_gender)) +
geom_line(aes(
color = age_or_gender,
linewidth = ifelse(age_or_gender %in% c("Under 18", "Female"), 1.5, 0.8) # Correctly inside aes()
)) +
scale_color_manual(values = group_colors) + # Apply custom colors
scale_linewidth_identity() + # Use linewidth values as is
scale_x_continuous(
breaks = seq(2009, max(poverty_long$year), by = 3),
limits = c(2009, max(poverty_long$year))
) +
labs(
title = "Poverty Percentage Over Time by Age Group and Gender",
x = "Year",
y = "Percentage (%)",
color = "Age Group / Gender"
) +
theme_minimal() +
theme(
legend.position = "bottom",
legend.title = element_text(face = "bold"),
axis.title.x = element_text(face = "bold"),
axis.title.y = element_text(face = "bold")
)
# Display the time series plot
print(time_series)
# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)
# Load the Excel file with multi-row headers
file_path <- "tableA1_pov_characteristics_1.xlsx" # Replace with your file path
data <- read_excel(file_path, col_names = FALSE)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`
# Combine multi-level headers into a single row
headers <- data[1:2, ] %>%
t() %>%
as.data.frame() %>%
unite("header", V1, V2, sep = " ", na.rm = TRUE)
# Handle empty headers
headers$header <- ifelse(headers$header == "", paste0("Empty_Column_", seq_along(headers$header)), headers$header)
# Assign combined headers as column names
colnames(data) <- headers$header
# Remove the header rows from the data
data <- data[-c(1, 2), ]
# Convert the dataset into a tidy format
tidy_data <- data %>%
pivot_longer(
cols = starts_with("20"), # Adjust if year columns start differently
names_to = c("Year", "Metric"),
names_sep = " ", # Separator to split year and metric
values_to = "Value"
)
# Save the tidy dataset to a CSV file
write.csv(tidy_data, "tidy_poverty_data.csv", row.names = FALSE)
# Print success message
cat("Tidy dataset saved as 'tidy_poverty_data.csv'")
## Tidy dataset saved as 'tidy_poverty_data.csv'
# Load necessary libraries
if (!require("ggplot2")) install.packages("ggplot2")
library(ggplot2)
library(dplyr)
# Read the CSV file
file_path <- "food_insecurity_indicators.csv"
food_data <- read.csv(file_path)
# Convert data to long format for ggplot
food_long <- food_data %>%
pivot_longer(
cols = c("Food.secure", "Low.food.security", "Very.low.food.security"),
names_to = "Food.Security.Level",
values_to = "Percentage"
) %>%
arrange(Indicator, desc(Food.Security.Level)) # Ensure proper stacking order
# Calculate cumulative percentages for annotation positions
food_long <- food_long %>%
group_by(Indicator) %>%
mutate(
Cumulative_Percentage = cumsum(Percentage),
Annotation_Position = Cumulative_Percentage - (Percentage / 2)
)
# Create Stacked Bar Chart with Annotations and remove y-axis ticks and gridlines
stacked_bar <- ggplot(food_long, aes(x = Indicator, y = Percentage, fill = Food.Security.Level)) +
geom_bar(stat = "identity") +
geom_text(
aes(
y = Annotation_Position,
label = ifelse(Percentage > 10, round(Percentage, 1), "")
),
color = "black", size = 3
) +
scale_fill_manual(values = c(
"Food.secure" = "#66c2a5",
"Low.food.security" = "#fc8d62",
"Very.low.food.security" = "#8da0cb"
)) +
labs(
title = "Food Insecurity Indicators by Food Security Level",
x = "Indicator",
y = "Percentage",
fill = "Food Security Level"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
axis.ticks.y = element_blank(), # Remove y-axis tick marks
panel.grid.major.y = element_blank(), # Remove major y-axis gridlines
panel.grid.minor.y = element_blank(), # Remove minor y-axis gridlines
panel.grid.major.x = element_blank(), # Remove major y-axis gridlines
panel.grid.minor.x = element_blank(), # Remove minor y-axis gridlines
legend.position = "bottom" # Place legend at the bottom
)
# Plot the chart
stacked_bar
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_bar()`).
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_text()`).
# Load necessary libraries
library(ggplot2)
library(dplyr)
# Read the CSV file
file_path <- "food_assistance_by_age_group.csv"
snap_data <- read.csv(file_path)
# Clean and convert the columns to numeric
snap_data <- snap_data %>%
mutate(
Total_Number = as.numeric(gsub("[^0-9]", "", Total_Number)),
SNAP_Number = as.numeric(gsub("[^0-9]", "", SNAP_Number))
)
# Filter data for each group
snap_data_age <- snap_data[snap_data$group == "age", ]
snap_data_meal <- snap_data[snap_data$group == "free or reduced school meal", ]
snap_data_region <- snap_data[snap_data$group == "region", ]
snap_data_race <- snap_data[snap_data$group == "poverty status", ]
# Define order for each chart
age_order <- c("0 to 5 years", "6 to 11 years", "12 to 17 years", "18-24 years", "25-44 years", "45-64 years", "65 years and older")
meal_order <- c("5 to 11 years", "12 to 14 years", "15 to 18 years")
poverty_order <- c("< 50% Poverty", "< 100% Poverty", "< 150% Poverty")
region_order <- c("Midwest", "Northeast", "South", "West")
# Define colors for each chart
age_color <- "#66c2a5" # Green for Age Group
meal_color <- "#fc8d62" # Orange for Free or Reduced Meal
poverty_color <- "#8da0cb" # Blue for Poverty Threshold
region_color <- "#a6d854" # Greenish Yellow for Region
# ---- Plot for Age Group ----
snap_data_age$Age_Group <- factor(snap_data_age$Age_Group, levels = age_order)
# Add fade effect for the shortest bar
snap_data_age <- snap_data_age %>%
mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))
plot_age <- ggplot(snap_data_age, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
geom_bar(stat = "identity", fill = age_color, show.legend = FALSE) +
scale_alpha(range = c(0.3, 1)) +
labs(x = "Age Group", y = "SNAP Percent (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))
# ---- Plot for Free or Reduced Meal ----
snap_data_meal$Age_Group <- factor(snap_data_meal$Age_Group, levels = meal_order)
# Add fade effect for the shortest bar
snap_data_meal <- snap_data_meal %>%
mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))
plot_meal <- ggplot(snap_data_meal, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
geom_bar(stat = "identity", fill = meal_color, show.legend = FALSE) +
scale_alpha(range = c(0.3, 1)) +
labs(x = "Free or Reduced Meal", y = "SNAP Percent (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))
# ---- Plot for Poverty Status ----
snap_data_race$Age_Group <- recode(snap_data_race$Age_Group,
"Less than 100% of poverty threshold" = "< 100% Poverty",
"Less than 150% of poverty threshold" = "< 150% Poverty",
"Less than 50% of poverty threshold" = "< 50% Poverty"
)
snap_data_race$Age_Group <- factor(snap_data_race$Age_Group, levels = poverty_order)
# Add fade effect for the shortest bar
snap_data_race <- snap_data_race %>%
mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))
plot_race <- ggplot(snap_data_race, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
geom_bar(stat = "identity", fill = poverty_color, show.legend = FALSE) +
scale_alpha(range = c(0.3, 1)) +
labs(x = "Poverty Threshold (%)", y = "SNAP Percent (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))
# ---- Plot for Region ----
snap_data_region$Age_Group <- factor(snap_data_region$Age_Group, levels = region_order)
# Add fade effect for the shortest bar
snap_data_region <- snap_data_region %>%
mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))
plot_region <- ggplot(snap_data_region, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
geom_bar(stat = "identity", fill = region_color, show.legend = FALSE) +
scale_alpha(range = c(0.3, 1)) +
labs(x = "Region", y = "SNAP Percent (%)") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))
# ---- Display All Plots ----
print(plot_age)
print(plot_meal)
print(plot_race)
print(plot_region)
# Load necessary library
library(ggplot2)
library(dplyr)
# Read the CSV file
file_path <- "tidy_burden_of_malnutrition.csv"
malnutrition_data <- read.csv(file_path)
# Ungroup and list unique values with explicit expansion
categorical_variables_expanded <- malnutrition_data %>%
select(where(is.character)) %>%
summarise(across(everything(), ~ paste(unique(.), collapse = ", "))) %>%
pivot_longer(cols = everything(), names_to = "Variable", values_to = "Unique_Values")
# Print the unique values
categorical_variables_expanded
## # A tibble: 3 × 2
## Variable Unique_Values
## <chr> <chr>
## 1 indicator adolescent_obesity, adolescent_overweight, adolescent_thinness…
## 2 disaggregation sex, pregnancy, all, age, Cause
## 3 disagg.value Boys, Girls, All women, Non-pregnant women, Pregnant women, Fe…
library(ggplot2)
library(dplyr)
# Group by year and indicator, calculate the mean value
average_data <- malnutrition_data %>%
filter(indicator %in% c("overweight", "stunting", "wasting")) %>%
group_by(year, indicator) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot
ggplot(average_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
title = "Prevalence of stunting, wasting and overweight in children under 5 years of age",
x = "Year",
y = "Prevalence (%)",
color = "Indicator"
) +
scale_color_manual(
values = c(
"overweight" = "#66c2a5",
"stunting" = "#fc8d62",
"wasting" = "#8da0cb"
)
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
library(ggplot2)
library(dplyr)
# Group by year and indicator, calculate the mean value
average_data <- malnutrition_data %>%
filter(indicator %in% c("lbw")) %>%
group_by(year, indicator) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot
ggplot(average_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
title = "Prevalence of infants with low birth weight",
x = "Year",
y = "Prevalence (%)",
color = "Indicator"
) +
scale_color_manual(
values = c(
"lbw" = "#fc8d62"
)
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)
library(ggplot2)
library(dplyr)
# Filter and group by year, indicator, and disagg.value
adolescent_data <- malnutrition_data %>%
filter(indicator %in% c("adolescent_obesity", "adolescent_overweight", "adolescent_thinness")) %>%
group_by(year, indicator, disagg.value) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot with facet wrap and separate curves for Boys and Girls
ggplot(adolescent_data, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
geom_line(size = 1) +
geom_point(size = 2) +
facet_wrap(~indicator, scales = "free_y") +
labs(
title = "Prevalence of thinness, overweight and obesity in children and adolescents\naged 5–19 years",
x = "Year",
y = "Mean Percentage",
color = "Sex"
) +
scale_color_manual(
values = c(
"Boys" = "#66c2a5",
"Girls" = "#fc8d62"
)
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 0),
legend.position = "bottom"
)
library(ggplot2)
library(dplyr)
# Filter and group by year, indicator, and disagg.value
adolescent_data <- malnutrition_data %>%
filter(indicator %in% c(
"adult_diabetes",
"adult_obesity",
"adult_overweight"
)) %>%
group_by(year, indicator, disagg.value) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot with facet wrap and separate curves for Boys and Girls
ggplot(adolescent_data, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
geom_line(size = 1) +
geom_point(size = 2) +
facet_wrap(~indicator, scales = "free_y") +
labs(
title = "Prevalence of underweight, overweight and obesity in adults aged 18 years and over",
x = "Year",
y = "Mean Percentage",
color = "Sex"
) +
scale_color_manual(
values = c(
"Male" = "#66c2a5",
"Female" = "#fc8d62"
)
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
library(ggplot2)
library(dplyr)
# Group by year and indicator, calculate the mean value for adolescent indicators
adolescent_data <- malnutrition_data %>%
filter(indicator %in% c("adolescent_obesity", "adolescent_overweight", "adolescent_thinness")) %>%
group_by(year, indicator) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot
ggplot(adolescent_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
title = "Time Series of Average Adolescent Malnutrition Indicators",
x = "Year",
y = "Mean Percentage",
color = "Indicator"
) +
scale_color_manual(
values = c(
"adolescent_obesity" = "#66c2a5",
"adolescent_overweight" = "#fc8d62",
"adolescent_thinness" = "#8da0cb"
)
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
library(ggplot2)
library(dplyr)
# Filter and group by year and disagg.value for "adult_anaemia"
anaemia_data_filtered <- malnutrition_data %>%
filter(indicator == "adult_anaemia") %>%
group_by(year, disagg.value) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create the time series plot for adult anaemia
ggplot(anaemia_data_filtered, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
geom_line(size = 1) +
geom_point(size = 2) +
labs(
title = "Time Series of Adult Anaemia by Disaggregation",
x = "Year",
y = "Mean Percentage",
color = "Disaggregation"
) +
scale_color_viridis_d(name = "Disaggregation") +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "bottom"
)
library(ggplot2)
library(dplyr)
# Filter and group by year and disagg.value for "overweight" in 2018, excluding "Both"
blood_pressure_data_filtered <- malnutrition_data %>%
filter(indicator == "overweight", year == "2018", disagg.value != "Both") %>%
group_by(year, disagg.value) %>%
summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")
# Create a bar plot for "overweight" by disaggregation value
ggplot(blood_pressure_data_filtered, aes(x = disagg.value, y = mean_value, fill = disagg.value)) +
geom_bar(stat = "identity", alpha = 0.8) +
labs(
title = "Overweight by Disaggregation (2018)",
x = "Disaggregation",
y = "Mean Percentage",
fill = "Disaggregation"
) +
scale_fill_viridis_d() +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
legend.position = "none"
)
# Load necessary libraries
if (!require("ggplot2")) install.packages("ggplot2")
library(ggplot2)
library(dplyr)
# Read the CSV file
file_path <- "fruit_vegetable_sweets_age1_5.csv"
food_data <- read.csv(file_path)
glimpse(food_data)
## Rows: 51
## Columns: 5
## $ State <chr> "Alabama", "Alaska", "Arizona", "Arkansas", …
## $ Total <int> 339, 350, 315, 327, 342, 503, 368, 340, 388,…
## $ Fruit <chr> "39.3 (32.0–47.2)", "21.6 (15.5–29.3)", "30.…
## $ Vegetables <chr> "57.3 (49.6–64.8)", "50.0 (41.8–58.1)", "50.…
## $ Sugar.sweetened.beverages <chr> "66.5 (59.7–72.7)", "54.9 (46.7–62.7)", "59.…
library(dplyr)
# Clean and extract the mean, lower, and upper bounds
foods_data_clean <- food_data %>%
mutate(
# Extract mean
Fruit_Mean = as.numeric(sub(" \\(.*", "", Fruit)),
# Extract lower bound for both formats (dash or comma, ignoring †or other symbols)
Fruit_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", Fruit)),
# Extract upper bound for both formats (dash or comma, ignoring †or other symbols)
Fruit_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", Fruit)),
# Repeat the process for Vegetables
Vegetables_Mean = as.numeric(sub(" \\(.*", "", Vegetables)),
Vegetables_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", Vegetables)),
Vegetables_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", Vegetables)),
# Repeat for Sugar-Sweetened Beverages
Sugar_Sweetened_Beverages_Mean = as.numeric(sub(" \\(.*", "", `Sugar.sweetened.beverages`)),
Sugar_Sweetened_Beverages_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", `Sugar.sweetened.beverages`)),
Sugar_Sweetened_Beverages_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", `Sugar.sweetened.beverages`))
)
# Verify the cleaned data
glimpse(foods_data_clean)
## Rows: 51
## Columns: 14
## $ State <chr> "Alabama", "Alaska", "Arizona", "Arkan…
## $ Total <int> 339, 350, 315, 327, 342, 503, 368, 340…
## $ Fruit <chr> "39.3 (32.0–47.2)", "21.6 (15.5–29.3)"…
## $ Vegetables <chr> "57.3 (49.6–64.8)", "50.0 (41.8–58.1)"…
## $ Sugar.sweetened.beverages <chr> "66.5 (59.7–72.7)", "54.9 (46.7–62.7)"…
## $ Fruit_Mean <dbl> 39.3, 21.6, 30.0, 36.3, 32.3, 25.6, 33…
## $ Fruit_Lower <dbl> 32.0, 15.5, 22.2, 28.5, 24.9, 20.3, 25…
## $ Fruit_Upper <dbl> 47.2, 29.3, 39.2, 44.8, 40.7, 31.8, 42…
## $ Vegetables_Mean <dbl> 57.3, 50.0, 50.6, 51.5, 50.5, 47.2, 48…
## $ Vegetables_Lower <dbl> 49.6, 41.8, 41.5, 42.9, 42.5, 40.7, 40…
## $ Vegetables_Upper <dbl> 64.8, 58.1, 59.7, 60.1, 58.4, 53.8, 56…
## $ Sugar_Sweetened_Beverages_Mean <dbl> 66.5, 54.9, 59.6, 66.1, 53.9, 56.1, 42…
## $ Sugar_Sweetened_Beverages_Lower <dbl> 59.7, 46.7, 50.4, 57.8, 46.0, 49.7, 34…
## $ Sugar_Sweetened_Beverages_Upper <dbl> 72.7, 62.7, 68.1, 73.6, 61.5, 62.2, 51…
# Merge the datasets
merged_data <- merge(poverty_by_state, food_insecurity_by_state, by = "state_abrev")
merged_data <- merge(merged_data, foods_data_clean, by.x = "states", by.y = "State")
glimpse(merged_data)
## Rows: 51
## Columns: 26
## $ states <chr> "Alabama", "Alaska", "A…
## $ state_abrev <chr> "AL", "AK", "AZ", "AR",…
## $ hunger_population <chr> "727", "74", "903", "47…
## $ margin_of_error <int> 77, 9, 78, 35, 198, 62,…
## $ percent <dbl> 14.6, 10.4, 12.4, 15.8,…
## $ percent_margin_of_error <dbl> 1.5, 1.3, 1.1, 1.2, 0.5…
## $ food_insecurity <dbl> 11.5, 10.4, 11.8, 18.9,…
## $ very_low_food_insecurity <dbl> 4.4, 5.3, 4.7, 6.7, 4.1…
## $ diabetes <dbl> 14.9, 8.3, 12.3, 10.6, …
## $ Obesity <dbl> 38.3, 32.1, 37.4, 33.2,…
## $ `Enough food, but not always the kinds wanted` <dbl> 0.8, 0.6, 0.7, 0.6, 0.5…
## $ `Sometimes not enough to eat` <dbl> 0.2, 0.3, 0.2, 0.2, 0.4…
## $ `Often not enough to eat` <dbl> 0.0, 0.1, 0.1, 0.2, 0.1…
## $ Total <int> 339, 350, 315, 327, 342…
## $ Fruit <chr> "39.3 (32.0–47.2)", "21…
## $ Vegetables <chr> "57.3 (49.6–64.8)", "50…
## $ Sugar.sweetened.beverages <chr> "66.5 (59.7–72.7)", "54…
## $ Fruit_Mean <dbl> 39.3, 21.6, 30.0, 36.3,…
## $ Fruit_Lower <dbl> 32.0, 15.5, 22.2, 28.5,…
## $ Fruit_Upper <dbl> 47.2, 29.3, 39.2, 44.8,…
## $ Vegetables_Mean <dbl> 57.3, 50.0, 50.6, 51.5,…
## $ Vegetables_Lower <dbl> 49.6, 41.8, 41.5, 42.9,…
## $ Vegetables_Upper <dbl> 64.8, 58.1, 59.7, 60.1,…
## $ Sugar_Sweetened_Beverages_Mean <dbl> 66.5, 54.9, 59.6, 66.1,…
## $ Sugar_Sweetened_Beverages_Lower <dbl> 59.7, 46.7, 50.4, 57.8,…
## $ Sugar_Sweetened_Beverages_Upper <dbl> 72.7, 62.7, 68.1, 73.6,…
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Ensure relevant columns are numeric
numeric_data <- merged_data %>%
select(food_insecurity, Obesity, diabetes, Vegetables_Mean, Fruit_Mean,
Sugar_Sweetened_Beverages_Mean, hunger_population) %>%
mutate_all(~ as.numeric(gsub("[^0-9.]", "", .))) # Convert to numeric safely
# Check for any remaining non-numeric data
print(sapply(numeric_data, class))
## food_insecurity Obesity
## "numeric" "numeric"
## diabetes Vegetables_Mean
## "numeric" "numeric"
## Fruit_Mean Sugar_Sweetened_Beverages_Mean
## "numeric" "numeric"
## hunger_population
## "numeric"
# Calculate correlation matrix
correlation_matrix <- cor(numeric_data, use = "complete.obs")
# Melt the correlation matrix for heatmap plotting
melted_correlation <- melt(correlation_matrix)
# Create the heatmap
ggplot(melted_correlation, aes(x = Var2, y = Var1, fill = value)) +
geom_tile(color = "white") +
scale_fill_gradient2(
low = "#d73027", mid = "#ffffbf", high = "#1a9850", midpoint = 0,
limit = c(-1, 1), space = "Lab", name = "Correlation"
) +
labs(
title = "Correlation Heatmap: Food Insecurity and Other Variables",
x = "Variables",
y = "Food Insecurity"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold")
)
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
# Convert hunger_population to numeric
merged_data <- merged_data %>%
mutate(hunger_population = as.numeric(gsub("[^0-9]", "", hunger_population)))
# Prepare data for the heatmap
heatmap_data <- merged_data %>%
select(state_abrev, hunger_population, diabetes, Obesity,
Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
pivot_longer(
cols = -c(state_abrev, hunger_population), # Keep state_abrev and hunger_population fixed
names_to = "Variable",
values_to = "Value"
)
# Create the heatmap with hunger_population ordered by itself
# Increase plot height and adjust y-axis text for readability
ggplot(heatmap_data, aes(x = Variable, y = reorder(hunger_population, hunger_population), fill = Value)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Value") +
labs(
title = "Heatmap: Hunger Population vs Other Variables",
x = "Variables",
y = "Hunger Population (Ordered)"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(size = 8), # Increase y-axis text size
plot.title = element_text(hjust = 0.5, face = "bold"),
panel.spacing = unit(1, "lines") # Add spacing between elements
) +
coord_fixed(ratio = 0.07) # Adjust ratio to make the y-axis taller
# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
# Convert hunger_population to numeric
merged_data <- merged_data %>%
mutate(hunger_population = as.numeric(gsub("[^0-9]", "", hunger_population)))
# Prepare data for the heatmap
heatmap_data <- merged_data %>%
select(state_abrev, hunger_population, diabetes, Obesity,
Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
pivot_longer(
cols = -c(state_abrev, hunger_population), # Keep state_abrev and hunger_population fixed
names_to = "Variable",
values_to = "Value"
) %>%
# Rename variables for better clarity
mutate(Variable = case_when(
Variable == "Vegetables_Mean" ~ "Less Vegetable Consumption",
Variable == "Fruit_Mean" ~ "Less Fruit Consumption",
Variable == "Sugar_Sweetened_Beverages_Mean" ~ "High Sugar Drink Consumption",
TRUE ~ Variable
))
# Sort x-axis variables based on the average value (lighter to darker)
variable_order <- heatmap_data %>%
group_by(Variable) %>%
summarize(Mean_Value = mean(Value, na.rm = TRUE)) %>%
arrange(Mean_Value) %>%
pull(Variable)
# Reorder Variable levels based on calculated order
heatmap_data$Variable <- factor(heatmap_data$Variable, levels = variable_order)
# Create the heatmap with hunger_population ordered by itself
ggplot(heatmap_data, aes(x = Variable, y = reorder(hunger_population, hunger_population), fill = Value)) +
geom_tile(color = "white") +
scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Value") +
labs(
title = "Impact of Hunger Population",
x = "Variables",
y = "Hunger Population"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
axis.text.y = element_text(size = 8), # Increase y-axis text size
plot.title = element_text(hjust = 0.5, face = "bold"),
panel.spacing = unit(1, "lines") # Add spacing between elements
) +
coord_fixed(ratio = 0.07) # Adjust ratio to make the y-axis taller
# Compute correlations and sort in ascending order
cor_data <- merged_data %>%
select(food_insecurity, hunger_population, diabetes, Obesity, Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
summarise(across(everything(), ~ cor(., food_insecurity, use = "complete.obs"))) %>%
pivot_longer(everything(), names_to = "Variable", values_to = "Correlation") %>%
filter(Variable != "food_insecurity") %>% # Exclude food_insecurity
arrange(Correlation) %>% # Sort by ascending order
mutate(Variable = factor(Variable, levels = Variable)) # Reorder factor levels
# Plot heatmap with sorted correlation
ggplot(cor_data, aes(x = Variable, y = "Food Insecurity", fill = Correlation)) +
geom_tile(color = "white") +
geom_text(aes(label = round(Correlation, 2)), color = "white", size = 4) +
scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0, name = "Correlation") +
labs(
title = "Correlation Heatmap: Food Insecurity vs Other Variables",
x = "Variables",
y = ""
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold")
)
# Scatterplot: Food Insecurity vs Hunger Population with Color Encoding
ggplot(merged_data, aes(x = food_insecurity, y = hunger_population)) +
geom_point(aes(color = Obesity), size = 3, alpha = 0.7) +
scale_color_gradient(low = "lightgreen", high = "darkgreen", name = "Obesity Rate") +
labs(
title = "Scatterplot: Food Insecurity vs Hunger Population (Colored by Obesity)",
x = "Food Insecurity (%)",
y = "Hunger Population"
) +
theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
)
# Create a dual-axis heatmap/scatterplot with limited y-axis range
ggplot(merged_data, aes(x = state_abrev)) +
geom_col(aes(y = hunger_population), fill = "skyblue", alpha = 0.7) + # Hunger population
geom_line(aes(y = food_insecurity * 1000), color = "darkred", size = 1) + # Scaled food insecurity
scale_y_continuous(
name = "Hunger Population",
limits = c(0, 5000), # Limit y-axis between 0 and 5000
sec.axis = sec_axis(~./1000, name = "Food Insecurity (%)") # Secondary axis scaled
) +
labs(
title = "Hunger Population and Food Insecurity Across States",
x = "States",
y = "Hunger Population"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold")
)
## Warning: Removed 51 rows containing missing values or values outside the scale range
## (`geom_line()`).
# Create a bar plot with color intensity proportional to food insecurity
ggplot(merged_data, aes(x = state_abrev, y = hunger_population, fill = food_insecurity)) +
geom_col(alpha = 0.8) + # Bars for hunger population
scale_fill_gradient(
low = "lightblue",
high = "darkred",
name = "Food Insecurity (%)"
) +
scale_y_continuous(
limits = c(0, 5000), # Limit y-axis between 0 and 5000
name = "Hunger Population"
) +
labs(
title = "Hunger Population Across States with Food Insecurity Intensity",
x = "States",
y = "Hunger Population"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold")
)
# Add a region column based on state_abrev
merged_data <- merged_data %>%
mutate(region = case_when(
state_abrev %in% c("ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA") ~ "Northeast",
state_abrev %in% c("IL", "IN", "IA", "KS", "MI", "MN", "MO", "ND", "NE", "OH", "SD", "WI") ~ "Midwest",
state_abrev %in% c("DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX") ~ "South",
state_abrev %in% c("AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY", "AK", "CA", "HI", "OR", "WA") ~ "West",
TRUE ~ "Unknown"
)) %>%
filter(state_abrev != "DC") # Drop DC explicitly
# Create a bar plot faceted by region with food insecurity as color intensity
ggplot(merged_data, aes(x = state_abrev, y = hunger_population, fill = food_insecurity)) +
geom_col(alpha = 0.8) + # Bars for hunger population
scale_fill_gradient(
low = "lightblue",
high = "darkred",
name = "Food Insecurity (%)"
) +
scale_y_continuous(
limits = c(0, 5000), # Limit y-axis range
name = "Hunger Population"
) +
labs(
title = "Hunger Population Across States by Region with Food Insecurity Intensity",
x = "States",
y = "Hunger Population"
) +
facet_wrap(~region, scales = "free_x", nrow = 2) + # Facet by region
theme_minimal() +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, face = "bold", size = 8), # Adjust x-axis labels
strip.text = element_text(face = "bold", size = 10), # Bold facet titles
plot.title = element_text(hjust = 0.5, face = "bold"),
legend.position = "bottom", # Place legend at bottom
legend.title = element_text(face = "bold"),
legend.text = element_text(size = 8)
)
# Load libraries
library(ggplot2)
library(dplyr)
# Prepare data in long format for faceting
faceted_data <- merged_data %>%
select(food_insecurity, Obesity, diabetes, Fruit_Mean, Vegetables_Mean) %>%
pivot_longer(
cols = c(Obesity, diabetes, Fruit_Mean, Vegetables_Mean),
names_to = "variable",
values_to = "value"
)
# Custom facet labels
facet_labels <- c(
"Obesity" = "Obesity",
"diabetes" = "Diabetes",
"Fruit_Mean" = "Less Fruits",
"Vegetables_Mean" = "Less Veggies"
)
# Create faceted scatter plots with relabeled titles
ggplot(faceted_data, aes(x = food_insecurity, y = value)) +
geom_point(aes(color = variable), size = 2, alpha = 0.8) +
geom_smooth(method = "lm", se = TRUE, color = "black") +
facet_wrap(~ variable, scales = "free_y", ncol = 2, labeller = labeller(variable = facet_labels)) +
scale_color_manual(values = c(
"Obesity" = "darkred",
"diabetes" = "darkblue",
"Fruit_Mean" = "forestgreen",
"Vegetables_Mean" = "darkorange"
)) +
labs(
title = "Food Insecurity vs Health and Nutrition Indicators",
x = "Food Insecurity Rate (%)",
y = "Value",
color = "Variable"
) +
theme_minimal() +
theme(
strip.text = element_text(face = "bold", size = 10), # Title for each facet
legend.position = "none" # Remove legend for clarity
)
## `geom_smooth()` using formula = 'y ~ x'
This dot plot effectively visualizes the relationship between food insecurity and vegetable consumption rates by state:
Would you like further statistical analysis to quantify this relationship or additional visualizations for fruit or sugar-sweetened beverages?
# Dot plot with jitter, uniform dot size, and modified y-axis
ggplot(merged_data, aes(x = reorder(state_abrev, -food_insecurity), y = Vegetables_Mean)) +
geom_point(aes(color = food_insecurity), alpha = 0.7, size = 3, position = position_jitter(width = 0.15, height = 0)) +
geom_errorbar(aes(ymin = Vegetables_Lower, ymax = Vegetables_Upper), width = 0.1, color = "gray90") +
geom_text(aes(label = state_abrev), hjust = -0.3, size = 3, color = "black") +
coord_flip() +
scale_color_gradient(low = "lightblue", high = "darkred") +
labs(
title = "Statewise Food Insecurity and Vegetable Consumption",
x = "State",
y = "Less Than Daily\nVegetable Consumption (%)",
color = "Food Insecurity Rate (%)"
) +
theme_minimal() +
theme(
axis.text.y = element_blank(), # Remove y-axis tick labels
axis.ticks.y = element_blank(), # Remove y-axis tick marks
panel.grid.major.y = element_blank(), # Remove major grid lines on the y-axis
panel.grid.major.x = element_blank(), # Remove major grid lines on the x-axis
panel.grid.minor.x = element_blank() # Remove major grid lines on the x-axis
)