DATA 608 Story 6

Load the data

# Ensure necessary libraries are loaded
library(dplyr)

# Read the poverty data
poverty_by_state <- read.csv("poverty_by_state_usa.csv")

# Read the food insecurity data
food_insecurity_by_state <- read.csv("food_insecurity_by_state.csv")

# Merge the two datasets on state_abrev
merged_data <- merge(poverty_by_state, food_insecurity_by_state, by = "state_abrev")

# View the first few rows of the merged dataset
glimpse(food_insecurity_by_state)

## Rows: 52
## Columns: 8
## $ state_abrev                                  <chr> "U.S.", "AK", "AL", "AR",…
## $ food_insecurity                              <dbl> 12.2, 10.4, 11.5, 18.9, 1…
## $ very_low_food_insecurity                     <dbl> 4.7, 5.3, 4.4, 6.7, 4.7, …
## $ diabetes                                     <dbl> 11.6, 8.3, 14.9, 10.6, 12…
## $ Obesity                                      <dbl> 33.7, 32.1, 38.3, 33.2, 3…
## $ Enough.food..but.not.always.the.kinds.wanted <dbl> 0.6, 0.6, 0.8, 0.6, 0.7, …
## $ Sometimes.not.enough.to.eat                  <dbl> 0.3, 0.3, 0.2, 0.2, 0.2, …
## $ Often.not.enough.to.eat                      <dbl> 0.1, 0.1, 0.0, 0.2, 0.1, …

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tigris)  # For US state shapefiles

## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.

library(sf)      # For spatial data handling
library(readr)   # For reading CSV data

# Step 1: Load the data
file_path <- "food_insecurity_by_state.csv"  # Update with your file path

food_insecurity_by_state <- read_csv(file_path)

## Rows: 52 Columns: 8

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state_abrev
## dbl (7): food_insecurity, very_low_food_insecurity, diabetes, Obesity, Enoug...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Step 2: Get US States shapefile (excluding territories)
us_states <- states(cb = TRUE, resolution = "20m") %>%
  filter(!STATEFP %in% c("02", "15", "72"))  # Exclude Alaska, Hawaii, Puerto Rico

## Retrieving data for the year 2021

##   |                                                                              |                                                                      |   0%  |                                                                              |======                                                                |   8%  |                                                                              |============                                                          |  17%  |                                                                              |=============                                                         |  19%  |                                                                              |=========================                                             |  36%  |                                                                              |============================                                          |  40%  |                                                                              |==================================                                    |  49%  |                                                                              |========================================                              |  58%  |                                                                              |===========================================                           |  62%  |                                                                              |================================================                      |  69%  |                                                                              |======================================================                |  77%  |                                                                              |===============================================================       |  90%  |                                                                              |======================================================================| 100%

# Step 3: Clean and join the data
food_insecurity_by_state_clean <- food_insecurity_by_state %>%
  select(state_abrev, food_insecurity) %>%
  mutate(state_abrev = ifelse(state_abrev == "U.S.", "DC", state_abrev))  # Adjust U.S. to DC for matching

# Merge shapefile with the food insecurity data
us_states <- us_states %>%
  left_join(food_insecurity_by_state_clean, by = c("STUSPS" = "state_abrev"))

# Step 4: Plot the food insecurity map
ggplot(us_states) +
  geom_sf(aes(fill = food_insecurity), color = "white", size = 0.1) +
  scale_fill_gradient(
    name = "Food Insecurity %",
    low = "#f0f9e8", high = "#d73027", na.value = "grey90"
  ) +
  labs(
    title = "Food Insecurity Across U.S. States"#,
   # subtitle = "Percentage of Food Insecure Population",
   # caption = "Data Source: food_insecurity_by_state"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold", size = 16, hjust = 0.5),
    plot.subtitle = element_text(size = 12, hjust = 0.5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    legend.position = "right"
  )

library(ggplot2)
library(dplyr)

# Ensure necessary columns exist
merged_data <- merged_data %>%
  rename(
    percent_poverty = percent,
    percent_food_insecurity = food_insecurity
  )

# Calculate the national averages
us_poverty_percent <- mean(merged_data$percent_poverty, na.rm = TRUE)
us_food_insecurity_percent <- mean(merged_data$percent_food_insecurity, na.rm = TRUE)

# Sort data by poverty percentage for the dot plot
state_data <- merged_data %>% arrange(desc(percent_poverty))

# Dot plot with poverty and food insecurity overlaid
dot_plot <- ggplot(state_data, aes(x = reorder(state_abrev, -percent_poverty))) +
  geom_point(aes(y = percent_poverty, color = "Poverty"), size = 3, alpha = 0.8) +
  geom_point(aes(y = percent_food_insecurity, color = "Food Insecurity"), size = 3, alpha = 0.8) +
  geom_hline(yintercept = us_poverty_percent, color = "red", linetype = "dashed", linewidth = 0.8) +
  geom_hline(yintercept = us_food_insecurity_percent, color = "blue", linetype = "dashed", linewidth = 0.8) +
  annotate("text", x = 5, y = us_poverty_percent + 0.5, label = "National Poverty Avg", color = "red", size = 4, hjust = 0) +
  annotate("text", x = 5, y = us_food_insecurity_percent + 0.5, label = "National Food Insecurity Avg", color = "blue", size = 4, hjust = 0) +
  labs(
    title = "Poverty and Food Insecurity by State",
    x = "State",
    y = "Percent",
    color = "Metric"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5),
    panel.grid.major = element_line(color = "gray98"),
    panel.grid.minor = element_blank()
  )

# Scatter plot of poverty vs. food insecurity
scatter_plot <- ggplot(state_data, aes(x = percent_poverty, y = percent_food_insecurity)) +
  geom_point(color = "darkgreen", size = 3, alpha = 0.8) +
  geom_smooth(method = "lm", color = "blue", linetype = "dashed", se = FALSE) +
  labs(
    title = "Relationship Between Poverty and Food Insecurity",
    x = "Poverty Percentage",
    y = "Food Insecurity Percentage"
  ) +
  theme_minimal()

# Print the plots
dot_plot

scatter_plot

## `geom_smooth()` using formula = 'y ~ x'

# Load necessary libraries
library(tidyr)
library(dplyr)
library(ggplot2)
library(readr)

# Read the dataset
poverty_by_state <- read.csv("poverty_by_state_usa.csv")
poverty_by_age_group <- read.csv("poverty_by_age_group_2009_to_2023.csv")

# Filter out 'US' and calculate the average percent for the US
us_percent <- poverty_by_state %>% filter(state_abrev == "US") %>% pull(percent)
state_data <- poverty_by_state %>% filter(state_abrev != "US")

# Convert to long format and ensure all categories are handled correctly
poverty_long <- poverty_by_age_group %>%
  pivot_longer(
    cols = c("all_peoplePeople", "under_18", "year_18_to_64", "over_65", "male", "female"),
    names_to = "age_or_gender",
    values_to = "percent"
  ) %>%
  mutate(age_or_gender = case_when(
    age_or_gender == "all_peoplePeople" ~ "All People",
    age_or_gender == "under_18" ~ "Under 18",
    age_or_gender == "year_18_to_64" ~ "18 to 64 Years",
    age_or_gender == "over_65" ~ "Over 65",
    age_or_gender == "male" ~ "Male",
    age_or_gender == "female" ~ "Female",
    TRUE ~ age_or_gender
  ))

# Filter for 2023 only
poverty_2023 <- poverty_long %>%
  filter(year == 2023)

# Define custom color spectrum: Highlight "Under 18" and "Female", others in dark grey
group_colors <- c(
  "Under 18" = "#66c2a5",  # Green
  "Female" = "#e78ac3",    # Pink
  "18 to 64 Years" = "grey30", 
  "Over 65" = "grey30", 
  "All People" = "grey30", 
  "Male" = "grey30"
)

# Ensure the ordering of age_or_gender
poverty_2023 <- poverty_2023 %>%
  mutate(
    age_or_gender = factor(age_or_gender, levels = c("Under 18", "18 to 64 Years", "Over 65", "All People", "Male", "Female"))
  )

# Create a bar chart with highlights for Under 18 and Female
# Create a bar chart with bold annotations
bar_chart <- ggplot(poverty_2023, aes(x = age_or_gender, y = percent, fill = age_or_gender)) +
  geom_bar(stat = "identity", alpha = 0.8, show.legend = FALSE) +
  geom_text(aes(label = round(percent, 1)), 
            position = position_stack(vjust = 0.5), # Position at the center of the bar
            color = "white", size = 5, fontface = "bold") +  # Make annotations bold
  scale_fill_manual(values = group_colors) +  # Apply custom colors
  labs(
    title = "Poverty Percentage by Age Group and Gender (2023)",
    x = "",
    y = ""
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(face = "bold", angle = 45, hjust = 1),
    axis.ticks.x = element_blank(),  # Remove x-axis ticks
    panel.grid.major.x = element_blank(),  # Remove x-axis gridlines
    axis.text.y = element_blank(),  # Remove y-axis tick labels
    axis.ticks.y = element_blank()  # Remove y-axis tick marks
  )

# Display the bar chart
print(bar_chart)

# Create a time series plot with accessible colors and conditional line thickness
time_series <- ggplot(poverty_long, aes(x = year, y = percent, group = age_or_gender)) +
  geom_line(aes(
    color = age_or_gender,
    linewidth = ifelse(age_or_gender %in% c("Under 18", "Female"), 1.5, 0.8) # Correctly inside aes()
  )) +
  scale_color_manual(values = group_colors) +  # Apply custom colors
  scale_linewidth_identity() +                # Use linewidth values as is
  scale_x_continuous(
    breaks = seq(2009, max(poverty_long$year), by = 3),
    limits = c(2009, max(poverty_long$year))
  ) +
  labs(
    title = "Poverty Percentage Over Time by Age Group and Gender",
    x = "Year",
    y = "Percentage (%)",
    color = "Age Group / Gender"
  ) +
  theme_minimal() +
  theme(
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold")
  )

# Display the time series plot
print(time_series)

# Load necessary libraries
library(readxl)
library(dplyr)
library(tidyr)

# Load the Excel file with multi-row headers
file_path <- "tableA1_pov_characteristics_1.xlsx" # Replace with your file path
data <- read_excel(file_path, col_names = FALSE)

## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...7`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...12`
## • `` -> `...13`

# Combine multi-level headers into a single row
headers <- data[1:2, ] %>%
  t() %>%
  as.data.frame() %>%
  unite("header", V1, V2, sep = " ", na.rm = TRUE)

# Handle empty headers
headers$header <- ifelse(headers$header == "", paste0("Empty_Column_", seq_along(headers$header)), headers$header)

# Assign combined headers as column names
colnames(data) <- headers$header

# Remove the header rows from the data
data <- data[-c(1, 2), ]

# Convert the dataset into a tidy format
tidy_data <- data %>%
  pivot_longer(
    cols = starts_with("20"), # Adjust if year columns start differently
    names_to = c("Year", "Metric"),
    names_sep = " ", # Separator to split year and metric
    values_to = "Value"
  )

# Save the tidy dataset to a CSV file
write.csv(tidy_data, "tidy_poverty_data.csv", row.names = FALSE)

# Print success message
cat("Tidy dataset saved as 'tidy_poverty_data.csv'")

## Tidy dataset saved as 'tidy_poverty_data.csv'

# Load necessary libraries
if (!require("ggplot2")) install.packages("ggplot2")
library(ggplot2)
library(dplyr)

# Read the CSV file
file_path <- "food_insecurity_indicators.csv"
food_data <- read.csv(file_path)

# Convert data to long format for ggplot
food_long <- food_data %>%
  pivot_longer(
    cols = c("Food.secure", "Low.food.security", "Very.low.food.security"),
    names_to = "Food.Security.Level",
    values_to = "Percentage"
  ) %>%
  arrange(Indicator, desc(Food.Security.Level))  # Ensure proper stacking order

# Calculate cumulative percentages for annotation positions
food_long <- food_long %>%
  group_by(Indicator) %>%
  mutate(
    Cumulative_Percentage = cumsum(Percentage),
    Annotation_Position = Cumulative_Percentage - (Percentage / 2)
  )

# Create Stacked Bar Chart with Annotations and remove y-axis ticks and gridlines
stacked_bar <- ggplot(food_long, aes(x = Indicator, y = Percentage, fill = Food.Security.Level)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(
      y = Annotation_Position, 
      label = ifelse(Percentage > 10, round(Percentage, 1), "")
    ),
    color = "black", size = 3
  ) +
  scale_fill_manual(values = c(
    "Food.secure" = "#66c2a5", 
    "Low.food.security" = "#fc8d62", 
    "Very.low.food.security" = "#8da0cb"
  )) +
  labs(
    title = "Food Insecurity Indicators by Food Security Level",
    x = "Indicator",
    y = "Percentage",
    fill = "Food Security Level"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.ticks.y = element_blank(),       # Remove y-axis tick marks
    panel.grid.major.y = element_blank(), # Remove major y-axis gridlines
    panel.grid.minor.y = element_blank(),  # Remove minor y-axis gridlines
    panel.grid.major.x = element_blank(), # Remove major y-axis gridlines
    panel.grid.minor.x = element_blank(),  # Remove minor y-axis gridlines
    legend.position = "bottom"  # Place legend at the bottom
  )

# Plot the chart
stacked_bar

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_bar()`).

## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_text()`).

# Load necessary libraries
library(ggplot2)
library(dplyr)

# Read the CSV file
file_path <- "food_assistance_by_age_group.csv"
snap_data <- read.csv(file_path)

# Clean and convert the columns to numeric
snap_data <- snap_data %>%
  mutate(
    Total_Number = as.numeric(gsub("[^0-9]", "", Total_Number)),
    SNAP_Number = as.numeric(gsub("[^0-9]", "", SNAP_Number))
  )

# Filter data for each group
snap_data_age <- snap_data[snap_data$group == "age", ]
snap_data_meal <- snap_data[snap_data$group == "free or reduced school meal", ]
snap_data_region <- snap_data[snap_data$group == "region", ]
snap_data_race <- snap_data[snap_data$group == "poverty status", ]

# Define order for each chart
age_order <- c("0 to 5 years", "6 to 11 years", "12 to 17 years", "18-24 years", "25-44 years", "45-64 years", "65 years and older")
meal_order <- c("5 to 11 years", "12 to 14 years", "15 to 18 years")
poverty_order <- c("< 50% Poverty", "< 100% Poverty", "< 150% Poverty")
region_order <- c("Midwest", "Northeast", "South", "West")

# Define colors for each chart
age_color <- "#66c2a5"       # Green for Age Group
meal_color <- "#fc8d62"      # Orange for Free or Reduced Meal
poverty_color <- "#8da0cb"   # Blue for Poverty Threshold
region_color <- "#a6d854"    # Greenish Yellow for Region

# ---- Plot for Age Group ----
snap_data_age$Age_Group <- factor(snap_data_age$Age_Group, levels = age_order)

# Add fade effect for the shortest bar
snap_data_age <- snap_data_age %>%
  mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))

plot_age <- ggplot(snap_data_age, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
  geom_bar(stat = "identity", fill = age_color, show.legend = FALSE) +
  scale_alpha(range = c(0.3, 1)) +
  labs(x = "Age Group", y = "SNAP Percent (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))

# ---- Plot for Free or Reduced Meal ----
snap_data_meal$Age_Group <- factor(snap_data_meal$Age_Group, levels = meal_order)

# Add fade effect for the shortest bar
snap_data_meal <- snap_data_meal %>%
  mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))

plot_meal <- ggplot(snap_data_meal, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
  geom_bar(stat = "identity", fill = meal_color, show.legend = FALSE) +
  scale_alpha(range = c(0.3, 1)) +
  labs(x = "Free or Reduced Meal", y = "SNAP Percent (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))

# ---- Plot for Poverty Status ----
snap_data_race$Age_Group <- recode(snap_data_race$Age_Group,
  "Less than 100% of poverty threshold" = "< 100% Poverty",
  "Less than 150% of poverty threshold" = "< 150% Poverty",
  "Less than 50% of poverty threshold"  = "< 50% Poverty"
)
snap_data_race$Age_Group <- factor(snap_data_race$Age_Group, levels = poverty_order)

# Add fade effect for the shortest bar
snap_data_race <- snap_data_race %>%
  mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))

plot_race <- ggplot(snap_data_race, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
  geom_bar(stat = "identity", fill = poverty_color, show.legend = FALSE) +
  scale_alpha(range = c(0.3, 1)) +
  labs(x = "Poverty Threshold (%)", y = "SNAP Percent (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))

# ---- Plot for Region ----
snap_data_region$Age_Group <- factor(snap_data_region$Age_Group, levels = region_order)

# Add fade effect for the shortest bar
snap_data_region <- snap_data_region %>%
  mutate(alpha = ifelse(SNAP_Percent == min(SNAP_Percent), 0.3, 1))

plot_region <- ggplot(snap_data_region, aes(x = Age_Group, y = SNAP_Percent, fill = Age_Group, alpha = alpha)) +
  geom_bar(stat = "identity", fill = region_color, show.legend = FALSE) +
  scale_alpha(range = c(0.3, 1)) +
  labs(x = "Region", y = "SNAP Percent (%)") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"))

# ---- Display All Plots ----
print(plot_age)

print(plot_meal)

print(plot_race)

print(plot_region)

# Load necessary library
library(ggplot2)
library(dplyr)

# Read the CSV file
file_path <- "tidy_burden_of_malnutrition.csv"
malnutrition_data <- read.csv(file_path)


# Ungroup and list unique values with explicit expansion
categorical_variables_expanded <- malnutrition_data %>%
  select(where(is.character)) %>%
  summarise(across(everything(), ~ paste(unique(.), collapse = ", "))) %>%
  pivot_longer(cols = everything(), names_to = "Variable", values_to = "Unique_Values")

# Print the unique values
categorical_variables_expanded

## # A tibble: 3 × 2
##   Variable       Unique_Values                                                  
##   <chr>          <chr>                                                          
## 1 indicator      adolescent_obesity, adolescent_overweight, adolescent_thinness…
## 2 disaggregation sex, pregnancy, all, age, Cause                                
## 3 disagg.value   Boys, Girls, All women, Non-pregnant women, Pregnant women, Fe…

library(ggplot2)
library(dplyr)

# Group by year and indicator, calculate the mean value
average_data <- malnutrition_data %>%
  filter(indicator %in% c("overweight", "stunting", "wasting")) %>%
  group_by(year, indicator) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot
ggplot(average_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(
    title = "Prevalence of stunting, wasting and overweight in children under 5 years of age",
    x = "Year",
    y = "Prevalence (%)",
    color = "Indicator"
  ) +
  scale_color_manual(
    values = c(
      "overweight" = "#66c2a5",
      "stunting" = "#fc8d62",
      "wasting" = "#8da0cb"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

library(ggplot2)
library(dplyr)

# Group by year and indicator, calculate the mean value
average_data <- malnutrition_data %>%
  filter(indicator %in% c("lbw")) %>%
  group_by(year, indicator) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot
ggplot(average_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(
    title = "Prevalence of infants with low birth weight",
    x = "Year",
    y = "Prevalence (%)",
    color = "Indicator"
  ) +
  scale_color_manual(
    values = c(
      "lbw" = "#fc8d62"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

library(ggplot2)
library(dplyr)

# Filter and group by year, indicator, and disagg.value
adolescent_data <- malnutrition_data %>%
  filter(indicator %in% c("adolescent_obesity", "adolescent_overweight", "adolescent_thinness")) %>%
  group_by(year, indicator, disagg.value) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot with facet wrap and separate curves for Boys and Girls
ggplot(adolescent_data, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  facet_wrap(~indicator, scales = "free_y") +
  labs(
    title = "Prevalence of thinness, overweight and obesity in children and adolescents\naged 5–19 years",
    x = "Year",
    y = "Mean Percentage",
    color = "Sex"
  ) +
  scale_color_manual(
    values = c(
      "Boys" = "#66c2a5",
      "Girls" = "#fc8d62"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 0),
    legend.position = "bottom"
  )

library(ggplot2)
library(dplyr)

# Filter and group by year, indicator, and disagg.value
adolescent_data <- malnutrition_data %>%
  filter(indicator %in% c(
     "adult_diabetes",
    "adult_obesity", 
    "adult_overweight"
  )) %>%
  group_by(year, indicator, disagg.value) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot with facet wrap and separate curves for Boys and Girls
ggplot(adolescent_data, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  facet_wrap(~indicator, scales = "free_y") +
  labs(
    title = "Prevalence of underweight, overweight and obesity in adults aged 18 years and over",
    x = "Year",
    y = "Mean Percentage",
    color = "Sex"
  ) +
  scale_color_manual(
    values = c(
      "Male" = "#66c2a5",
      "Female" = "#fc8d62"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

library(ggplot2)
library(dplyr)

# Group by year and indicator, calculate the mean value for adolescent indicators
adolescent_data <- malnutrition_data %>%
  filter(indicator %in% c("adolescent_obesity", "adolescent_overweight", "adolescent_thinness")) %>%
  group_by(year, indicator) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot
ggplot(adolescent_data, aes(x = year, y = mean_value, color = indicator, group = indicator)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(
    title = "Time Series of Average Adolescent Malnutrition Indicators",
    x = "Year",
    y = "Mean Percentage",
    color = "Indicator"
  ) +
  scale_color_manual(
    values = c(
      "adolescent_obesity" = "#66c2a5",
      "adolescent_overweight" = "#fc8d62",
      "adolescent_thinness" = "#8da0cb"
    )
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

library(ggplot2)
library(dplyr)

# Filter and group by year and disagg.value for "adult_anaemia"
anaemia_data_filtered <- malnutrition_data %>%
  filter(indicator == "adult_anaemia") %>%
  group_by(year, disagg.value) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create the time series plot for adult anaemia
ggplot(anaemia_data_filtered, aes(x = year, y = mean_value, color = disagg.value, group = disagg.value)) +
  geom_line(size = 1) +
  geom_point(size = 2) +
  labs(
    title = "Time Series of Adult Anaemia by Disaggregation",
    x = "Year",
    y = "Mean Percentage",
    color = "Disaggregation"
  ) +
  scale_color_viridis_d(name = "Disaggregation") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom"
  )

library(ggplot2)
library(dplyr)

# Filter and group by year and disagg.value for "overweight" in 2018, excluding "Both"
blood_pressure_data_filtered <- malnutrition_data %>%
  filter(indicator == "overweight", year == "2018", disagg.value != "Both") %>%
  group_by(year, disagg.value) %>%
  summarise(mean_value = mean(value, na.rm = TRUE), .groups = "drop")

# Create a bar plot for "overweight" by disaggregation value
ggplot(blood_pressure_data_filtered, aes(x = disagg.value, y = mean_value, fill = disagg.value)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  labs(
    title = "Overweight by Disaggregation (2018)",
    x = "Disaggregation",
    y = "Mean Percentage",
    fill = "Disaggregation"
  ) +
  scale_fill_viridis_d() +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Load necessary libraries
if (!require("ggplot2")) install.packages("ggplot2")
library(ggplot2)
library(dplyr)

# Read the CSV file
file_path <- "fruit_vegetable_sweets_age1_5.csv"
food_data <- read.csv(file_path)

glimpse(food_data)

## Rows: 51
## Columns: 5
## $ State                     <chr> "Alabama", "Alaska", "Arizona", "Arkansas", …
## $ Total                     <int> 339, 350, 315, 327, 342, 503, 368, 340, 388,…
## $ Fruit                     <chr> "39.3 (32.0–47.2)", "21.6 (15.5–29.3)", "30.…
## $ Vegetables                <chr> "57.3 (49.6–64.8)", "50.0 (41.8–58.1)", "50.…
## $ Sugar.sweetened.beverages <chr> "66.5 (59.7–72.7)", "54.9 (46.7–62.7)", "59.…

library(dplyr)

# Clean and extract the mean, lower, and upper bounds
foods_data_clean <- food_data %>%
  mutate(
    # Extract mean
    Fruit_Mean = as.numeric(sub(" \\(.*", "", Fruit)),
    
    # Extract lower bound for both formats (dash or comma, ignoring † or other symbols)
    Fruit_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", Fruit)),
    
    # Extract upper bound for both formats (dash or comma, ignoring † or other symbols)
    Fruit_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", Fruit)),

    # Repeat the process for Vegetables
    Vegetables_Mean = as.numeric(sub(" \\(.*", "", Vegetables)),
    Vegetables_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", Vegetables)),
    Vegetables_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", Vegetables)),

    # Repeat for Sugar-Sweetened Beverages
    Sugar_Sweetened_Beverages_Mean = as.numeric(sub(" \\(.*", "", `Sugar.sweetened.beverages`)),
    Sugar_Sweetened_Beverages_Lower = as.numeric(sub(".*\\(([^–,]+)[–,].*\\).*", "\\1", `Sugar.sweetened.beverages`)),
    Sugar_Sweetened_Beverages_Upper = as.numeric(sub(".*[–,]([^\\)]+)\\).*", "\\1", `Sugar.sweetened.beverages`))
  )

# Verify the cleaned data
glimpse(foods_data_clean)

## Rows: 51
## Columns: 14
## $ State                           <chr> "Alabama", "Alaska", "Arizona", "Arkan…
## $ Total                           <int> 339, 350, 315, 327, 342, 503, 368, 340…
## $ Fruit                           <chr> "39.3 (32.0–47.2)", "21.6 (15.5–29.3)"…
## $ Vegetables                      <chr> "57.3 (49.6–64.8)", "50.0 (41.8–58.1)"…
## $ Sugar.sweetened.beverages       <chr> "66.5 (59.7–72.7)", "54.9 (46.7–62.7)"…
## $ Fruit_Mean                      <dbl> 39.3, 21.6, 30.0, 36.3, 32.3, 25.6, 33…
## $ Fruit_Lower                     <dbl> 32.0, 15.5, 22.2, 28.5, 24.9, 20.3, 25…
## $ Fruit_Upper                     <dbl> 47.2, 29.3, 39.2, 44.8, 40.7, 31.8, 42…
## $ Vegetables_Mean                 <dbl> 57.3, 50.0, 50.6, 51.5, 50.5, 47.2, 48…
## $ Vegetables_Lower                <dbl> 49.6, 41.8, 41.5, 42.9, 42.5, 40.7, 40…
## $ Vegetables_Upper                <dbl> 64.8, 58.1, 59.7, 60.1, 58.4, 53.8, 56…
## $ Sugar_Sweetened_Beverages_Mean  <dbl> 66.5, 54.9, 59.6, 66.1, 53.9, 56.1, 42…
## $ Sugar_Sweetened_Beverages_Lower <dbl> 59.7, 46.7, 50.4, 57.8, 46.0, 49.7, 34…
## $ Sugar_Sweetened_Beverages_Upper <dbl> 72.7, 62.7, 68.1, 73.6, 61.5, 62.2, 51…

# Merge the datasets
merged_data <- merge(poverty_by_state, food_insecurity_by_state, by = "state_abrev")
merged_data <- merge(merged_data, foods_data_clean, by.x = "states", by.y = "State")

glimpse(merged_data)

## Rows: 51
## Columns: 26
## $ states                                         <chr> "Alabama", "Alaska", "A…
## $ state_abrev                                    <chr> "AL", "AK", "AZ", "AR",…
## $ hunger_population                              <chr> "727", "74", "903", "47…
## $ margin_of_error                                <int> 77, 9, 78, 35, 198, 62,…
## $ percent                                        <dbl> 14.6, 10.4, 12.4, 15.8,…
## $ percent_margin_of_error                        <dbl> 1.5, 1.3, 1.1, 1.2, 0.5…
## $ food_insecurity                                <dbl> 11.5, 10.4, 11.8, 18.9,…
## $ very_low_food_insecurity                       <dbl> 4.4, 5.3, 4.7, 6.7, 4.1…
## $ diabetes                                       <dbl> 14.9, 8.3, 12.3, 10.6, …
## $ Obesity                                        <dbl> 38.3, 32.1, 37.4, 33.2,…
## $ `Enough food, but not always the kinds wanted` <dbl> 0.8, 0.6, 0.7, 0.6, 0.5…
## $ `Sometimes not enough to eat`                  <dbl> 0.2, 0.3, 0.2, 0.2, 0.4…
## $ `Often not enough to eat`                      <dbl> 0.0, 0.1, 0.1, 0.2, 0.1…
## $ Total                                          <int> 339, 350, 315, 327, 342…
## $ Fruit                                          <chr> "39.3 (32.0–47.2)", "21…
## $ Vegetables                                     <chr> "57.3 (49.6–64.8)", "50…
## $ Sugar.sweetened.beverages                      <chr> "66.5 (59.7–72.7)", "54…
## $ Fruit_Mean                                     <dbl> 39.3, 21.6, 30.0, 36.3,…
## $ Fruit_Lower                                    <dbl> 32.0, 15.5, 22.2, 28.5,…
## $ Fruit_Upper                                    <dbl> 47.2, 29.3, 39.2, 44.8,…
## $ Vegetables_Mean                                <dbl> 57.3, 50.0, 50.6, 51.5,…
## $ Vegetables_Lower                               <dbl> 49.6, 41.8, 41.5, 42.9,…
## $ Vegetables_Upper                               <dbl> 64.8, 58.1, 59.7, 60.1,…
## $ Sugar_Sweetened_Beverages_Mean                 <dbl> 66.5, 54.9, 59.6, 66.1,…
## $ Sugar_Sweetened_Beverages_Lower                <dbl> 59.7, 46.7, 50.4, 57.8,…
## $ Sugar_Sweetened_Beverages_Upper                <dbl> 72.7, 62.7, 68.1, 73.6,…

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

# Ensure relevant columns are numeric
numeric_data <- merged_data %>%
  select(food_insecurity, Obesity, diabetes, Vegetables_Mean, Fruit_Mean, 
         Sugar_Sweetened_Beverages_Mean, hunger_population) %>%
  mutate_all(~ as.numeric(gsub("[^0-9.]", "", .)))  # Convert to numeric safely

# Check for any remaining non-numeric data
print(sapply(numeric_data, class))

##                food_insecurity                        Obesity 
##                      "numeric"                      "numeric" 
##                       diabetes                Vegetables_Mean 
##                      "numeric"                      "numeric" 
##                     Fruit_Mean Sugar_Sweetened_Beverages_Mean 
##                      "numeric"                      "numeric" 
##              hunger_population 
##                      "numeric"

# Calculate correlation matrix
correlation_matrix <- cor(numeric_data, use = "complete.obs")

# Melt the correlation matrix for heatmap plotting
melted_correlation <- melt(correlation_matrix)

# Create the heatmap
ggplot(melted_correlation, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile(color = "white") +
  scale_fill_gradient2(
    low = "#d73027", mid = "#ffffbf", high = "#1a9850", midpoint = 0, 
    limit = c(-1, 1), space = "Lab", name = "Correlation"
  ) +
  labs(
    title = "Correlation Heatmap: Food Insecurity and Other Variables",
    x = "Variables",
    y = "Food Insecurity"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    axis.text.y = element_text(face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)

# Convert hunger_population to numeric
merged_data <- merged_data %>%
  mutate(hunger_population = as.numeric(gsub("[^0-9]", "", hunger_population)))

# Prepare data for the heatmap
heatmap_data <- merged_data %>%
  select(state_abrev, hunger_population, diabetes, Obesity, 
         Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
  pivot_longer(
    cols = -c(state_abrev, hunger_population),  # Keep state_abrev and hunger_population fixed
    names_to = "Variable",
    values_to = "Value"
  )

# Create the heatmap with hunger_population ordered by itself
# Increase plot height and adjust y-axis text for readability
ggplot(heatmap_data, aes(x = Variable, y = reorder(hunger_population, hunger_population), fill = Value)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Value") +
  labs(
    title = "Heatmap: Hunger Population vs Other Variables",
    x = "Variables",
    y = "Hunger Population (Ordered)"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    axis.text.y = element_text(size = 8), # Increase y-axis text size
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.spacing = unit(1, "lines") # Add spacing between elements
  ) +
  coord_fixed(ratio = 0.07) # Adjust ratio to make the y-axis taller

# Load necessary libraries
library(ggplot2)
library(dplyr)
library(tidyr)

# Convert hunger_population to numeric
merged_data <- merged_data %>%
  mutate(hunger_population = as.numeric(gsub("[^0-9]", "", hunger_population)))

# Prepare data for the heatmap
heatmap_data <- merged_data %>%
  select(state_abrev, hunger_population, diabetes, Obesity, 
         Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
  pivot_longer(
    cols = -c(state_abrev, hunger_population),  # Keep state_abrev and hunger_population fixed
    names_to = "Variable",
    values_to = "Value"
  ) %>%
  # Rename variables for better clarity
  mutate(Variable = case_when(
    Variable == "Vegetables_Mean" ~ "Less Vegetable Consumption",
    Variable == "Fruit_Mean" ~ "Less Fruit Consumption",
    Variable == "Sugar_Sweetened_Beverages_Mean" ~ "High Sugar Drink Consumption",
    TRUE ~ Variable
  ))

# Sort x-axis variables based on the average value (lighter to darker)
variable_order <- heatmap_data %>%
  group_by(Variable) %>%
  summarize(Mean_Value = mean(Value, na.rm = TRUE)) %>%
  arrange(Mean_Value) %>%
  pull(Variable)

# Reorder Variable levels based on calculated order
heatmap_data$Variable <- factor(heatmap_data$Variable, levels = variable_order)

# Create the heatmap with hunger_population ordered by itself
ggplot(heatmap_data, aes(x = Variable, y = reorder(hunger_population, hunger_population), fill = Value)) +
  geom_tile(color = "white") +
  scale_fill_gradient(low = "lightblue", high = "darkblue", name = "Value") +
  labs(
    title = "Impact of Hunger Population",
    x = "Variables",
    y = "Hunger Population"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    axis.text.y = element_text(size = 8), # Increase y-axis text size
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.spacing = unit(1, "lines") # Add spacing between elements
  ) +
  coord_fixed(ratio = 0.07) # Adjust ratio to make the y-axis taller

# Compute correlations and sort in ascending order
cor_data <- merged_data %>%
  select(food_insecurity, hunger_population, diabetes, Obesity, Vegetables_Mean, Fruit_Mean, Sugar_Sweetened_Beverages_Mean) %>%
  summarise(across(everything(), ~ cor(., food_insecurity, use = "complete.obs"))) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Correlation") %>%
  filter(Variable != "food_insecurity") %>% # Exclude food_insecurity
  arrange(Correlation) %>% # Sort by ascending order
  mutate(Variable = factor(Variable, levels = Variable)) # Reorder factor levels

# Plot heatmap with sorted correlation
ggplot(cor_data, aes(x = Variable, y = "Food Insecurity", fill = Correlation)) +
  geom_tile(color = "white") +
  geom_text(aes(label = round(Correlation, 2)), color = "white", size = 4) +
  scale_fill_gradient2(low = "red", high = "blue", mid = "white", midpoint = 0, name = "Correlation") +
  labs(
    title = "Correlation Heatmap: Food Insecurity vs Other Variables",
    x = "Variables",
    y = ""
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Scatterplot: Food Insecurity vs Hunger Population with Color Encoding
ggplot(merged_data, aes(x = food_insecurity, y = hunger_population)) +
  geom_point(aes(color = Obesity), size = 3, alpha = 0.7) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen", name = "Obesity Rate") +
  labs(
    title = "Scatterplot: Food Insecurity vs Hunger Population (Colored by Obesity)",
    x = "Food Insecurity (%)",
    y = "Hunger Population"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold"),
    axis.title = element_text(face = "bold")
  )

# Create a dual-axis heatmap/scatterplot with limited y-axis range
ggplot(merged_data, aes(x = state_abrev)) +
  geom_col(aes(y = hunger_population), fill = "skyblue", alpha = 0.7) + # Hunger population
  geom_line(aes(y = food_insecurity * 1000), color = "darkred", size = 1) + # Scaled food insecurity
  scale_y_continuous(
    name = "Hunger Population",
    limits = c(0, 5000), # Limit y-axis between 0 and 5000
    sec.axis = sec_axis(~./1000, name = "Food Insecurity (%)") # Secondary axis scaled
  ) +
  labs(
    title = "Hunger Population and Food Insecurity Across States",
    x = "States",
    y = "Hunger Population"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

## Warning: Removed 51 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Create a bar plot with color intensity proportional to food insecurity
ggplot(merged_data, aes(x = state_abrev, y = hunger_population, fill = food_insecurity)) +
  geom_col(alpha = 0.8) + # Bars for hunger population
  scale_fill_gradient(
    low = "lightblue", 
    high = "darkred", 
    name = "Food Insecurity (%)"
  ) +
  scale_y_continuous(
    limits = c(0, 5000), # Limit y-axis between 0 and 5000
    name = "Hunger Population"
  ) +
  labs(
    title = "Hunger Population Across States with Food Insecurity Intensity",
    x = "States",
    y = "Hunger Population"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, face = "bold"),
    plot.title = element_text(hjust = 0.5, face = "bold")
  )

# Add a region column based on state_abrev
merged_data <- merged_data %>%
  mutate(region = case_when(
    state_abrev %in% c("ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA") ~ "Northeast",
    state_abrev %in% c("IL", "IN", "IA", "KS", "MI", "MN", "MO", "ND", "NE", "OH", "SD", "WI") ~ "Midwest",
    state_abrev %in% c("DE", "FL", "GA", "MD", "NC", "SC", "VA", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX") ~ "South",
    state_abrev %in% c("AZ", "CO", "ID", "MT", "NV", "NM", "UT", "WY", "AK", "CA", "HI", "OR", "WA") ~ "West",
    TRUE ~ "Unknown"
  )) %>%
  filter(state_abrev != "DC")  # Drop DC explicitly

# Create a bar plot faceted by region with food insecurity as color intensity
ggplot(merged_data, aes(x = state_abrev, y = hunger_population, fill = food_insecurity)) +
  geom_col(alpha = 0.8) + # Bars for hunger population
  scale_fill_gradient(
    low = "lightblue", 
    high = "darkred", 
    name = "Food Insecurity (%)"
  ) +
  scale_y_continuous(
    limits = c(0, 5000), # Limit y-axis range
    name = "Hunger Population"
  ) +
  labs(
    title = "Hunger Population Across States by Region with Food Insecurity Intensity",
    x = "States",
    y = "Hunger Population"
  ) +
  facet_wrap(~region, scales = "free_x", nrow = 2) + # Facet by region
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, face = "bold", size = 8), # Adjust x-axis labels
    strip.text = element_text(face = "bold", size = 10), # Bold facet titles
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "bottom", # Place legend at bottom
    legend.title = element_text(face = "bold"),
    legend.text = element_text(size = 8)
  )

# Load libraries
library(ggplot2)
library(dplyr)

# Prepare data in long format for faceting
faceted_data <- merged_data %>%
  select(food_insecurity, Obesity, diabetes, Fruit_Mean, Vegetables_Mean) %>%
  pivot_longer(
    cols = c(Obesity, diabetes, Fruit_Mean, Vegetables_Mean),
    names_to = "variable",
    values_to = "value"
  )

# Custom facet labels
facet_labels <- c(
  "Obesity" = "Obesity",
  "diabetes" = "Diabetes",
  "Fruit_Mean" = "Less Fruits",
  "Vegetables_Mean" = "Less Veggies"
)

# Create faceted scatter plots with relabeled titles
ggplot(faceted_data, aes(x = food_insecurity, y = value)) +
  geom_point(aes(color = variable), size = 2, alpha = 0.8) +
  geom_smooth(method = "lm", se = TRUE, color = "black") +
  facet_wrap(~ variable, scales = "free_y", ncol = 2, labeller = labeller(variable = facet_labels)) +
  scale_color_manual(values = c(
    "Obesity" = "darkred",
    "diabetes" = "darkblue",
    "Fruit_Mean" = "forestgreen",
    "Vegetables_Mean" = "darkorange"
  )) +
  labs(
    title = "Food Insecurity vs Health and Nutrition Indicators",
    x = "Food Insecurity Rate (%)",
    y = "Value",
    color = "Variable"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 10),  # Title for each facet
    legend.position = "none"  # Remove legend for clarity
  )

## `geom_smooth()` using formula = 'y ~ x'

Relationship between food insecurity and vegetable consumption rates by state

This dot plot effectively visualizes the relationship between food insecurity and vegetable consumption rates by state:

Key Observations:

Descending Food Insecurity Order:
- States with higher food insecurity rates (e.g., Louisiana, Mississippi) are at the bottom, while states with lower food insecurity rates (e.g., Vermont, New Hampshire) are at the top.
Vegetable Consumption Trend:
- States with higher food insecurity tend to have higher percentages of children consuming vegetables less than daily. This indicates a potential inverse relationship between food security and adequate vegetable consumption.
Confidence Intervals:
- Error bars show the confidence intervals for vegetable consumption rates. States with wider intervals (e.g., Louisiana, Maine) have more variability in their reported data.
Homogeneity in Dots:
- Uniform dot size ensures focus remains on the position and color of the dots, highlighting food insecurity levels.
Color Gradient:
- A clear color gradient from light blue to dark red aligns with increasing food insecurity rates, making it easy to identify states with higher issues of food insecurity.

Insights:

States with higher food insecurity rates may benefit from targeted interventions to improve access to fresh vegetables for children.
There seems to be a geographic pattern where certain regions (e.g., Southern states) exhibit higher food insecurity and poorer vegetable consumption habits.

Would you like further statistical analysis to quantify this relationship or additional visualizations for fruit or sugar-sweetened beverages?

# Dot plot with jitter, uniform dot size, and modified y-axis
ggplot(merged_data, aes(x = reorder(state_abrev, -food_insecurity), y = Vegetables_Mean)) +
  geom_point(aes(color = food_insecurity), alpha = 0.7, size = 3, position = position_jitter(width = 0.15, height = 0)) +
  geom_errorbar(aes(ymin = Vegetables_Lower, ymax = Vegetables_Upper), width = 0.1, color = "gray90") +
  geom_text(aes(label = state_abrev), hjust = -0.3, size = 3, color = "black") +
  coord_flip() +
  scale_color_gradient(low = "lightblue", high = "darkred") +
  labs(
    title = "Statewise Food Insecurity and Vegetable Consumption",
    x = "State",
    y = "Less Than Daily\nVegetable Consumption (%)",
    color = "Food Insecurity Rate (%)"
  ) +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),       # Remove y-axis tick labels
    axis.ticks.y = element_blank(),      # Remove y-axis tick marks
    panel.grid.major.y = element_blank(), # Remove major grid lines on the y-axis
    panel.grid.major.x = element_blank(), # Remove major grid lines on the x-axis
    panel.grid.minor.x = element_blank() # Remove major grid lines on the x-axis


  )

DATA 608 Story 6

Fomba Kassoh

2024-12-16

Load the data

Relationship between food insecurity and vegetable consumption rates by state

Key Observations:

Insights: