Project2 Nutrition And Physical Risk Factors

Author

Mamokotjo Letjama

Introduction

Load Packages

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.2     ✔ tibble    3.3.0
✔ lubridate 1.9.4     ✔ tidyr     1.3.1
✔ purrr     1.0.4     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(leaflet)
library(ggrepel)
library(ggthemes)
library(viridis)
Loading required package: viridisLite

Set the working directory and load the dataset

# Point the session at the folder that holds the data file.
# NOTE(review): this is an absolute, machine-specific path, so the script
# only runs as-is on a machine where this directory exists.
setwd("C:/Users/tmats/OneDrive/DATA110/Working Directories")

# Read the BRFSS nutrition / physical activity / obesity CSV from the
# working directory set above.
health_behavior <- read_csv(
  file = "Nutrition__Physical_Activity__and_Obesity_-_Behavioral_Risk_Factor_Surveillance_System.csv"
)
Rows: 104272 Columns: 33
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (25): LocationAbbr, LocationDesc, Datasource, Class, Topic, Question, Da...
dbl  (8): YearStart, YearEnd, Data_Value_Unit, Data_Value, Data_Value_Alt, L...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the raw import.
health_behavior |>
  head()
# A tibble: 6 × 33
  YearStart YearEnd LocationAbbr LocationDesc Datasource Class    Topic Question
      <dbl>   <dbl> <chr>        <chr>        <chr>      <chr>    <chr> <chr>   
1      2011    2011 AK           Alaska       BRFSS      Obesity… Obes… Percent…
2      2011    2011 AK           Alaska       BRFSS      Obesity… Obes… Percent…
3      2011    2011 AK           Alaska       BRFSS      Physica… Phys… Percent…
4      2011    2011 AK           Alaska       BRFSS      Obesity… Obes… Percent…
5      2011    2011 AK           Alaska       BRFSS      Obesity… Obes… Percent…
6      2011    2011 AK           Alaska       BRFSS      Obesity… Obes… Percent…
# ℹ 25 more variables: Data_Value_Unit <dbl>, Data_Value_Type <chr>,
#   Data_Value <dbl>, Data_Value_Alt <dbl>, Data_Value_Footnote_Symbol <chr>,
#   Data_Value_Footnote <chr>, Low_Confidence_Limit <dbl>,
#   High_Confidence_Limit <dbl>, Sample_Size <dbl>, Total <chr>,
#   `Age(years)` <chr>, Education <chr>, Sex <chr>, Income <chr>,
#   `Race/Ethnicity` <chr>, GeoLocation <chr>, ClassID <chr>, TopicID <chr>,
#   QuestionID <chr>, DataValueTypeID <chr>, LocationID <chr>, …
# Split the parenthesised, comma-separated GeoLocation text into two
# coordinate columns, lat and long.
health_behavior1 <- health_behavior |>
  # Keep only the rows that carry a location
  drop_na(GeoLocation) |>
  # Strip the surrounding parentheses before splitting
  mutate(GeoLocation = str_remove_all(GeoLocation, "[()]")) |>
  # convert = TRUE lets separate() turn the two text pieces into numbers
  separate(GeoLocation, into = c("lat", "long"), sep = ",", convert = TRUE)

# Print the result to inspect the new lat / long columns
health_behavior1
# A tibble: 102,340 × 34
   YearStart YearEnd LocationAbbr LocationDesc Datasource Class   Topic Question
       <dbl>   <dbl> <chr>        <chr>        <chr>      <chr>   <chr> <chr>   
 1      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 2      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 3      2011    2011 AK           Alaska       BRFSS      Physic… Phys… Percent…
 4      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 5      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 6      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 7      2011    2011 AK           Alaska       BRFSS      Physic… Phys… Percent…
 8      2011    2011 AK           Alaska       BRFSS      Obesit… Obes… Percent…
 9      2011    2011 AK           Alaska       BRFSS      Physic… Phys… Percent…
10      2011    2011 AK           Alaska       BRFSS      Physic… Phys… Percent…
# ℹ 102,330 more rows
# ℹ 26 more variables: Data_Value_Unit <dbl>, Data_Value_Type <chr>,
#   Data_Value <dbl>, Data_Value_Alt <dbl>, Data_Value_Footnote_Symbol <chr>,
#   Data_Value_Footnote <chr>, Low_Confidence_Limit <dbl>,
#   High_Confidence_Limit <dbl>, Sample_Size <dbl>, Total <chr>,
#   `Age(years)` <chr>, Education <chr>, Sex <chr>, Income <chr>,
#   `Race/Ethnicity` <chr>, lat <dbl>, long <dbl>, ClassID <chr>, …

Select data to explore

# Keep the 2019 rows for the three survey questions of interest and trim the
# table down to the columns used later on.
life_style <- health_behavior1 |>
  filter(
    Class %in% c("Physical Activity", "Fruits and Vegetables"),
    Question %in% c(
      "Percent of adults who achieve at least 150 minutes a week of moderate-intensity aerobic physical activity or 75 minutes a week of vigorous-intensity aerobic activity (or an equivalent combination)",
      "Percent of adults who report consuming fruit less than one time daily",
      "Percent of adults who report consuming vegetables less than one time daily"
    ),
    YearStart == 2019
  ) |>
  # select() renames while it picks:
  # LocationAbbr becomes State, Data_Value becomes Percentage
  select(
    YearStart,
    State = LocationAbbr,
    Class,
    Percentage = Data_Value,
    Question,
    lat,
    long,
    StratificationCategory1
  )

# Preview the selected data
life_style |>
  head()
# A tibble: 6 × 8
  YearStart State Class   Percentage Question   lat  long StratificationCatego…¹
      <dbl> <chr> <chr>        <dbl> <chr>    <dbl> <dbl> <chr>                 
1      2019 AK    Physic…       48.5 Percent…  64.8 -148. Age (years)           
2      2019 AK    Fruits…       12.7 Percent…  64.8 -148. Income                
3      2019 AK    Fruits…       23.8 Percent…  64.8 -148. Income                
4      2019 AK    Fruits…       47.9 Percent…  64.8 -148. Education             
5      2019 AK    Fruits…       19.1 Percent…  64.8 -148. Education             
6      2019 AK    Fruits…       13   Percent…  64.8 -148. Income                
# ℹ abbreviated name: ¹​StratificationCategory1

## Removing NAs from the dataset

# Discard the rows that have no reported percentage.
lifestyle_clean <- filter(life_style, !is.na(Percentage))

## Explore the states in the South region

# Restrict the cleaned data to the ten state / district codes chosen for the
# South comparison.
south_lifestyleClean <- lifestyle_clean |>
  filter(
    State %in% c(
      "MD", "DC", "GA", "KY", "NC",
      "VA", "TX", "LA", "MS", "TN"
    )
  )

First draft graph

  # First-draft bar chart: reported percentage per state, with one dodged bar
  # group per Class.
  # NOTE(review): south_lifestyleClean still holds several rows per
  # State/Class pair (different questions and stratification categories), so
  # bars of the same fill are drawn on top of each other at each state.
  # Confirm whether the data should be narrowed or aggregated first.
  south_lifestyleClean |>
    ggplot(aes(x = State, y = Percentage, fill = Class)) +
    # geom_col() plots the Percentage values as given (identity stat)
    geom_col(position = "dodge") +
    theme_light() +
    labs(
      title = "Lifestyle in 2019",
      x = "State",
      y = "Percentage"
    )

Brief Summary