#Loading…

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(readr)
library(ggplot2)
library(RColorBrewer)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

#Load Libraries and important data

library(readr)
# Read the CDC breastfeeding statistics CSV from the working directory into a
# tibble; readr guesses the column types and reports them on load.
CDCBreastStats <- read_csv(file = "CDCBreastStats.csv")
## Rows: 10944 Columns: 37
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (29): LocationAbbr, LocationDesc, Datasource, Class, Topic, Question, Da...
## dbl  (6): YearStart, YearEnd, Data_Value, Data_Value_Alt, Low_Confidence_Lim...
## lgl  (1): Data_Value_Unit
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style data viewer only when a person is driving the
# session. View() is an interactive-only side effect, so it is skipped when the
# script is knitted or run in batch mode.
if (interactive()) {
  View(CDCBreastStats)
}
# Print each column's name, type and first few values.
str(CDCBreastStats)
## spec_tbl_df [10,944 x 37] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ YearStart                 : num [1:10944] 2005 2005 2009 2000 2006 ...
##  $ YearEnd                   : num [1:10944] 2005 2005 2009 2000 2006 ...
##  $ LocationAbbr              : chr [1:10944] "NY" "US" "NM" "US" ...
##  $ LocationDesc              : chr [1:10944] "New York" "National" "New Mexico" "National" ...
##  $ Datasource                : chr [1:10944] "National Immunization Survey" "National Immunization Survey" "National Immunization Survey" "National Immunization Survey" ...
##  $ Class                     : chr [1:10944] "Breastfeeding" "Breastfeeding" "Breastfeeding" "Breastfeeding" ...
##  $ Topic                     : chr [1:10944] "Breastfeeding - Behavior" "Breastfeeding - Behavior" "Breastfeeding - Behavior" "Breastfeeding - Behavior" ...
##  $ Question                  : chr [1:10944] "Percent of infants who were ever breastfed" "Percent of infants who were exclusively breastfed through 3 months" "Percent of infants who were ever breastfed" "Percent of infants who were breastfed at 6 months" ...
##  $ Data_Value_Unit           : logi [1:10944] NA NA NA NA NA NA ...
##  $ Data_Value_Type           : chr [1:10944] "Value" "Value" "Value" "Value" ...
##  $ Data_Value                : num [1:10944] 74.6 43.8 75 24.4 74.6 86 32.1 47.6 83.8 11.2 ...
##  $ Data_Value_Alt            : num [1:10944] 74.6 43.8 75 24.4 74.6 86 32.1 47.6 83.8 11.2 ...
##  $ Data_Value_Footnote_Symbol: chr [1:10944] NA NA NA NA ...
##  $ Data_Value_Footnote       : chr [1:10944] NA NA NA NA ...
##  $ Low_Confidence_Limit      : num [1:10944] 70.8 42.3 68.3 21.9 73.2 83.8 28.6 45.5 82.6 8 ...
##  $ High_Confidence_Limit     : num [1:10944] 78.1 45.4 80.7 27.1 75.9 87.9 35.8 49.7 84.9 15.5 ...
##  $ Sample_Size               : num [1:10944] 770 9823 381 5795 12032 ...
##  $ Total                     : chr [1:10944] "Total" NA "Total" NA ...
##  $ %PovertyLevel             : chr [1:10944] NA NA NA NA ...
##  $ Age(years)                : chr [1:10944] NA NA NA NA ...
##  $ BirthOrder                : chr [1:10944] NA NA NA NA ...
##  $ Education                 : chr [1:10944] NA "College graduate" NA NA ...
##  $ Gender                    : chr [1:10944] NA NA NA NA ...
##  $ MaritalStatus             : chr [1:10944] NA NA NA NA ...
##  $ Metropolitan              : chr [1:10944] NA NA NA NA ...
##  $ Race/Ethnicity            : chr [1:10944] NA NA NA NA ...
##  $ WICParticipation          : chr [1:10944] NA NA NA "Yes" ...
##  $ GeoLocation               : chr [1:10944] "(42.82700103200045, -75.54397042699964)" NA "(34.52088095200048, -106.24058098499967)" NA ...
##  $ ClassID                   : chr [1:10944] "BF" "BF" "BF" "BF" ...
##  $ TopicID                   : chr [1:10944] "BF1" "BF1" "BF1" "BF1" ...
##  $ QuestionID                : chr [1:10944] "Q006" "Q007" "Q006" "Q005" ...
##  $ DataValueTypeID           : chr [1:10944] "VALUE" "VALUE" "VALUE" "VALUE" ...
##  $ LocationID                : chr [1:10944] "36" "59" "35" "59" ...
##  $ StratificationCategory1   : chr [1:10944] "Total" "Education" "Total" "WIC Participation" ...
##  $ Stratification1           : chr [1:10944] "Total" "College graduate" "Total" "Yes" ...
##  $ StratificationCategoryId1 : chr [1:10944] "OVR" "EDU" "OVR" "WIC" ...
##  $ StratificationID1         : chr [1:10944] "OVERALL" "EDUCOGRAD" "OVERALL" "WICYES" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   YearStart = col_double(),
##   ..   YearEnd = col_double(),
##   ..   LocationAbbr = col_character(),
##   ..   LocationDesc = col_character(),
##   ..   Datasource = col_character(),
##   ..   Class = col_character(),
##   ..   Topic = col_character(),
##   ..   Question = col_character(),
##   ..   Data_Value_Unit = col_logical(),
##   ..   Data_Value_Type = col_character(),
##   ..   Data_Value = col_double(),
##   ..   Data_Value_Alt = col_double(),
##   ..   Data_Value_Footnote_Symbol = col_character(),
##   ..   Data_Value_Footnote = col_character(),
##   ..   Low_Confidence_Limit = col_double(),
##   ..   High_Confidence_Limit = col_double(),
##   ..   Sample_Size = col_number(),
##   ..   Total = col_character(),
##   ..   `%PovertyLevel` = col_character(),
##   ..   `Age(years)` = col_character(),
##   ..   BirthOrder = col_character(),
##   ..   Education = col_character(),
##   ..   Gender = col_character(),
##   ..   MaritalStatus = col_character(),
##   ..   Metropolitan = col_character(),
##   ..   `Race/Ethnicity` = col_character(),
##   ..   WICParticipation = col_character(),
##   ..   GeoLocation = col_character(),
##   ..   ClassID = col_character(),
##   ..   TopicID = col_character(),
##   ..   QuestionID = col_character(),
##   ..   DataValueTypeID = col_character(),
##   ..   LocationID = col_character(),
##   ..   StratificationCategory1 = col_character(),
##   ..   Stratification1 = col_character(),
##   ..   StratificationCategoryId1 = col_character(),
##   ..   StratificationID1 = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Report the dimensions (number of rows, number of columns) of the loaded data.
dim(CDCBreastStats)
## [1] 10944    37

#Data Wrangling

##Selecting CI of infants ever breast fed and breastfed at 6 months in 2018 (Low, High) ##ROW72 Focuses on MARYLAND. This is the data I want specifically.

# Keep only the Maryland rows and the four columns used by the plot below:
# survey year, state abbreviation, survey question, and reported percentage.
# The result is left ungrouped: grouping by Question/Data_Value/YearStart put
# almost every row in its own group and was never used by a summary.
# NOTE(review): rows for every stratification category are kept here; confirm
# that the MD rows are all "Total", otherwise a question can have several
# values per year in the line plot.
CDC_BS1 <- CDCBreastStats %>%
  filter(LocationAbbr == "MD") %>%
  select(YearStart, LocationAbbr, Question, Data_Value)
head(CDC_BS1)
## # A tibble: 6 x 4
##   YearStart LocationAbbr Question                                     Data_Value
##       <dbl> <chr>        <chr>                                             <dbl>
## 1      2016 MD           Percent of infants who were breastfed at 12~       32.9
## 2      2009 MD           Percent of infants who were breastfed at 6 ~       49.5
## 3      2007 MD           Percent of breastfed infants who were suppl~       25.8
## 4      2013 MD           Percent of infants who were ever breastfed         84.8
## 5      2010 MD           Percent of infants who were breastfed at 6 ~       51.9
## 6      2010 MD           Percent of breastfed infants who were suppl~       25.8
#8 observations and 7 variables 
# Base plot: Maryland percentages by birth year for three survey questions
# (breastfed at 6 months, ever breastfed, exclusively breastfed through
# 3 months). Only the axes, title and theme are set here; geoms are added below.
CDC_BSPlot <- CDC_BS1 %>%
  filter(Question %in% c(
    "Percent of infants who were breastfed at 6 months",
    "Percent of infants who were ever breastfed",
    "Percent of infants who were exclusively breastfed through 3 months"
  )) %>%
  ggplot(aes(x = YearStart, y = Data_Value)) +
  labs(title = "Percentages of Breastfed Infants in Maryland over the Years") +
  xlab("Year of Birth") +
  ylab("Percentage") +
  theme_minimal(base_size = 9)

# Final plot: one coloured point-and-line series per question.
# Every layer must be chained with `+`; previously the chain stopped after
# geom_line(), so the legend label and the Set1 palette were evaluated as a
# separate statement and never reached the plot.
# The colour legend is titled "Question" because colour is mapped to Question.
CDC_BSFinalPlot <- CDC_BSPlot +
  geom_point(aes(color = Question)) +
  geom_line(aes(color = Question)) +
  labs(color = "Question") +
  scale_color_brewer(palette = "Set1")
CDC_BSFinalPlot

#Essay

CDC Breastfeeding Behaviors. The source and topic of the data, any variables included and what kind of variables they are, and how you cleaned the dataset up (be detailed and specific, using proper terminology where appropriate). I retrieved the breastfeeding behaviors dataset from the Centers for Disease Control and Prevention (CDC) website: “The CDC surveys breastfeeding practices in the United States to monitor progress over time and identify priority areas that need attention… The National Immunization Survey (NIS) uses random digit dialing to survey US households with children and teens. Since July 2001, breastfeeding questions have been asked on the NIS and are used to monitor breastfeeding rates at both national and state levels by birth year. All respondents with children aged 19 to 35 months are asked the breastfeeding questions.” [1] The variables I included were year start, year end, location abbreviation, location description, sample size, low confidence limit, and high confidence limit. Some other variables I would have loved to include were WIC participation, age, education, and marital status, but those columns contained many missing values (NA). My goal was to remake a visualization similar to the one on the CDC website (DNPAO Data, Trends, and Maps: Explore by Location | CDC). What I found: • Percent of breastfed infants who were supplemented with infant formula within 2 days of life (Row – 1) • Percent of infants who were ever breastfed (Row – 2) • Percent of infants who were breastfed at 12 months (Row – 3) • Percent of infants who were exclusively breastfed through 6 months (Row – 4) • Percent of breastfed infants who were supplemented with infant formula before 3 months (Row – 6) • Percent of breastfed infants who were supplemented with infant formula before 6 months (Row – 5) • Percent of infants who were exclusively breastfed through 3 months (Row – 7) • Percent of infants who were breastfed at 6 months (Row – 8) Things I wanted to include within my visualization for my project to feel “complete”: a.

Figure 1: Bar plot with labels. Source: “ggplot2 barplots: Quick start guide – R software and data visualization”, Easy Guides, STHDA wiki.

  1. I would have plotted the values within that specific row/column to create a scatterplot/line plot like what we did in week six.
  2. Show the appropriate VALUES instead of numbers on my Y-axis going from 0-400.

See the accompanying Word document for the images.

###END