library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v purrr   0.3.4
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Read in Maternal Mortality dataset

library(readr)
# Load the maternal mortality data set. The path is relative to the current
# working directory, so the CSV must sit next to this script/report.
mm_df <- read_csv("maternalmortality.csv")
## Rows: 51 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): State
## dbl (6): MMR, Prenatal, Csection, Underserved, Uninsured, Population_18
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# View() opens a spreadsheet-style viewer, which only makes sense in an
# interactive session; skip it when the file is knitted or run via Rscript.
if (interactive()) {
  View(mm_df)
}

I used the Maternal Mortality Rates dataset created by Professor Rachel Saidi. I retrieved this dataset from the course resources tab through a shared OneDrive link that Professor Saidi provided to students. The variables in this dataset are State, MMR (maternal mortality rate), Prenatal, Csection (C-section), Underserved, Uninsured, and Population_18 (population in 2018). I really enjoyed working with this dataset since I am building a career in healthcare. State is the only categorical variable; all the other variables are quantitative.

4.) Summary Statistics

# Summary of every column in the full data set: quartiles, mean, and range
# for the numeric columns, and length/class/mode for the character State column.
summary(mm_df)
##     State                MMR          Prenatal        Csection    
##  Length:51          Min.   : 1.2   Min.   : 9.20   Min.   :22.20  
##  Class :character   1st Qu.: 7.2   1st Qu.:13.75   1st Qu.:28.30  
##  Mode  :character   Median :10.4   Median :15.70   Median :30.70  
##                     Mean   :10.2   Mean   :16.35   Mean   :30.62  
##                     3rd Qu.:12.0   3rd Qu.:18.90   3rd Qu.:33.50  
##                     Max.   :34.9   Max.   :30.90   Max.   :38.30  
##   Underserved      Uninsured     Population_18     
##  Min.   :22.00   Min.   : 8.70   Min.   :  577737  
##  1st Qu.:39.00   1st Qu.:12.35   1st Qu.: 1780020  
##  Median :45.00   Median :15.70   Median : 4468402  
##  Mean   :43.51   Mean   :16.45   Mean   : 6417545  
##  3rd Qu.:50.00   3rd Qu.:19.95   3rd Qu.: 7353618  
##  Max.   :61.00   Max.   :27.80   Max.   :39557045

EDA

I used dplyr's filter() function to keep four jurisdictions in total: three states (Georgia, Mississippi, and Maryland) and the District of Columbia.

# Subset for the MMR-vs-uninsured comparison: keep only the four focus
# jurisdictions and the three columns that comparison needs.
# No group_by() here: nothing downstream computes per-group values, and a
# grouping that is never ungrouped would silently follow the tibble around.
mm_dfUN <- mm_df %>%
  filter(State %in% c("DC", "MD", "MS", "GA")) %>%
  select(State, MMR, Uninsured)

# Summary statistics for the four-row subset.
summary(mm_dfUN)
##     State                MMR          Uninsured   
##  Length:4           Min.   :15.20   Min.   :11.5  
##  Class :character   1st Qu.:16.18   1st Qu.:14.2  
##  Mode  :character   Median :18.50   Median :17.4  
##                     Mean   :21.77   Mean   :16.8  
##                     3rd Qu.:24.10   3rd Qu.:20.0  
##                     Max.   :34.90   Max.   :20.9
  1. I wanted to see if there was a correlation between MMR and uninsured mothers in the following states.
# Scatter plot of maternal mortality rate against the uninsured rate, one
# point per jurisdiction, coloured by State.
# mm_dfUN is already restricted to DC, MD, GA and MS, so it is not filtered
# again here. All labels live in a single labs() call.
mm_dfUNp1 <- ggplot(mm_dfUN, aes(x = Uninsured, y = MMR)) +
  geom_point(aes(color = State), size = 5) +
  scale_color_brewer(palette = "Set1") +
  labs(
    title = "Correlation Between MMR and Uninsured Mothers in Four States",
    x = "Uninsured Mothers [in percent]",
    y = "Mortality Rate",
    color = "State"
  ) +
  theme_minimal(base_size = 7)

# Print the plot so it is actually rendered; assigning it alone shows nothing.
mm_dfUNp1
  2. I wanted to look at the MMR alongside the percentage of women who DID receive prenatal care and the percentage living in underserved communities.
# Subset for the prenatal-care comparison: the four focus jurisdictions with
# their MMR, Prenatal and Underserved values.
# Underserved is a numeric measure, so grouping by it would only create one
# group per row; the tibble is left ungrouped.
mm_dfPre <- mm_df %>%
  filter(State %in% c("DC", "MD", "MS", "GA")) %>%
  select(State, MMR, Prenatal, Underserved)

mm_dfPre
## # A tibble: 4 x 4
##   State   MMR Prenatal Underserved
##   <chr> <dbl>    <dbl>       <dbl>
## 1 GA     20.5     15.8          41
## 2 MD     16.5     16.6          40
## 3 MS     15.2     15.6          46
## 4 DC     34.9     23.2          50
# Summary statistics for the four-row subset.
summary(mm_dfPre)
##     State                MMR           Prenatal      Underserved   
##  Length:4           Min.   :15.20   Min.   :15.60   Min.   :40.00  
##  Class :character   1st Qu.:16.18   1st Qu.:15.75   1st Qu.:40.75  
##  Mode  :character   Median :18.50   Median :16.20   Median :43.50  
##                     Mean   :21.77   Mean   :17.80   Mean   :44.25  
##                     3rd Qu.:24.10   3rd Qu.:18.25   3rd Qu.:47.00  
##                     Max.   :34.90   Max.   :23.20   Max.   :50.00
# Bar chart of the Prenatal value for each State; bar height is the Prenatal
# column and the fill colour encodes MMR.
# geom_col() draws bars whose heights are taken directly from the y values.
mm_dfPrep2 <- ggplot(mm_dfPre, aes(x = State, y = Prenatal, fill = MMR)) +
  geom_col(colour = "white", width = .8) +
  labs(
    x = "State",
    y = "Prenatal Care",
    title = "Prenatal Care and Maternal Mortality Rates by State"
  )

mm_dfPrep2

3. I took the analysis one step further by adding another variable to my EDA: C-section rates.

# Subset for the C-section comparison: the four focus jurisdictions with
# their MMR, Csection, Underserved and Uninsured values.
# Uninsured is a numeric measure, so grouping by it would only create one
# group per row; the tibble is left ungrouped.
mm_dfCS <- mm_df %>%
  filter(State %in% c("DC", "MD", "MS", "GA")) %>%
  select(State, MMR, Csection, Underserved, Uninsured)

mm_dfCS
## # A tibble: 4 x 5
##   State   MMR Csection Underserved Uninsured
##   <chr> <dbl>    <dbl>       <dbl>     <dbl>
## 1 GA     20.5     32            41      19.7
## 2 MD     16.5     33.1          40      15.1
## 3 MS     15.2     36.2          46      20.9
## 4 DC     34.9     32.6          50      11.5
# Summary statistics for the four-row subset.
summary(mm_dfCS)
##     State                MMR           Csection      Underserved   
##  Length:4           Min.   :15.20   Min.   :32.00   Min.   :40.00  
##  Class :character   1st Qu.:16.18   1st Qu.:32.45   1st Qu.:40.75  
##  Mode  :character   Median :18.50   Median :32.85   Median :43.50  
##                     Mean   :21.77   Mean   :33.48   Mean   :44.25  
##                     3rd Qu.:24.10   3rd Qu.:33.88   3rd Qu.:47.00  
##                     Max.   :34.90   Max.   :36.20   Max.   :50.00  
##    Uninsured   
##  Min.   :11.5  
##  1st Qu.:14.2  
##  Median :17.4  
##  Mean   :16.8  
##  3rd Qu.:20.0  
##  Max.   :20.9
# Bar chart of the Csection value against MMR, one bar per jurisdiction,
# filled by State. Each bar sits at that jurisdiction's MMR on the x axis.
# geom_col() draws bars whose heights are taken directly from the y values.
mm_dfCS1 <- ggplot(mm_dfCS, aes(x = MMR, y = Csection, fill = State)) +
  geom_col(colour = "white", width = .8) +
  labs(
    x = "Maternal Mortality Rates",
    y = "C- Sections",
    title = "C-Section and Maternal Mortality Rates by State"
  )

# Print the plot so it is actually rendered; assigning it alone shows nothing.
mm_dfCS1

What did I learn?

The District of Columbia had the highest maternal mortality rate (34.9) and the largest underserved share (50%) of the four, yet it had the lowest percentage of uninsured mothers (11.5%) in 2018. Georgia had the second-highest maternal mortality rate (20.5), the third-highest underserved population (41%), and the second-highest percentage of uninsured mothers (19.7%) in 2018. Maryland had the third-highest maternal mortality rate (16.5), an underserved population of about 40%, and the second-lowest percentage of uninsured mothers (15.1%) in 2018. Mississippi had the highest percentage of uninsured mothers (20.9%), an underserved population of 46%, and the lowest MMR (15.2). C-section rates ranged from 32.0% to 36.2% across the three states and the District of Columbia; Mississippi was the highest at 36.2%, while the other three stayed between 32.0% and 33.1%.