library(gganimate)
library(ggmap)
library(ggridges)
library(ggthemes)
library(knitr)
library(leaflet)
library(lubridate)
library(plotly)
library(scales)
library(tidyverse)
library(tibble)
library(skimr)
library(naniar)
library(gridExtra)
library(sf)
library(htmltools)
| Variable | Meaning |
|---|---|
| State | identifying variable/cases |
| Pop | size of population |
| lgl_abortion_clinics | number of legal abortion clinics available to women in the state |
| health_clinics | women’s health clinics that do not provide abortion services |
| prop_abortion | the ratio of people per abortion clinic in the state (bigger numbers mean presumably less access) |
| prop_health | the ratio of people per women’s health clinic in the state (bigger numbers mean presumably less access) |
| paid_fam_leave | whether a state has a law requiring an employer to issue paid family leave (women are disproportionately affected by paid family leave or lack thereof because they’re the ones having babies!) |
| prop_equal_pay | how many cents women make to the dollar that men make in each state |
| equal_pay_rank | how a state ranks nationally according to equal pay laws |
| marital_rape_except | whether a state has loopholes in sexual assault in the cases of marriage |
| law_strength | metric to assign to the strength of the legislation: 5 = clear and convincing, 4 = beyond reasonable doubt, 3 = not specified, 2 = conviction, 1 = nothing |
| year_passage | when the bill (to terminate parental rights in cases of rape that resulted in child’s conception) was initially passed |
| year_amend | when/if the bill was amended to expand restrictions from requiring a rape conviction to clear and convincing evidence |
| post_2015 | whether the bill was passed before or after 2015, when the Obama Administration issued grant money to any state that made this issue a legislative priority |
| bill_name | name of legislation passed in the state |
| perc_women | percent of women in both chambers combined at the time of the bill’s passage |
| perc_demo_senate | percent of members in the state senate that are of the Democratic party at the time of the bill’s passage |
| perc_demo_house | percent of members in the state house that are of the Democratic party at the time of the bill’s passage |
| Senate | party that controlled state senate during the passage of the legislation; D = Democrat, R = Republican, P = Split, B = Bipartisan, N/A = Does not apply (legislation not passed) |
| House | party that controlled state house during the passage of the legislation |
| Governor | governor’s party affiliation |
# Load the state-level data set and drop the unnamed 22nd column that
# read_csv() creates (presumably an empty trailing field in the CSV).
# readr 1.x names that column "X22" while readr >= 2.0 names it "...22";
# any_of() drops whichever spelling is present and ignores the other, so
# the chunk no longer errors when the readr version changes.
data <- read_csv("Honors_stats_updated_nov18_2.csv") %>%
  select(-any_of(c("X22", "...22")))
# Print the cleaned data for inspection
data
To begin answering our research question, we first were interested in understanding how states are similar in their state legislatures and other variables that may account for how they enact the legislation we are interested in looking at. As such, we use hierarchical clustering analysis to find structure in our data and determine states that are similar with regards to the features outlined above. Our research question focuses on understanding factors that may influence how this legislation is enforced; as a result, we do not factor in the actual strength of the law we coded above or the year in which it was passed. This is because we are interested in seeing if the variables in this data set have a relationship with what the strength of the legislation actually is.
For this analysis, we have to convert the variables that are factors into numeric data. As such, this is how the variables were coded:
Senate, House & Governor variables: 1 = Bipartisan (B), 2 = Democratic (D), 3 = Purple (P), 4 = Legislation not passed (N/A), 5 = Republican (R)
paid_fam_leave & marital_rape_except variables: 1 = “No”, 2 = “Yes”
It is also important to note why we excluded the variables that indicate the population of the state, the number of legal abortion clinics available to women in the state, and the number of women’s health clinics in the state. We did not want the size of the state to affect the results of the clustering analysis. To account for this, we left the variables prop_abortion and prop_health in the algorithm, because they place all states on the same scale by computing the ratio of people per abortion clinic and people per health clinic, respectively. These are the most important variables since we are trying to find similarities in the resources available, and not simply in the size of the state, for example.
Coding the variables above as such will allow us to see the breakdown of these variables when summarizing variable means across clusters once clusters have been determined. We used the complete linkage approach to find similarities between clusters of states, which is defined as the maximum distance between any 2 cases between any 2 clusters. After running the hierarchical clustering algorithm and visualizing the resultant dendrogram (i.e. when clusters merge and which states merge into what clusters), we determined that the appropriate number of natural clusters was 3 and better visualized these three clusters on a states map, as seen below.
# First clustering pass: leave out law strength and year of passage, along
# with the raw size counts (Pop, clinic counts) and the bill name / amendment
# / post-2015 fields
my_cluster_data <- data %>%
  select(
    -bill_name, -year_amend, -post_2015, -law_strength, -year_passage,
    -Pop, -lgl_abortion_clinics, -health_clinics
  ) %>%
  # Recode each categorical column as the integer index of its factor level
  mutate(across(
    c(Senate, House, Governor, paid_fam_leave, marital_rape_except),
    ~ as.numeric(factor(.x))
  )) %>%
  # Move the state names into the row names, leaving only numeric columns
  column_to_rownames("State")
# Hierarchical clustering on distances between the standardized features.
# Linkage is "complete" here; hclust() also accepts "single", "average"
# or "centroid".
hier_model <- my_cluster_data %>%
  scale() %>%
  dist() %>%
  hclust(method = "complete")
# Dendrogram of the merge order
plot(
  hier_model,
  main = "Dendrogram Not Accounting for Law Strength",
  sub = "Method: Complete Linkage",
  xlab = "",
  cex = 0.8
)
# Cut the tree into k groups and store each state's group label as a factor
clusters <- hier_model %>%
  cutree(k = 3) %>%
  as.factor()
# State polygon outlines used as the base layer for the choropleths
states_map <- map_data("state")
# Choropleth of the first clustering result. State is lower-cased so that
# map_id can be matched against the region names in states_map.
data %>%
  mutate(State = tolower(State)) %>%
  mutate(cluster = clusters) %>%
  ggplot(mapping = aes(fill = cluster)) +
  geom_map(
    mapping = aes(map_id = State),
    map = states_map,
    color = "black", size = 0.1, alpha = 0.8
  ) +
  # geom_map() does not set axis limits itself, so take them from the outlines
  expand_limits(x = states_map$long, y = states_map$lat) +
  scale_fill_viridis_d() +
  theme_map() +
  labs(
    title = "How Each State Clusters Together, Part 1",
    subtitle = "Results from Hierarchical Analysis before accounting for law strength",
    caption = "Note: Alaska is in Cluster 1, Hawaii is in Cluster 2",
    fill = "Cluster"
  ) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5, size = 10),
    plot.title = element_text(face = "bold", size = 12)
  )
# Mean of every clustering feature within each of the 3 clusters cut above
# (missing values are ignored). across() replaces the superseded
# summarize_all(); output columns are still named <feature>_mean.
my_cluster_data %>%
  mutate(cluster = clusters) %>%
  group_by(cluster) %>%
  summarise(across(everything(), list(mean = ~ mean(.x, na.rm = TRUE))))
From this map, we are able to visualize which states are similar in terms of their representation in the state legislatures and the resources available in each state that may affect how legislation is enforced. We can define the three clusters seen above as the following, based on the means for each variable within each cluster:
Cluster 1: States whose representation in the state legislature is Republican favored and has the second to worst representation of women. It is a tier below Cluster 2 and a tier higher than Cluster 3 in terms of “equality” for women.
Cluster 2: States whose representation in the state legislature is strongly Democratic favored and has the highest representation of women. These states also have the most “equality” for women, most access to abortion and health clinics. (Best in regards to our research question).
Cluster 3: States whose representation in the state legislature is for the most part split, with representation of women the lowest out of the three clusters and the worst in terms of “equality” for women. (Worst in regards to our research question).
Taking into account the metric above in our cluster analysis, we want to compare how each state actually rules on this legislation to how it seems it would on paper based on the variables above. As such, we created another map that shows the law strength for each state, with 1 being the “worst” (i.e. nothing in place) and 5 being the best (i.e. clear and convincing).
# Choropleth of the coded law strength per state. law_strength is turned
# into a factor so it gets a discrete fill scale.
data %>%
  mutate(State = tolower(State)) %>%
  ggplot(mapping = aes(fill = factor(law_strength))) +
  geom_map(
    mapping = aes(map_id = State),
    map = states_map,
    color = "black", size = 0.1, alpha = 0.8
  ) +
  # geom_map() does not set axis limits itself, so take them from the outlines
  expand_limits(x = states_map$long, y = states_map$lat) +
  scale_fill_viridis_d() +
  theme_map() +
  labs(
    title = "Law Strength for Each State",
    subtitle = "How States Actually Treat Legislation",
    caption = "Note: Alaska is 3, Hawaii is 5",
    fill = "Law Strength"
  ) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5, size = 10),
    plot.title = element_text(face = "bold", size = 12)
  )
Comparing the two maps above, we find some surprising results. For example, the group of West Coast and some East Coast states that were placed in Cluster 2 - the best cluster - only have a strength of 2 for the legislation they have in place. Based on the clustering performed above, we would expect these states to have strong legislation since they seem to be the most progressive and have the most representation for women and liberal ideals. Conversely, we see that states such as Florida, Mississippi and Georgia, which were placed in Cluster 1 or Cluster 3 - the worse clusters - have much better law strengths.
There are many possible explanations for this effect. One of them may be that states that have lower “equality” for women may need to have stronger laws in place to protect them, in order to “make up” for their treatment of women. (NEED EMMA FOR THIS PART)
Bringing it all together and now accounting for the year in which the legislation in question was passed and the strength of the law, we re-run our clustering algorithm to see how states are similar to each other in a more holistic view, accounting for both what we expected their attitudes towards this legislation to be and the reality of the legislation in these states. Again, we determined there to be 3 “natural” clusters and visualized these groups on the map once again.
# Second clustering pass: law_strength, year_passage and post_2015 stay in;
# only the bill name, amendment year and raw size counts are dropped
my_cluster_data2 <- data %>%
  select(
    -bill_name, -year_amend,
    -Pop, -lgl_abortion_clinics, -health_clinics
  ) %>%
  # Recode each categorical column as the integer index of its factor level
  mutate(across(
    c(
      Senate, House, Governor, post_2015,
      paid_fam_leave, marital_rape_except
    ),
    ~ as.numeric(factor(.x))
  )) %>%
  # Move the state names into the row names, leaving only numeric columns
  column_to_rownames("State")
# Hierarchical clustering on distances between the standardized features.
# Linkage is "complete" here; hclust() also accepts "single", "average"
# or "centroid".
hier_model2 <- my_cluster_data2 %>%
  scale() %>%
  dist() %>%
  hclust(method = "complete")
# Dendrogram of the merge order
plot(
  hier_model2,
  main = " Dendrogram Accounting for Law Strength",
  sub = "Method: Complete Linkage",
  xlab = "",
  cex = 0.8
)
# Cut the tree into k groups and store each state's group label as a factor
clusters2 <- hier_model2 %>%
  cutree(k = 3) %>%
  as.factor()
# Mean of every clustering feature within each of the 3 clusters cut above
# (missing values are ignored). across() replaces the superseded
# summarize_all(); output columns are still named <feature>_mean.
my_cluster_data2 %>%
  mutate(cluster = clusters2) %>%
  group_by(cluster) %>%
  summarise(across(everything(), list(mean = ~ mean(.x, na.rm = TRUE))))
# Choropleth of the second clustering result (law strength and year of
# passage included). State is lower-cased so that map_id can be matched
# against the region names in states_map.
data %>%
  mutate(State = tolower(State)) %>%
  mutate(cluster = clusters2) %>%
  ggplot(mapping = aes(fill = cluster)) +
  geom_map(
    mapping = aes(map_id = State),
    map = states_map,
    color = "black", size = 0.1, alpha = 0.8
  ) +
  # geom_map() does not set axis limits itself, so take them from the outlines
  expand_limits(x = states_map$long, y = states_map$lat) +
  scale_fill_viridis_d() +
  theme_map() +
  labs(
    title = "How Each State Clusters Together, Part 2",
    subtitle = "Accounting for Law Strength in Hierarchical Analysis",
    caption = "Note: Alaska is in Cluster 1, Hawaii is in Cluster 3",
    fill = "Cluster"
  ) +
  theme(
    plot.caption = element_text(face = "italic", hjust = 0.5, size = 10),
    plot.title = element_text(face = "bold", size = 12)
  )
The breakdown in groups is very similar to the original clustering analysis, but assigned to different clusters:
Cluster 1: States whose representation in the state legislature is Republican favored and has the second to worst representation of women. It is a tier below Cluster 3 (the best) and a tier higher than Cluster 2 (the worst) in terms of “equality” for women and has the strongest law strength.
Cluster 2: States whose representation in the state legislature is about split and has the worst resources, “equality” and representation for women, with an average law strength of 2 (Worst in our analysis).
Cluster 3: States whose representation in the state legislature is strongly favored towards Democratic and the most representation of women. These states have the most resources for women and the best “equality”, with an average law strength of about 3 (Best in our analysis).
The following updates our original data and adds a new column to designate if a state was red or blue at the time.
# Label each state by which party held both chambers at passage:
#   "Red"    - Democrats below 50% in both senate and house
#   "Blue"   - Democrats above 50% in both senate and house
#   "Purple" - every other combination (an exact 50% in either chamber, or
#              the two chambers leaning opposite ways)
# A missing percentage in either chamber leaves party as NA.
# TODO: what do we do with the Bipartisan (B) groups?
data <- data %>%
  mutate(
    party = case_when(
      is.na(perc_demo_senate) | is.na(perc_demo_house) ~ NA_character_,
      perc_demo_senate < 50 & perc_demo_house < 50 ~ "Red",
      perc_demo_senate > 50 & perc_demo_house > 50 ~ "Blue",
      TRUE ~ "Purple"
    )
  )
The following shows the breakdown of how many states have each of the five law strengths, as well as a table showing each state, its law strength, and the color of the state at the time of passage.
# Table 1: number of states at each law-strength level
data %>%
  group_by(law_strength) %>%
  count() %>%
  rename(`Law Strength` = law_strength, `Number of States` = n)
# Table 2: each state's law strength and its party color at passage
data %>%
  select(
    State,
    `Law Strength` = law_strength,
    `Color of State During Passage of Law` = party
  )
This map summarizes the information found in the table for each state. It is interactive, so if you click on a state, a box will pop up with all of its information.
# Read the US state shapefile, drop the territories and the District of
# Columbia, and sort the remaining states alphabetically by name
shp <- read_sf("files/tl_2017_us_state/tl_2017_us_state.shp") %>%
  filter(
    !(NAME %in% c(
      "Puerto Rico", "Guam", "Commonwealth of the Northern Mariana Islands",
      "United States Virgin Islands", "American Samoa", "District of Columbia"
    ))
  ) %>%
  arrange(NAME)
# HTML shown in the popup when a state is clicked: one string per row of
# `data`, in row order.
# Fix: the two clinic-ratio labels were swapped ("Prop. Abortion Clinics"
# displayed prop_health and "Prop. Health Clinics" displayed prop_abortion).
# NOTE(review): the popups are matched to polygons by position, so this
# assumes `data` is in the same alphabetical state order as `shp` - confirm.
state_popup <- paste0(
  "<strong>State: </strong>", data$State,
  "<br><strong>Pop:</strong> ", data$Pop,
  "<br><strong>Prop. Abortion Clinics: </strong> ", data$prop_abortion,
  "<br><strong>Prop. Health Clinics: </strong>", data$prop_health,
  "<br><strong>Family Leave: </strong>", data$paid_fam_leave,
  "<br><strong>Prop. Equal Pay: </strong>", data$prop_equal_pay,
  "<br><strong>Equal Pay Rank: </strong>", data$equal_pay_rank,
  "<br><strong>Marital Exception: </strong>", data$marital_rape_except,
  "<br><strong>Year Law Passed: </strong>", data$year_passage,
  "<br><strong>Year Amended: </strong>", data$year_amend,
  "<br><strong>Controlling Party During Passage: </strong>", data$party
)
# Viridis color mapping over the law-strength values, treated as categories
pal <- colorFactor(palette = "viridis", domain = factor(data$law_strength))
# Interactive map: state polygons filled and outlined by law strength, with
# a click popup per state and a legend.
# Fixes:
#   * `stroke` is a logical flag (draw the outline or not), not a width. The
#     original passed 0.2, which only worked because it is truthy; it is now
#     TRUE, which renders the same.
#     NOTE(review): if a thin outline was intended, add `weight = 0.2`.
#   * `highlight =` relied on partial matching of the `highlightOptions`
#     argument; the full argument name is now spelled out.
leaflet(shp) %>%
  addTiles() %>%
  setView(lng = -98.268082, lat = 41.125370, zoom = 3) %>%
  addPolygons(
    popup = state_popup,
    fillColor = ~pal(factor(data$law_strength)),
    stroke = TRUE,
    color = ~pal(factor(data$law_strength)),
    fillOpacity = 0.7,
    smoothFactor = 0.5,
    # Emphasize the state under the cursor
    highlightOptions = highlightOptions(
      weight = 5,
      color = "black",
      fillOpacity = 0.9,
      bringToFront = FALSE
    )
  ) %>%
  addLegend(
    pal = pal,
    values = ~factor(data$law_strength),
    opacity = 0.5,
    title = "Law Strength",
    position = "bottomright"
  )