For this assignment, I decided to use the dataset that corresponds to the article "What Do Men Think It Means To Be A Man?" on FiveThirtyEight.com. The article can be found here: https://fivethirtyeight.com/features/what-do-men-think-it-means-to-be-a-man/
It contains the results of a survey of 1,615 adult men conducted by SurveyMonkey in partnership with FiveThirtyEight and WNYC Studios from May 10-22, 2018.
library(tidyverse)
# Download the raw survey responses from GitHub with base read.csv() and
# convert the resulting data frame to a tibble.
dat <- read.csv(
  "https://raw.githubusercontent.com/amberferger/DATA607_Masculinity/master/raw-responses.csv"
) %>%
  as_tibble()
We have quite a few questions in this survey, so we will focus on just a few. For the purpose of this vignette, let’s see what role demographics play in the answer to the question "How important is it to you that others see you as masculine?" We’ll use the select() command (from dplyr, one of the tidyverse packages) to return only the columns we are interested in looking at: the two demographic variables (race and orientation) and the answer to the question (q0002). We’ll also use the filter() command to subset our data to only the individuals who provided a response to all of these questions.
# Keep the two demographic columns plus the q0002 response, then drop every
# row where any of the three is "No answer". Comma-separated conditions in
# filter() are combined with AND.
dat <- dat %>%
  select(race2, orientation, q0002) %>%
  filter(
    q0002 != "No answer",
    race2 != "No answer",
    orientation != "No answer"
  )
Our final data set has 1 response variable (the answer to the question) and 2 explanatory variables (our demographic data). We’ll use the group_by() function with the count() function to summarize our data. We will then transform our counts by computing, within each demographic group, the proportion of respondents who gave each answer.
# Tally respondents for every (race2, q0002) combination, then express each
# tally as a share of its race2 group: RACE_PCT = n / (total n for that race2).
# The result stays grouped by race2.
raceCount <- dat %>%
  count(race2, q0002) %>%
  group_by(race2) %>%
  mutate(RACE_PCT = n / sum(n))

# Print the summary table.
raceCount
## # A tibble: 8 x 4
## # Groups: race2 [2]
## race2 q0002 n RACE_PCT
## <fct> <fct> <int> <dbl>
## 1 Non-white Not at all important 46 0.178
## 2 Non-white Not too important 68 0.264
## 3 Non-white Somewhat important 99 0.384
## 4 Non-white Very important 45 0.174
## 5 White Not at all important 193 0.144
## 6 White Not too important 471 0.353
## 7 White Somewhat important 523 0.391
## 8 White Very important 149 0.112
We’ll do the same thing for the orientation variable.
# Tally respondents for every (orientation, q0002) combination, then express
# each tally as a share of its orientation group:
# ORIENTATION_PCT = n / (total n for that orientation).
# The result stays grouped by orientation.
orientationCount <- dat %>%
  count(orientation, q0002) %>%
  group_by(orientation) %>%
  mutate(ORIENTATION_PCT = n / sum(n))

# Print the summary table.
orientationCount
## # A tibble: 12 x 4
## # Groups: orientation [3]
## orientation q0002 n ORIENTATION_PCT
## <fct> <fct> <int> <dbl>
## 1 Gay/Bisexual Not at all important 33 0.206
## 2 Gay/Bisexual Not too important 58 0.362
## 3 Gay/Bisexual Somewhat important 54 0.338
## 4 Gay/Bisexual Very important 15 0.0938
## 5 Other Not at all important 10 0.323
## 6 Other Not too important 8 0.258
## 7 Other Somewhat important 5 0.161
## 8 Other Very important 8 0.258
## 9 Straight Not at all important 196 0.140
## 10 Straight Not too important 473 0.337
## 11 Straight Somewhat important 563 0.401
## 12 Straight Very important 171 0.122
Now let’s visualize our data! We’ll use the ggplot2 package to take a look:
library(ggplot2)
# Dodged bar chart: one cluster per q0002 answer, one bar per race2 group,
# bar height taken directly from the precomputed RACE_PCT share.
ggplot(raceCount, aes(x = q0002, y = RACE_PCT, fill = race2)) +
  geom_col(position = "dodge") +
  labs(title = "Race vs Answer")
# Dodged bar chart: one cluster per q0002 answer, one bar per orientation
# group, bar height taken directly from the precomputed ORIENTATION_PCT share.
ggplot(orientationCount, aes(x = q0002, y = ORIENTATION_PCT, fill = orientation)) +
  geom_col(position = "dodge") +
  labs(title = "Orientation vs Answer")
# Re-read the raw survey responses, this time with readr::read_csv(), which
# already returns a tibble (so no as_tibble() wrapper is needed) and keeps text
# columns as character. The file's first column has no header; readr names it
# itself and emits a warning about the missing column name.
dat1 <- read_csv(
  "https://raw.githubusercontent.com/amberferger/DATA607_Masculinity/master/raw-responses.csv"
)
## Warning: Missing column names filled in: 'X1' [1]
## Parsed with column specification:
## cols(
## .default = col_character(),
## X1 = col_double(),
## weight = col_double()
## )
## See spec(...) for full column specifications.
# Keep only race/ethnicity, age bracket and parental status, and drop every
# row where any of the three is "No answer". Comma-separated conditions in
# filter() are combined with AND.
dat1 <- dat1 %>%
  select(racethn4, age3, kids) %>%
  filter(
    racethn4 != "No answer",
    age3 != "No answer",
    kids != "No answer"
  )

# Print the filtered data.
dat1
## # A tibble: 1,606 x 3
## racethn4 age3 kids
## <chr> <chr> <chr>
## 1 Hispanic 35 - 64 No children
## 2 White 65 and up Has children
## 3 White 35 - 64 Has children
## 4 White 65 and up Has children
## 5 White 35 - 64 No children
## 6 White 65 and up Has children
## 7 Other 18 - 34 Has children
## 8 White 65 and up No children
## 9 Hispanic 35 - 64 Has children
## 10 White 35 - 64 No children
## # ... with 1,596 more rows
# Tally respondents for every (kids, racethn4) combination, then express each
# tally as a share of its kids group: Kids_PCT = n / (total n for that kids
# value). The result stays grouped by kids.
kidsCount <- dat1 %>%
  count(kids, racethn4) %>%
  group_by(kids) %>%
  mutate(Kids_PCT = n / sum(n))

# Print the summary table.
kidsCount
## # A tibble: 8 x 4
## # Groups: kids [2]
## kids racethn4 n Kids_PCT
## <chr> <chr> <int> <dbl>
## 1 Has children Black 41 0.0385
## 2 Has children Hispanic 42 0.0394
## 3 Has children Other 80 0.0751
## 4 Has children White 902 0.847
## 5 No children Black 29 0.0536
## 6 No children Hispanic 28 0.0518
## 7 No children Other 40 0.0739
## 8 No children White 444 0.821
# Dodged bar chart of racethn4 shares within each kids group.
# fill must be `kids`, not racethn4: with fill = racethn4 the kids variable is
# never mapped, so the two rows per racethn4 (one per kids value) fall in the
# same dodge group and are drawn on top of each other. Mapping fill to kids
# gives one bar per kids value inside each racethn4 cluster.
# The title names the two variables actually plotted; no survey answer is
# involved in this chart.
ggplot(kidsCount, aes(x = racethn4, y = Kids_PCT, fill = kids)) +
  geom_col(position = "dodge") +
  labs(title = "Kids vs Race")
# Tally respondents for every (age3, racethn4) combination, then express each
# tally as a share of its age3 group: Age_PCT = n / (total n for that age3
# value). The result stays grouped by age3.
ageCount <- dat1 %>%
  count(age3, racethn4) %>%
  group_by(age3) %>%
  mutate(Age_PCT = n / sum(n))

# Print the summary table.
ageCount
## # A tibble: 12 x 4
## # Groups: age3 [3]
## age3 racethn4 n Age_PCT
## <chr> <chr> <int> <dbl>
## 1 18 - 34 Black 14 0.106
## 2 18 - 34 Hispanic 13 0.0985
## 3 18 - 34 Other 19 0.144
## 4 18 - 34 White 86 0.652
## 5 35 - 64 Black 46 0.0542
## 6 35 - 64 Hispanic 48 0.0566
## 7 35 - 64 Other 75 0.0884
## 8 35 - 64 White 679 0.801
## 9 65 and up Black 10 0.0160
## 10 65 and up Hispanic 9 0.0144
## 11 65 and up Other 26 0.0415
## 12 65 and up White 581 0.928
# Dodged bar chart of racethn4 shares within each age3 bracket: one cluster
# per racethn4 value, one bar per age3 bracket, bar height = Age_PCT.
# The title names the two variables actually plotted; no survey answer is
# involved in this chart.
ggplot(ageCount, aes(x = racethn4, y = Age_PCT, fill = age3)) +
  geom_col(position = "dodge") +
  labs(title = "Age vs Race")