library(readr)
# Load the 1940 table from CSV into a base data frame
pop_1940 <- read.csv(
  file = "~/OneDrive - Plymouth State University/madness/institutionalPop_1940.csv"
)
# show the raw table
pop_1940
## Variable Pop_Total Pop_Mental
## 1 Total 101102924 591355
## 2 Male 50553748 317812
## 3 Female 50549176 273553
## 4 White 91428165 535529
## 5 Male 45823031 288238
## 6 Female 45605134 248391
## 7 Nonwhite 9574759 54736
## 8 Male 4730717 29574
## 9 Female 4944042 25162
# Drop rows 1-4 and 7 (the all-races rows and the per-race subtotal rows in
# the printout above), leaving only the Male/Female rows within each race
pop_1940_cleaned <- pop_1940[-c(1:4, 7), ]
# show the remaining rows
pop_1940_cleaned
## Variable Pop_Total Pop_Mental
## 5 Male 45823031 288238
## 6 Female 45605134 248391
## 8 Male 4730717 29574
## 9 Female 4944042 25162
# Relabel the first column of the four remaining rows (by position) so each
# label carries both race and gender, joined by "_"
pop_1940_cleaned[1:4, 1] <- c(
  "White_Male",
  "White_Female",
  "Nonwhite_Male",
  "Nonwhite_Female"
)
# show the relabelled rows
pop_1940_cleaned
## Variable Pop_Total Pop_Mental
## 5 White_Male 45823031 288238
## 6 White_Female 45605134 248391
## 8 Nonwhite_Male 4730717 29574
## 9 Nonwhite_Female 4944042 25162
# load packages
library(dplyr) #for %>%
library(tidyr) #for separate() function
# Split the combined "Race_Gender" labels in Variable at the underscore,
# producing separate Race and Gender columns (Variable itself is replaced)
pop_1940_processed <- separate(
  pop_1940_cleaned,
  col = Variable,
  into = c("Race", "Gender"),
  sep = "_"
)
# show the result
pop_1940_processed
## Race Gender Pop_Total Pop_Mental
## 5 White Male 45823031 288238
## 6 White Female 45605134 248391
## 8 Nonwhite Male 4730717 29574
## 9 Nonwhite Female 4944042 25162
# Read the 1970 table with readr; per the printout below it already has
# Race, Gender and Pop_Mental_70 columns
pop70_cleaned <- read_csv(
  file = "~/OneDrive - Plymouth State University/madness/popdata1970.csv"
)
# show the tibble
pop70_cleaned
## # A tibble: 4 x 3
## Race Gender Pop_Mental_70
## <chr> <chr> <dbl>
## 1 White Male 194405
## 2 White Female 157578
## 3 Nonwhite Male 45567
## 4 Nonwhite Female 29227
# Join data: put the 1940 and 1970 counts side by side.
# Pop_Total is dropped, the 1970 table is matched on Race and Gender, and the
# 1940 count is renamed so both count columns share the "Pop_Mental_" prefix.
pop_40N70 <- pop_1940_processed %>%
  select(-Pop_Total) %>%
  left_join(pop70_cleaned, by = c("Race", "Gender")) %>%
  rename(Pop_Mental_40 = Pop_Mental)
# print data
pop_40N70
## Race Gender Pop_Mental_40 Pop_Mental_70
## 1 White Male 288238 194405
## 2 White Female 248391 157578
## 3 Nonwhite Male 29574 45567
## 4 Nonwhite Female 25162 29227
# Reshape to long format: one row per Race x Gender x Year.
# The count columns are selected by their shared "Pop_Mental_" prefix rather
# than by position, so the reshape does not depend on column order.
# names_prefix strips that prefix, leaving the two-digit year ("40"/"70") as a
# character Year column; relocate() keeps Year as the last column.
pop_long <- pop_40N70 %>%
  pivot_longer(
    cols = starts_with("Pop_Mental_"),
    names_to = "Year",
    names_prefix = "Pop_Mental_",
    values_to = "Pop_mental"
  ) %>%
  relocate(Year, .after = Pop_mental)
# print data
pop_long
## # A tibble: 8 x 4
## Race Gender Pop_mental Year
## <chr> <chr> <dbl> <chr>
## 1 White Male 288238 40
## 2 White Male 194405 70
## 3 White Female 248391 40
## 4 White Female 157578 70
## 5 Nonwhite Male 29574 40
## 6 Nonwhite Male 45567 70
## 7 Nonwhite Female 25162 40
## 8 Nonwhite Female 29227 70
library(ggplot2)
library(scales)
# Interested in the change between 40 and 70, overall:
# sum Pop_mental across Gender within each Year/Race pair, then draw
# side-by-side bars per Year, filled by Race, with comma-formatted y labels.
# .groups = "drop" returns an ungrouped summary (and avoids the regrouping
# message) instead of leaving the result grouped by Year.
pop_long %>%
  group_by(Year, Race) %>%
  summarise(Pop = sum(Pop_mental), .groups = "drop") %>%
  ggplot(aes(x = Year, y = Pop, fill = Race)) +
  geom_col(position = "dodge") +
  scale_y_continuous(labels = comma_format())