# Attach the packages this script relies on: readr for read_csv(), dplyr for
# the pipe and the data verbs, tidyr for replace_na(). Calling library() on a
# package that is already attached is harmless.
library(readr)
library(dplyr)
library(tidyr)

# Read the revised 2015 ACS county data file.
# NOTE(review): this is an absolute, machine-specific path; consider a
# project-relative path so the script runs on other machines.
acs_county_data <- read_csv(
  "C:/Users/samc8/OneDrive - Xavier University/Data Wrangling/Week 5/acs_2015_county_data_revised.csv"
)

# Report the number of rows and columns that were read.
dim(acs_county_data)
#> [1] 3142   35
# Structural overview of the raw data: one line per column with its type and
# leading values, first in glimpse() format and then in str() format.
dplyr::glimpse(acs_county_data)
utils::str(acs_county_data)
# There are two missing values; count the NAs in each column:
# Count the missing values in every column of the raw data.
acs_county_data %>%
  is.na() %>%
  colSums()

# Replace the NAs of a vector with the median of its observed values.
impute_median <- function(x) {
  replace_na(x, median(x, na.rm = TRUE))
}

# Cleaned copy of the data with `income` and `child_poverty` median-imputed.
acs_county_data_clean <- mutate(
  acs_county_data,
  income = impute_median(income),
  child_poverty = impute_median(child_poverty)
)

# Re-count the NAs after imputation: first for the two imputed columns only,
# then for every column.
summarise(
  acs_county_data_clean,
  missing_income = sum(is.na(income)),
  missing_child_poverty = sum(is.na(child_poverty))
)
colSums(is.na(acs_county_data_clean))
# Summary statistics for every column of the cleaned data.
summary(acs_county_data_clean)

# Distribution of the `men` column.
boxplot(acs_county_data_clean[["men"]], main = "Number of Men")

# Number of rows (counties) in which `women` exceeds `men`.
nrow(filter(acs_county_data_clean, women > men))
#> [1] 1985
# Number of rows (counties) whose `unemployment` value is below 10.
# Rows where the comparison is NA are left out, as filter() would do.
with(acs_county_data_clean, sum(unemployment < 10, na.rm = TRUE))
#> [1] 2420
# Ten counties with the largest `mean_commute`, largest first.
# slice_max() replaces the superseded top_n(). Like top_n(), it keeps ties,
# so more than ten rows can come back when values tie at the cutoff.
acs_county_data_clean %>%
  select(census_id, county, state, mean_commute) %>%
  slice_max(mean_commute, n = 10) %>%
  arrange(desc(mean_commute))
# Add `pct_women`: women as a percentage of `total_pop` for each row.
acs_women_pct <- mutate(
  acs_county_data_clean,
  pct_women = women / total_pop * 100
)

# The ten rows (counties) with the smallest `pct_women`, smallest first.
acs_women_pct %>%
  arrange(pct_women) %>%
  select(census_id, county, state, pct_women) %>%
  head(10)
# Add `race_sum`: the row-wise total of the six race columns (presumably
# percentages, given the later comparisons with 100 -- confirm against the
# data dictionary).
acs_race_pct <- mutate(
  acs_county_data_clean,
  race_sum = hispanic + white + black + native + asian + pacific
)

# The ten rows (counties) with the smallest `race_sum`, smallest first.
acs_race_pct %>%
  arrange(race_sum) %>%
  select(census_id, county, state, race_sum) %>%
  head(10)
# Mean `race_sum` per state, ignoring missing values.
state_race_avg <- summarise(
  group_by(acs_race_pct, state),
  avg_race_sum = mean(race_sum, na.rm = TRUE)
)

# The state with the lowest mean `race_sum`.
head(arrange(state_race_avg, avg_race_sum), 1)
# Number of rows (counties) whose `race_sum` genuinely exceeds 100.
# `race_sum` is a sum of doubles, so a total that is mathematically 100 can
# land a hair above it (e.g. 100.00000000000001) and would be miscounted by a
# bare `> 100` test. near() screens out those rounding artefacts.
# NOTE: a count recorded with the plain `> 100` comparison may differ.
acs_race_pct %>%
  filter(race_sum > 100, !near(race_sum, 100)) %>%
  nrow()
#> [1] 11
# Number of distinct states having at least one row (county) whose `race_sum`
# equals 100.
# `race_sum` is a sum of doubles, so exact `==` can miss totals that are 100
# up to floating-point rounding; near() compares within a small tolerance.
# NOTE: a count recorded with the exact `== 100` comparison may differ.
acs_race_pct %>%
  filter(near(race_sum, 100)) %>%
  distinct(state) %>%
  nrow()
#> [1] 13
# Add `carpool_rank`: rank 1 goes to the largest `carpool` value, and tied
# values share the smallest rank of their group (min_rank semantics).
acs_carpool <- mutate(
  acs_county_data_clean,
  carpool_rank = min_rank(desc(carpool))
)

# The ten best-ranked rows (counties), rank 1 first.
acs_carpool %>%
  arrange(carpool_rank) %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  head(10)
# The ten worst-ranked rows (counties): largest `carpool_rank` first.
acs_carpool %>%
  select(census_id, county, state, carpool, carpool_rank) %>%
  arrange(desc(carpool_rank)) %>%
  head(10)
# Mean `carpool_rank` per state, ignoring missing ranks.
state_carpool_rank <- summarise(
  group_by(acs_carpool, state),
  avg_rank = mean(carpool_rank, na.rm = TRUE)
)

# The state with the lowest (i.e. best) mean rank.
head(arrange(state_carpool_rank, avg_rank), 1)
#>   state   avg_rank
#>   <chr>      <dbl>
#> 1 Arizona     971.
# The five states with the lowest (i.e. best) mean `carpool_rank`,
# best first. Missing ranks are ignored when averaging.
acs_carpool %>%
  group_by(state) %>%
  summarise(avg_rank = mean(carpool_rank, na.rm = TRUE)) %>%
  arrange(avg_rank) %>%
  head(5)
#>   state    avg_rank
#>   <chr>       <dbl>
#> 1 Arizona      971.
#> 2 Utah        1019.
#> 3 Arkansas    1055.
#> 4 Hawaii      1072.
#> 5 Alaska      1087.