[Video]
Take a look at the counties dataset using the glimpse() function.
What is the first value in the income variable?
glimpse(counties)
## Rows: 3,138
## Columns: 40
## $ census_id <chr> "1001", "1003", "1005", "1007", "1009", "1011", "1…
## $ state <chr> "Alabama", "Alabama", "Alabama", "Alabama", "Alaba…
## $ county <chr> "Autauga", "Baldwin", "Barbour", "Bibb", "Blount",…
## $ region <chr> "South", "South", "South", "South", "South", "Sout…
## $ metro <chr> "Metro", "Metro", "Nonmetro", "Metro", "Metro", "N…
## $ population <dbl> 55221, 195121, 26932, 22604, 57710, 10678, 20354, …
## $ men <dbl> 26745, 95314, 14497, 12073, 28512, 5660, 9502, 562…
## $ women <dbl> 28476, 99807, 12435, 10531, 29198, 5018, 10852, 60…
## $ hispanic <dbl> 2.6, 4.5, 4.6, 2.2, 8.6, 4.4, 1.2, 3.5, 0.4, 1.5, …
## $ white <dbl> 75.8, 83.1, 46.2, 74.5, 87.9, 22.2, 53.3, 73.0, 57…
## $ black <dbl> 18.5, 9.5, 46.7, 21.4, 1.5, 70.7, 43.8, 20.3, 40.3…
## $ native <dbl> 0.4, 0.6, 0.2, 0.4, 0.3, 1.2, 0.1, 0.2, 0.2, 0.6, …
## $ asian <dbl> 1.0, 0.7, 0.4, 0.1, 0.1, 0.2, 0.4, 0.9, 0.8, 0.3, …
## $ pacific <dbl> 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, …
## $ citizens <dbl> 40725, 147695, 20714, 17495, 42345, 8057, 15581, 8…
## $ income <dbl> 51281, 50254, 32964, 38678, 45813, 31938, 32229, 4…
## $ income_err <dbl> 2391, 1263, 2973, 3995, 3141, 5884, 1793, 925, 294…
## $ income_per_cap <dbl> 24974, 27317, 16824, 18431, 20532, 17580, 18390, 2…
## $ income_per_cap_err <dbl> 1080, 711, 798, 1618, 708, 2055, 714, 489, 1366, 1…
## $ poverty <dbl> 12.9, 13.4, 26.7, 16.8, 16.7, 24.6, 25.4, 20.5, 21…
## $ child_poverty <dbl> 18.6, 19.2, 45.3, 27.9, 27.2, 38.4, 39.2, 31.6, 37…
## $ professional <dbl> 33.2, 33.1, 26.8, 21.5, 28.5, 18.8, 27.5, 27.3, 23…
## $ service <dbl> 17.0, 17.7, 16.1, 17.9, 14.1, 15.0, 16.6, 17.7, 14…
## $ office <dbl> 24.2, 27.1, 23.1, 17.8, 23.9, 19.7, 21.9, 24.2, 26…
## $ construction <dbl> 8.6, 10.8, 10.8, 19.0, 13.5, 20.1, 10.3, 10.5, 11.…
## $ production <dbl> 17.1, 11.2, 23.1, 23.7, 19.9, 26.4, 23.7, 20.4, 24…
## $ drive <dbl> 87.5, 84.7, 83.8, 83.2, 84.9, 74.9, 84.5, 85.3, 85…
## $ carpool <dbl> 8.8, 8.8, 10.9, 13.5, 11.2, 14.9, 12.4, 9.4, 11.9,…
## $ transit <dbl> 0.1, 0.1, 0.4, 0.5, 0.4, 0.7, 0.0, 0.2, 0.2, 0.2, …
## $ walk <dbl> 0.5, 1.0, 1.8, 0.6, 0.9, 5.0, 0.8, 1.2, 0.3, 0.6, …
## $ other_transp <dbl> 1.3, 1.4, 1.5, 1.5, 0.4, 1.7, 0.6, 1.2, 0.4, 0.7, …
## $ work_at_home <dbl> 1.8, 3.9, 1.6, 0.7, 2.3, 2.8, 1.7, 2.7, 2.1, 2.5, …
## $ mean_commute <dbl> 26.5, 26.4, 24.1, 28.8, 34.9, 27.5, 24.6, 24.1, 25…
## $ employed <dbl> 23986, 85953, 8597, 8294, 22189, 3865, 7813, 47401…
## $ private_work <dbl> 73.6, 81.5, 71.8, 76.8, 82.0, 79.5, 77.4, 74.1, 85…
## $ public_work <dbl> 20.9, 12.3, 20.8, 16.1, 13.5, 15.1, 16.2, 20.8, 12…
## $ self_employed <dbl> 5.5, 5.8, 7.3, 6.7, 4.2, 5.4, 6.2, 5.0, 2.8, 7.9, …
## $ family_work <dbl> 0.0, 0.4, 0.1, 0.4, 0.4, 0.0, 0.2, 0.1, 0.0, 0.5, …
## $ unemployment <dbl> 7.6, 7.5, 17.6, 8.3, 7.7, 18.0, 10.9, 12.3, 8.9, 7…
## $ land_area <dbl> 594.44, 1589.78, 884.88, 622.58, 644.78, 622.81, 7…
# Select the columns
counties %>%
select(state, county, population, poverty)
## # A tibble: 3,138 x 4
## state county population poverty
## <chr> <chr> <dbl> <dbl>
## 1 Alabama Autauga 55221 12.9
## 2 Alabama Baldwin 195121 13.4
## 3 Alabama Barbour 26932 26.7
## 4 Alabama Bibb 22604 16.8
## 5 Alabama Blount 57710 16.7
## 6 Alabama Bullock 10678 24.6
## 7 Alabama Butler 20354 25.4
## 8 Alabama Calhoun 116648 20.5
## 9 Alabama Chambers 34079 21.6
## 10 Alabama Cherokee 26008 19.2
## # … with 3,128 more rows
[Video]
counties_selected <- counties %>%
select(state, county, population, unemployment)
counties_selected
## # A tibble: 3,138 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 Alabama Autauga 55221 7.6
## 2 Alabama Baldwin 195121 7.5
## 3 Alabama Barbour 26932 17.6
## 4 Alabama Bibb 22604 8.3
## 5 Alabama Blount 57710 7.7
## 6 Alabama Bullock 10678 18
## 7 Alabama Butler 20354 10.9
## 8 Alabama Calhoun 116648 12.3
## 9 Alabama Chambers 34079 8.9
## 10 Alabama Cherokee 26008 7.9
## # … with 3,128 more rows
counties_selected %>%
arrange(population)
## # A tibble: 3,138 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 Hawaii Kalawao 85 0
## 2 Texas King 267 5.1
## 3 Nebraska McPherson 433 0.9
## 4 Montana Petroleum 443 6.6
## 5 Nebraska Arthur 448 4
## 6 Nebraska Loup 548 0.7
## 7 Nebraska Blaine 551 0.7
## 8 New Mexico Harding 565 6
## 9 Texas Kenedy 565 0
## 10 Colorado San Juan 606 13.8
## # … with 3,128 more rows
counties_selected %>%
arrange(desc(population))
## # A tibble: 3,138 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 California Los Angeles 10038388 10
## 2 Illinois Cook 5236393 10.7
## 3 Texas Harris 4356362 7.5
## 4 Arizona Maricopa 4018143 7.7
## 5 California San Diego 3223096 8.7
## 6 California Orange 3116069 7.6
## 7 Florida Miami-Dade 2639042 10
## 8 New York Kings 2595259 10
## 9 Texas Dallas 2485003 7.6
## 10 New York Queens 2301139 8.6
## # … with 3,128 more rows
counties_selected %>%
arrange(desc(population)) %>%
filter(state == "New York")
## # A tibble: 62 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 New York Kings 2595259 10
## 2 New York Queens 2301139 8.6
## 3 New York New York 1629507 7.5
## 4 New York Suffolk 1501373 6.4
## 5 New York Bronx 1428357 14
## 6 New York Nassau 1354612 6.4
## 7 New York Westchester 967315 7.6
## 8 New York Erie 921584 7
## 9 New York Monroe 749356 7.7
## 10 New York Richmond 472481 6.9
## # … with 52 more rows
counties_selected %>%
arrange(desc(population)) %>%
filter(unemployment < 6)
## # A tibble: 949 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 Virginia Fairfax 1128722 4.9
## 2 Utah Salt Lake 1078958 5.8
## 3 Hawaii Honolulu 984178 5.6
## 4 Texas Collin 862215 4.9
## 5 Texas Denton 731851 5.7
## 6 Texas Fort Bend 658331 5.1
## 7 Kansas Johnson 566814 4.5
## 8 Maryland Anne Arundel 555280 5.9
## 9 Colorado Jefferson 552344 5.9
## 10 Utah Utah 551957 5.5
## # … with 939 more rows
counties_selected %>%
arrange(desc(population)) %>%
filter(state == "New York",
unemployment < 6)
## # A tibble: 5 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 New York Tompkins 103855 5.9
## 2 New York Chemung 88267 5.4
## 3 New York Madison 72427 5.1
## 4 New York Livingston 64801 5.4
## 5 New York Seneca 35144 5.5
counties_selected <- counties %>%
select(state, county, population, private_work, public_work, self_employed)
# Add a verb to sort in descending order of public_work
counties_selected %>%
arrange(desc(public_work))
## # A tibble: 3,138 x 6
## state county population private_work public_work self_employed
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Hawaii Kalawao 85 25 64.1 10.9
## 2 Alaska Yukon-Koyukuk Ce… 5644 33.3 61.7 5.1
## 3 Wisconsin Menominee 4451 36.8 59.1 3.7
## 4 North Da… Sioux 4380 32.9 56.8 10.2
## 5 South Da… Todd 9942 34.4 55 9.8
## 6 Alaska Lake and Peninsu… 1474 42.2 51.6 6.1
## 7 Californ… Lassen 32645 42.6 50.5 6.8
## 8 South Da… Buffalo 2038 48.4 49.5 1.8
## 9 South Da… Dewey 5579 34.9 49.2 14.7
## 10 Texas Kenedy 565 51.9 48.1 0
## # … with 3,128 more rows
counties_selected <- counties %>%
select(state, county, population)
# Filter for counties with a population above 1000000
counties_selected %>%
filter(population > 1000000)
## # A tibble: 41 x 3
## state county population
## <chr> <chr> <dbl>
## 1 Arizona Maricopa 4018143
## 2 California Alameda 1584983
## 3 California Contra Costa 1096068
## 4 California Los Angeles 10038388
## 5 California Orange 3116069
## 6 California Riverside 2298032
## 7 California Sacramento 1465832
## 8 California San Bernardino 2094769
## 9 California San Diego 3223096
## 10 California Santa Clara 1868149
## # … with 31 more rows
counties_selected <- counties %>%
select(state, county, population)
# Filter for counties in the state of California that have a population above 1000000
counties_selected %>%
filter(state == "California", population > 1000000)
## # A tibble: 9 x 3
## state county population
## <chr> <chr> <dbl>
## 1 California Alameda 1584983
## 2 California Contra Costa 1096068
## 3 California Los Angeles 10038388
## 4 California Orange 3116069
## 5 California Riverside 2298032
## 6 California Sacramento 1465832
## 7 California San Bernardino 2094769
## 8 California San Diego 3223096
## 9 California Santa Clara 1868149
counties_selected <- counties %>%
select(state, county, population, private_work, public_work, self_employed)
# Filter for Texas and more than 10000 people; sort in descending order of private_work
counties_selected %>%
filter(state == "Texas", population > 10000) %>%
arrange(desc(private_work))
## # A tibble: 169 x 6
## state county population private_work public_work self_employed
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Texas Gregg 123178 84.7 9.8 5.4
## 2 Texas Collin 862215 84.1 10 5.8
## 3 Texas Dallas 2485003 83.9 9.5 6.4
## 4 Texas Harris 4356362 83.4 10.1 6.3
## 5 Texas Andrews 16775 83.1 9.6 6.8
## 6 Texas Tarrant 1914526 83.1 11.4 5.4
## 7 Texas Titus 32553 82.5 10 7.4
## 8 Texas Denton 731851 82.2 11.9 5.7
## 9 Texas Ector 149557 82 11.2 6.7
## 10 Texas Moore 22281 82 11.7 5.9
## # … with 159 more rows
[Video]
counties_selected <- counties %>%
select(state, county, population, unemployment)
counties_selected
## # A tibble: 3,138 x 4
## state county population unemployment
## <chr> <chr> <dbl> <dbl>
## 1 Alabama Autauga 55221 7.6
## 2 Alabama Baldwin 195121 7.5
## 3 Alabama Barbour 26932 17.6
## 4 Alabama Bibb 22604 8.3
## 5 Alabama Blount 57710 7.7
## 6 Alabama Bullock 10678 18
## 7 Alabama Butler 20354 10.9
## 8 Alabama Calhoun 116648 12.3
## 9 Alabama Chambers 34079 8.9
## 10 Alabama Cherokee 26008 7.9
## # … with 3,128 more rows
counties_selected %>%
mutate(unemployed_population = population * unemployment / 100)
## # A tibble: 3,138 x 5
## state county population unemployment unemployed_population
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Alabama Autauga 55221 7.6 4197.
## 2 Alabama Baldwin 195121 7.5 14634.
## 3 Alabama Barbour 26932 17.6 4740.
## 4 Alabama Bibb 22604 8.3 1876.
## 5 Alabama Blount 57710 7.7 4444.
## 6 Alabama Bullock 10678 18 1922.
## 7 Alabama Butler 20354 10.9 2219.
## 8 Alabama Calhoun 116648 12.3 14348.
## 9 Alabama Chambers 34079 8.9 3033.
## 10 Alabama Cherokee 26008 7.9 2055.
## # … with 3,128 more rows
counties_selected %>%
mutate(unemployed_population = population * unemployment / 100) %>%
arrange(desc(unemployed_population))
## # A tibble: 3,138 x 5
## state county population unemployment unemployed_population
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 California Los Angeles 10038388 10 1003839.
## 2 Illinois Cook 5236393 10.7 560294.
## 3 Texas Harris 4356362 7.5 326727.
## 4 Arizona Maricopa 4018143 7.7 309397.
## 5 California Riverside 2298032 12.9 296446.
## 6 California San Diego 3223096 8.7 280409.
## 7 Michigan Wayne 1778969 14.9 265066.
## 8 California San Bernardino 2094769 12.6 263941.
## 9 Florida Miami-Dade 2639042 10 263904.
## 10 New York Kings 2595259 10 259526.
## # … with 3,128 more rows
counties_selected <- counties %>%
select(state, county, population, public_work)
# Add a new column public_workers with the number of people employed in public work
counties_selected %>%
mutate(public_workers = population * public_work / 100) %>%
arrange(desc(public_workers))
## # A tibble: 3,138 x 5
## state county population public_work public_workers
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 California Los Angeles 10038388 11.5 1154415.
## 2 Illinois Cook 5236393 11.5 602185.
## 3 California San Diego 3223096 14.8 477018.
## 4 Arizona Maricopa 4018143 11.7 470123.
## 5 Texas Harris 4356362 10.1 439993.
## 6 New York Kings 2595259 14.4 373717.
## 7 California San Bernardino 2094769 16.7 349826.
## 8 California Riverside 2298032 14.9 342407.
## 9 California Sacramento 1465832 21.8 319551.
## 10 California Orange 3116069 10.2 317839.
## # … with 3,128 more rows
# Select the columns state, county, population, men, and women
counties_selected <- counties %>%
select(state, county, population, men, women)
# Calculate proportion_women as the fraction of the population made up of women
counties_selected %>%
mutate(proportion_women = women / population)
## # A tibble: 3,138 x 6
## state county population men women proportion_women
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama Autauga 55221 26745 28476 0.516
## 2 Alabama Baldwin 195121 95314 99807 0.512
## 3 Alabama Barbour 26932 14497 12435 0.462
## 4 Alabama Bibb 22604 12073 10531 0.466
## 5 Alabama Blount 57710 28512 29198 0.506
## 6 Alabama Bullock 10678 5660 5018 0.470
## 7 Alabama Butler 20354 9502 10852 0.533
## 8 Alabama Calhoun 116648 56274 60374 0.518
## 9 Alabama Chambers 34079 16258 17821 0.523
## 10 Alabama Cherokee 26008 12975 13033 0.501
## # … with 3,128 more rows
counties %>%
# Select the five columns
select(state, county, population, men, women) %>%
# Add the proportion_men variable
mutate(proportion_men = men / population) %>%
# Filter for population of at least 10,000
filter(population >= 10000) %>%
# Arrange proportion of men in descending order
arrange(desc(proportion_men))
## # A tibble: 2,437 x 6
## state county population men women proportion_men
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Virginia Sussex 11864 8130 3734 0.685
## 2 California Lassen 32645 21818 10827 0.668
## 3 Georgia Chattahoochee 11914 7940 3974 0.666
## 4 Louisiana West Feliciana 15415 10228 5187 0.664
## 5 Florida Union 15191 9830 5361 0.647
## 6 Texas Jones 19978 12652 7326 0.633
## 7 Missouri DeKalb 12782 8080 4702 0.632
## 8 Texas Madison 13838 8648 5190 0.625
## 9 Virginia Greensville 11760 7303 4457 0.621
## 10 Texas Anderson 57915 35469 22446 0.612
## # … with 2,427 more rows
[Video]
counties %>%
count()
## # A tibble: 1 x 1
## n
## <int>
## 1 3138
counties %>%
count(state)
## # A tibble: 50 x 2
## state n
## <chr> <int>
## 1 Alabama 67
## 2 Alaska 28
## 3 Arizona 15
## 4 Arkansas 75
## 5 California 58
## 6 Colorado 64
## 7 Connecticut 8
## 8 Delaware 3
## 9 Florida 67
## 10 Georgia 159
## # … with 40 more rows
# Use count to find the number of counties in each region
counties_selected %>%
count(region, sort = TRUE)
## # A tibble: 4 x 2
## region n
## <chr> <int>
## 1 South 1420
## 2 North Central 1054
## 3 West 447
## 4 Northeast 217
# Find number of counties per state, weighted by citizens
counties_selected %>%
count(state, sort = TRUE, wt = citizens)
## # A tibble: 50 x 2
## state n
## <chr> <dbl>
## 1 California 24280349
## 2 Texas 16864864
## 3 Florida 13933052
## 4 New York 13531404
## 5 Pennsylvania 9710416
## 6 Illinois 8979999
## 7 Ohio 8709050
## 8 Michigan 7380136
## 9 North Carolina 7107998
## 10 Georgia 6978660
## # … with 40 more rows
counties_selected %>%
# Add population_walk containing the total number of people who walk to work
mutate(population_walk = population * walk / 100) %>%
# Count weighted by the new column
count(state, wt = population_walk, sort = TRUE)
## # A tibble: 50 x 2
## state n
## <chr> <dbl>
## 1 New York 1237938.
## 2 California 1017964.
## 3 Pennsylvania 505397.
## 4 Texas 430783.
## 5 Illinois 400346.
## 6 Massachusetts 316765.
## 7 Florida 284723.
## 8 New Jersey 273047.
## 9 Ohio 266911.
## 10 Washington 239764.
## # … with 40 more rows
[Video]
counties %>%
summarize(total_population = sum(population))
## # A tibble: 1 x 1
## total_population
## <dbl>
## 1 315845353
counties %>%
summarize(total_population = sum(population), average_unemployment = mean(unemployment))
## # A tibble: 1 x 2
## total_population average_unemployment
## <dbl> <dbl>
## 1 315845353 7.80
Summary functions * sum()
* mean()
* median()
* min()
* max()
* n()
counties %>%
group_by(state) %>%
summarize(total_pop = sum(population), average_unemployment = sum(unemployment))
## # A tibble: 50 x 3
## state total_pop average_unemployment
## <chr> <dbl> <dbl>
## 1 Alabama 4830620 758.
## 2 Alaska 725461 257.
## 3 Arizona 6641928 180.
## 4 Arkansas 2958208 674.
## 5 California 38421464 626.
## 6 Colorado 5278906 477.
## 7 Connecticut 3593222 65.3
## 8 Delaware 926454 23.8
## 9 Florida 19645772 696.
## 10 Georgia 10006693 1586.
## # … with 40 more rows
counties %>%
group_by(state) %>%
summarize(total_pop = sum(population), average_unemployment = mean(unemployment)) %>%
arrange(desc(average_unemployment))
## # A tibble: 50 x 3
## state total_pop average_unemployment
## <chr> <dbl> <dbl>
## 1 Mississippi 2988081 12.0
## 2 Arizona 6641928 12.0
## 3 South Carolina 4777576 11.3
## 4 Alabama 4830620 11.3
## 5 California 38421464 10.8
## 6 Nevada 2798636 10.5
## 7 North Carolina 9845333 10.5
## 8 Florida 19645772 10.4
## 9 Georgia 10006693 9.97
## 10 Michigan 9900571 9.96
## # … with 40 more rows
counties %>%
select(state, metro, county, population)
## # A tibble: 3,138 x 4
## state metro county population
## <chr> <chr> <chr> <dbl>
## 1 Alabama Metro Autauga 55221
## 2 Alabama Metro Baldwin 195121
## 3 Alabama Nonmetro Barbour 26932
## 4 Alabama Metro Bibb 22604
## 5 Alabama Metro Blount 57710
## 6 Alabama Nonmetro Bullock 10678
## 7 Alabama Nonmetro Butler 20354
## 8 Alabama Metro Calhoun 116648
## 9 Alabama Nonmetro Chambers 34079
## 10 Alabama Nonmetro Cherokee 26008
## # … with 3,128 more rows
counties %>%
group_by(state, metro) %>%
summarize(total_pop = sum(population))
## # A tibble: 97 x 3
## # Groups: state [50]
## state metro total_pop
## <chr> <chr> <dbl>
## 1 Alabama Metro 3671377
## 2 Alabama Nonmetro 1159243
## 3 Alaska Metro 494990
## 4 Alaska Nonmetro 230471
## 5 Arizona Metro 6295145
## 6 Arizona Nonmetro 346783
## 7 Arkansas Metro 1806867
## 8 Arkansas Nonmetro 1151341
## 9 California Metro 37587429
## 10 California Nonmetro 834035
## # … with 87 more rows
counties %>%
group_by(state, metro) %>%
summarize(total_pop = sum(population)) %>%
ungroup()
## # A tibble: 97 x 3
## state metro total_pop
## <chr> <chr> <dbl>
## 1 Alabama Metro 3671377
## 2 Alabama Nonmetro 1159243
## 3 Alaska Metro 494990
## 4 Alaska Nonmetro 230471
## 5 Arizona Metro 6295145
## 6 Arizona Nonmetro 346783
## 7 Arkansas Metro 1806867
## 8 Arkansas Nonmetro 1151341
## 9 California Metro 37587429
## 10 California Nonmetro 834035
## # … with 87 more rows
# Summarize to find minimum population, maximum unemployment, and average income
counties_selected %>%
summarize(min_population = min(population), max_unemployment = max(unemployment), average_income = mean(income))
## # A tibble: 1 x 3
## min_population max_unemployment average_income
## <dbl> <dbl> <dbl>
## 1 85 29.4 46832.
# Group by state and find the total area and population
counties_selected %>%
group_by(state) %>%
summarize(total_area = sum(land_area), total_population = sum(population)) %>%
mutate(density = total_population / total_area) %>%
arrange(desc(density))
## # A tibble: 50 x 4
## state total_area total_population density
## <chr> <dbl> <dbl> <dbl>
## 1 New Jersey 7354. 8904413 1211.
## 2 Rhode Island 1034. 1053661 1019.
## 3 Massachusetts 7800. 6705586 860.
## 4 Connecticut 4842. 3593222 742.
## 5 Maryland 9707. 5930538 611.
## 6 Delaware 1949. 926454 475.
## 7 New York 47126. 19673174 417.
## 8 Florida 53625. 19645772 366.
## 9 Pennsylvania 44743. 12779559 286.
## 10 Ohio 40861. 11575977 283.
## # … with 40 more rows
# Summarize to find the total population
counties_selected %>%
group_by(region, state) %>%
summarize(total_pop = sum(population)) %>%
# Calculate the average_pop and median_pop columns
summarize(average_pop = mean(total_pop),
median_pop = median(total_pop))
## # A tibble: 4 x 3
## region average_pop median_pop
## <chr> <dbl> <dbl>
## 1 North Central 5627687. 5580644
## 2 Northeast 6221058. 3593222
## 3 South 7370486 4804098
## 4 West 5722755. 2798636
[Video]
Michael is a hybrid thinker and doer—a byproduct of being a CliftonStrengths “Learner” over time. With 20+ years of engineering, design, and product experience, he helps organizations identify market needs, mobilize internal and external resources, and deliver delightful digital customer experiences that align with business goals. He has been entrusted with problem-solving for brands—ranging from Fortune 500 companies to early-stage startups to not-for-profit organizations.
Michael earned his BS in Computer Science from New York Institute of Technology and his MBA from the University of Maryland, College Park. He is also a candidate to receive his MS in Applied Analytics from Columbia University.
LinkedIn | Twitter | www.michaelmallari.com/data | www.columbia.edu/~mm5470