Mini Lesson 5: Diverging Lollipop Chart
Lollipop charts convey the same information as bar charts and diverging bar charts, but with a more modern look.
Rather than using geom_bar, geom_point and geom_segment were used together to get the “lollipop” look.
#start with loading the data and packages
library(openintro)
## Please visit openintro.org for free statistics materials
##
## Attaching package: 'openintro'
## The following objects are masked from 'package:datasets':
##
## cars, trees
library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
# Refer to the countyComplete data set under a shorter name
cc <- countyComplete
# Make the gray theme with an 18-point base font the default for every plot below
theme_set(theme_gray(base_size = 18))
# Look over the columns to pick the ones needed for the lollipop charts
glimpse(cc)
## Observations: 3,143
## Variables: 53
## $ state <fctr> Alabama, Alabama, A...
## $ name <fctr> Autauga County, Bal...
## $ FIPS <dbl> 1001, 1003, 1005, 10...
## $ pop2010 <dbl> 54571, 182265, 27457...
## $ pop2000 <dbl> 43671, 140415, 29038...
## $ age_under_5 <dbl> 6.6, 6.1, 6.2, 6.0, ...
## $ age_under_18 <dbl> 26.8, 23.0, 21.9, 22...
## $ age_over_65 <dbl> 12.0, 16.8, 14.2, 12...
## $ female <dbl> 51.3, 51.1, 46.9, 46...
## $ white <dbl> 78.5, 85.7, 48.0, 75...
## $ black <dbl> 17.7, 9.4, 46.9, 22....
## $ native <dbl> 0.4, 0.7, 0.4, 0.3, ...
## $ asian <dbl> 0.9, 0.7, 0.4, 0.1, ...
## $ pac_isl <dbl> NA, NA, NA, NA, NA, ...
## $ two_plus_races <dbl> 1.6, 1.5, 0.9, 0.9, ...
## $ hispanic <dbl> 2.4, 4.4, 5.1, 1.8, ...
## $ white_not_hispanic <dbl> 77.2, 83.5, 46.8, 75...
## $ no_move_in_one_plus_year <dbl> 86.3, 83.0, 83.0, 90...
## $ foreign_born <dbl> 2.0, 3.6, 2.8, 0.7, ...
## $ foreign_spoken_at_home <dbl> 3.7, 5.5, 4.7, 1.5, ...
## $ hs_grad <dbl> 85.3, 87.6, 71.9, 74...
## $ bachelors <dbl> 21.7, 26.8, 13.5, 10...
## $ veterans <dbl> 5817, 20396, 2327, 1...
## $ mean_work_travel <dbl> 25.1, 25.8, 23.8, 28...
## $ housing_units <dbl> 22135, 104061, 11829...
## $ home_ownership <dbl> 77.5, 76.7, 68.0, 82...
## $ housing_multi_unit <dbl> 7.2, 22.6, 11.1, 6.6...
## $ median_val_owner_occupied <dbl> 133900, 177200, 8820...
## $ households <dbl> 19718, 69476, 9795, ...
## $ persons_per_household <dbl> 2.70, 2.50, 2.52, 3....
## $ per_capita_income <dbl> 24568, 26469, 15875,...
## $ median_household_income <dbl> 53255, 50147, 33219,...
## $ poverty <dbl> 10.6, 12.2, 25.0, 12...
## $ private_nonfarm_establishments <dbl> 877, 4812, 522, 318,...
## $ private_nonfarm_employment <dbl> 10628, 52233, 7990, ...
## $ percent_change_private_nonfarm_employment <dbl> 16.6, 17.4, -27.0, -...
## $ nonemployment_establishments <dbl> 2971, 14175, 1527, 1...
## $ firms <dbl> 4067, 19035, 1667, 1...
## $ black_owned_firms <dbl> 15.2, 2.7, NA, 14.9,...
## $ native_owned_firms <dbl> NA, 0.4, NA, NA, NA,...
## $ asian_owned_firms <dbl> 1.3, 1.0, NA, NA, NA...
## $ pac_isl_owned_firms <dbl> NA, NA, NA, NA, NA, ...
## $ hispanic_owned_firms <dbl> 0.7, 1.3, NA, NA, NA...
## $ women_owned_firms <dbl> 31.7, 27.3, 27.0, NA...
## $ manufacturer_shipments_2007 <dbl> NA, 1410273, NA, 0, ...
## $ mercent_whole_sales_2007 <dbl> NA, NA, NA, NA, NA, ...
## $ sales <dbl> 598175, 2966489, 188...
## $ sales_per_capita <dbl> 12003, 17166, 6334, ...
## $ accommodation_food_service <dbl> 88157, 436955, NA, 1...
## $ building_permits <dbl> 191, 696, 10, 8, 18,...
## $ fed_spending <dbl> 331142, 1119082, 240...
## $ area <dbl> 594.44, 1589.78, 884...
## $ density <dbl> 91.8, 114.6, 31.0, 3...
# Per-column summary statistics (range, quartiles, mean, NA counts) to spot
# missing values before aggregating
summary(cc)
## state name FIPS
## Texas : 254 Washington County: 30 Min. : 1001
## Georgia : 159 Jefferson County : 25 1st Qu.:18178
## Virginia: 134 Franklin County : 24 Median :29177
## Kentucky: 120 Jackson County : 23 Mean :30390
## Missouri: 115 Lincoln County : 23 3rd Qu.:45082
## Kansas : 105 Madison County : 19 Max. :56045
## (Other) :2256 (Other) :2999
## pop2010 pop2000 age_under_5 age_under_18
## Min. : 82 Min. : 67 Min. : 0.000 Min. : 0.00
## 1st Qu.: 11104 1st Qu.: 11210 1st Qu.: 5.500 1st Qu.:21.40
## Median : 25857 Median : 24608 Median : 6.200 Median :23.30
## Mean : 98233 Mean : 89623 Mean : 6.261 Mean :23.42
## 3rd Qu.: 66699 3rd Qu.: 61766 3rd Qu.: 6.800 3rd Qu.:25.10
## Max. :9818605 Max. :9519338 Max. :12.600 Max. :41.60
## NA's :3
## age_over_65 female white black
## Min. : 3.50 Min. :27.90 Min. : 2.70 Min. : 0.000
## 1st Qu.:13.10 1st Qu.:49.60 1st Qu.:75.25 1st Qu.: 0.500
## Median :15.60 Median :50.50 Median :89.10 Median : 2.000
## Mean :15.88 Mean :50.03 Mean :82.89 Mean : 8.931
## 3rd Qu.:18.20 3rd Qu.:51.10 3rd Qu.:95.50 3rd Qu.:10.200
## Max. :43.40 Max. :56.80 Max. :99.20 Max. :85.700
## NA's :17
## native asian pac_isl two_plus_races
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.100
## 1st Qu.: 0.200 1st Qu.: 0.300 1st Qu.: 0.000 1st Qu.: 1.100
## Median : 0.400 Median : 0.500 Median : 0.000 Median : 1.600
## Mean : 2.026 Mean : 1.166 Mean : 0.154 Mean : 1.976
## 3rd Qu.: 0.800 3rd Qu.: 1.000 3rd Qu.: 0.100 3rd Qu.: 2.300
## Max. :96.000 Max. :43.900 Max. :48.900 Max. :29.500
## NA's :4 NA's :24 NA's :1697
## hispanic white_not_hispanic no_move_in_one_plus_year
## Min. : 0.000 Min. : 2.70 Min. : 51.6
## 1st Qu.: 1.600 1st Qu.:66.95 1st Qu.: 83.2
## Median : 3.300 Median :85.80 Median : 86.3
## Mean : 8.284 Mean :78.29 Mean : 85.8
## 3rd Qu.: 8.200 3rd Qu.:94.20 3rd Qu.: 89.0
## Max. :95.700 Max. :99.20 Max. :100.0
##
## foreign_born foreign_spoken_at_home hs_grad bachelors
## Min. : 0.000 Min. : 0.000 Min. :47.90 Min. : 3.70
## 1st Qu.: 1.200 1st Qu.: 2.800 1st Qu.:78.40 1st Qu.:13.10
## Median : 2.400 Median : 4.800 Median :84.60 Median :16.90
## Mean : 4.372 Mean : 9.057 Mean :83.11 Mean :19.03
## 3rd Qu.: 5.300 3rd Qu.:10.000 3rd Qu.:88.60 3rd Qu.:22.60
## Max. :72.200 Max. :96.000 Max. :99.30 Max. :71.00
##
## veterans mean_work_travel housing_units home_ownership
## Min. : 0 Min. : 4.30 Min. : 50 Min. : 0.00
## 1st Qu.: 958 1st Qu.:19.00 1st Qu.: 5416 1st Qu.:69.50
## Median : 2180 Median :22.40 Median : 12162 Median :74.60
## Mean : 7207 Mean :22.73 Mean : 41904 Mean :73.26
## 3rd Qu.: 5944 3rd Qu.:26.10 3rd Qu.: 30574 3rd Qu.:78.40
## Max. :368128 Max. :44.20 Max. :3445076 Max. :91.30
##
## housing_multi_unit median_val_owner_occupied households
## Min. : 0.00 Min. : 0 Min. : 22
## 1st Qu.: 6.10 1st Qu.: 80200 1st Qu.: 4260
## Median : 9.70 Median : 105900 Median : 9868
## Mean :12.33 Mean : 132545 Mean : 36346
## 3rd Qu.:15.90 3rd Qu.: 152950 3rd Qu.: 25358
## Max. :98.50 Max. :1000001 Max. :3217889
##
## persons_per_household per_capita_income median_household_income
## Min. :1.100 Min. : 7772 Min. : 19351
## 1st Qu.:2.370 1st Qu.:19030 1st Qu.: 36952
## Median :2.490 Median :21773 Median : 42445
## Mean :2.513 Mean :22505 Mean : 44270
## 3rd Qu.:2.630 3rd Qu.:24814 3rd Qu.: 49142
## Max. :4.470 Max. :64381 Max. :115574
##
## poverty private_nonfarm_establishments private_nonfarm_employment
## Min. : 0.0 Min. : 0 Min. : 0
## 1st Qu.:11.0 1st Qu.: 229 1st Qu.: 2109
## Median :14.7 Median : 551 Median : 6351
## Mean :15.5 Mean : 2362 Mean : 35656
## 3rd Qu.:19.0 3rd Qu.: 1484 3rd Qu.: 19436
## Max. :53.5 Max. :245523 Max. :3703233
##
## percent_change_private_nonfarm_employment nonemployment_establishments
## Min. :-83.2000 Min. : 21
## 1st Qu.:-12.0000 1st Qu.: 729
## Median : -2.0000 Median : 1594
## Mean : 0.5338 Mean : 6720
## 3rd Qu.: 9.8000 3rd Qu.: 4130
## Max. :386.5000 Max. :821177
## NA's :67 NA's :5
## firms black_owned_firms native_owned_firms asian_owned_firms
## Min. : 27 Min. : 0.200 Min. : 0.200 Min. : 0.300
## 1st Qu.: 1074 1st Qu.: 2.100 1st Qu.: 0.525 1st Qu.: 1.400
## Median : 2350 Median : 5.700 Median : 0.900 Median : 2.200
## Mean : 9301 Mean : 9.806 Mean : 3.785 Mean : 3.422
## 3rd Qu.: 6034 3rd Qu.:13.350 3rd Qu.: 2.300 3rd Qu.: 3.700
## Max. :1046940 Max. :66.700 Max. :71.800 Max. :56.600
## NA's :176 NA's :2376 NA's :2653 NA's :2408
## pac_isl_owned_firms hispanic_owned_firms women_owned_firms
## Min. : 0.0000 Min. : 0.300 Min. : 6.50
## 1st Qu.: 0.1000 1st Qu.: 1.400 1st Qu.:22.70
## Median : 0.1000 Median : 2.800 Median :26.20
## Mean : 0.7171 Mean : 6.811 Mean :25.96
## 3rd Qu.: 0.3000 3rd Qu.: 6.800 3rd Qu.:29.20
## Max. :10.5000 Max. :78.000 Max. :56.20
## NA's :3073 NA's :2363 NA's :970
## manufacturer_shipments_2007 mercent_whole_sales_2007 sales
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 0 1st Qu.: 42125 1st Qu.: 79988
## Median : 238180 Median : 138930 Median : 257667
## Mean : 1680613 Mean : 1794262 Mean : 1262270
## 3rd Qu.: 1161878 3rd Qu.: 562056 3rd Qu.: 791435
## Max. :169275136 Max. :205478751 Max. :119111840
## NA's :488 NA's :1022 NA's :42
## sales_per_capita accommodation_food_service building_permits
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 6993 1st Qu.: 9349 1st Qu.: 5
## Median : 9793 Median : 31065 Median : 32
## Mean :10375 Mean : 211181 Mean : 192
## 3rd Qu.:12980 3rd Qu.: 110695 3rd Qu.: 123
## Max. :80800 Max. :24857836 Max. :15039
## NA's :42 NA's :272
## fed_spending area density
## Min. : 0 Min. : 2.0 Min. : 0.0
## 1st Qu.: 102922 1st Qu.: 430.7 1st Qu.: 16.9
## Median : 214994 Median : 615.6 Median : 45.2
## Mean : 944376 Mean : 1123.7 Mean : 259.3
## 3rd Qu.: 522228 3rd Qu.: 924.0 3rd Qu.: 113.8
## Max. :80457156 Max. :145504.8 Max. :69467.5
## NA's :4
# State-level summary for the "foreign language spoken at home" chart:
# the plain (unweighted) mean of the county percentages within each state.
cc2 <- cc %>%
  group_by(state) %>%
  summarise(
    foreign_spoken_at_home_percent = mean(foreign_spoken_at_home)
  )
# Lollipop chart of the mean county percentage of foreign language spoken at
# home, one lollipop per state.
# The state factor is reordered by value once, up front, so every layer shares
# the same ordering; after coord_flip() the largest value ends up at the top.
cc2 %>%
  mutate(state = reorder(state, foreign_spoken_at_home_percent)) %>%
  ggplot(aes(x = state, y = foreign_spoken_at_home_percent)) +
  # Stems go first so the heads are drawn on top of them
  geom_segment(aes(xend = state, yend = 0), color = "black") +
  # color (not fill) is what controls the default solid point shape
  geom_point(color = "black", size = 6) +
  labs(title = "Lollipop Chart",
       subtitle = "Percentage of foreign speaking per state",
       x = "State",
       y = "Mean county percentage, foreign language spoken at home") +
  coord_flip()
## dplyr for second graph
# State-level summary for the "foreign born" chart:
# the plain (unweighted) mean of the county percentages within each state.
cc3 <- cc %>%
  group_by(state) %>%
  summarise(
    foreign_born_percentage = mean(foreign_born)
  )
# Lollipop chart of the mean county percentage of foreign-born residents,
# one lollipop per state.
# The state factor is reordered by value once, up front, so every layer shares
# the same ordering; after coord_flip() the largest value ends up at the top.
cc3 %>%
  mutate(state = reorder(state, foreign_born_percentage)) %>%
  ggplot(aes(x = state, y = foreign_born_percentage)) +
  # Stems go first so the red heads are not crossed by the black line
  geom_segment(aes(xend = state, yend = 0), color = "black") +
  geom_point(color = "red", size = 6) +
  # The subtitle describes this chart (foreign born), not the previous one
  labs(title = "Lollipop Chart",
       subtitle = "Percentage of foreign born per state",
       x = "State",
       y = "Mean county percentage, foreign born") +
  ylim(0, 35) + # keeping all graphs consistent
  coord_flip()
## dplyr for third graph
# Both state-level summaries side by side, for the comparison chart.
# Each value is the plain (unweighted) mean of the county percentages.
cc4 <- cc %>%
  group_by(state) %>%
  summarise(
    foreign_spoken_at_home_percent = mean(foreign_spoken_at_home),
    foreign_born_percent = mean(foreign_born)
  )
# Comparison of the two measures on one chart: black lollipops show foreign
# language spoken at home, red lollipops show foreign born.
# States are ordered by the "spoken at home" value; the reordering is done once
# on the data so all four layers share it.
cc4 %>%
  mutate(state = reorder(state, foreign_spoken_at_home_percent)) %>%
  ggplot(aes(x = state)) +
  # Stems first (black, then the thicker translucent red) so the heads sit on
  # top of the lines
  geom_segment(aes(xend = state,
                   y = 0,
                   yend = foreign_spoken_at_home_percent),
               color = "black") +
  # `size` sets the stem thickness here; ggplot2 >= 3.4 prefers `linewidth`
  geom_segment(aes(xend = state,
                   y = 0,
                   yend = foreign_born_percent),
               alpha = 0.5,
               size = 2,
               color = "red") +
  # Heads: the smaller red point is drawn over the larger black one
  geom_point(aes(y = foreign_spoken_at_home_percent),
             color = "black", size = 6) +
  geom_point(aes(y = foreign_born_percent),
             color = "red", size = 4) +
  labs(title = "Lollipop Chart",
       subtitle = "Foreign language spoken at home (black) vs. foreign born (red), per state",
       x = "State",
       y = "Mean county percentage") +
  coord_flip()