Getting started with ggplot2

library(tidyverse)

2.2 Exercises

  1. List five functions that you could use to get more information about the mpg dataset.
# Five functions that describe the mpg dataset: ncol(), nrow(), names(),
# summary() and class(). The results are collected in a named list so they
# print together under readable headings.
list(
  variables = ncol(mpg),
  observations = nrow(mpg),
  names = names(mpg),
  summary = summary(mpg),
  class = class(mpg)
)
$variables
[1] 11

$observations
[1] 234

$symmary
 manufacturer          model               displ            year           cyl       
 Length:234         Length:234         Min.   :1.600   Min.   :1999   Min.   :4.000  
 Class :character   Class :character   1st Qu.:2.400   1st Qu.:1999   1st Qu.:4.000  
 Mode  :character   Mode  :character   Median :3.300   Median :2004   Median :6.000  
                                       Mean   :3.472   Mean   :2004   Mean   :5.889  
                                       3rd Qu.:4.600   3rd Qu.:2008   3rd Qu.:8.000  
                                       Max.   :7.000   Max.   :2008   Max.   :8.000  
    trans               drv                 cty             hwy       
 Length:234         Length:234         Min.   : 9.00   Min.   :12.00  
 Class :character   Class :character   1st Qu.:14.00   1st Qu.:18.00  
 Mode  :character   Mode  :character   Median :17.00   Median :24.00  
                                       Mean   :16.86   Mean   :23.44  
                                       3rd Qu.:19.00   3rd Qu.:27.00  
                                       Max.   :35.00   Max.   :44.00  
      fl               class          
 Length:234         Length:234        
 Class :character   Class :character  
 Mode  :character   Mode  :character  
                                      
                                      
                                      

$class
[1] "tbl_df"     "tbl"        "data.frame"
  2. How can you find out what other datasets are included with ggplot2?
  • datasets bundled with ggplot2: https://ggplot2.tidyverse.org/reference/index.html#section-data
  3. Apart from the US, most countries use fuel consumption (fuel consumed over fixed distance) rather than fuel economy (distance travelled with fixed amount of fuel). How could you convert cty and hwy into the European standard of l/100km?
  • cty city miles per gallon
  • hwy highway miles per gallon

# Convert fuel economy (miles per US gallon) into fuel consumption (l/100km).
# 1 US gallon = 3.785411784 l and 1 mile = 1.609344 km, therefore
#   l/100km = 100 * 3.785411784 / (1.609344 * mpg)  (roughly 235.215 / mpg).
# The arithmetic is vectorised, so it is applied to whole columns at once.
l_per_100km <- function(miles_per_gallon) {
  100 * 3.785411784 / (1.609344 * miles_per_gallon)
}

tibble(
  cty = mpg$cty,
  hwy = mpg$hwy,
  cty_l_per_100km = l_per_100km(mpg$cty),
  hwy_l_per_100km = l_per_100km(mpg$hwy)
)
NA
  4. Which manufacturer has the most models in this dataset? Which model has the most variations? Does your answer change if you remove the redundant specification of drive train (e.g. “pathfinder 4wd”, “a4 quattro”) from the model name?
# Distinct model names per manufacturer, manufacturers with most models first.
mpg %>%
  distinct(manufacturer, model) %>%
  count(manufacturer, sort = TRUE, name = "n_models")

# Rows (variations) per model name, models with most variations first.
mpg %>%
  count(manufacturer, model, sort = TRUE, name = "n_variations")

# Repeat both tables after dropping a trailing drive-train label from the
# model name, so that e.g. "a4" and "a4 quattro" count as the same model.
mpg_base_model <- mpg %>%
  mutate(model = str_remove(model, " (quattro|4wd|2wd|awd)$"))

mpg_base_model %>%
  distinct(manufacturer, model) %>%
  count(manufacturer, sort = TRUE, name = "n_models")

mpg_base_model %>%
  count(manufacturer, model, sort = TRUE, name = "n_variations")
# Highway mileage against engine displacement, points coloured by car class,
# with a reference line layered on top.
# NOTE(review): geom_abline() without arguments uses slope 1 and intercept 0;
# confirm that this line actually falls inside the plotted range.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point(mapping = aes(colour = class)) +
  geom_abline()

2.3 Exercises

  1. How would you describe the relationship between cty and hwy? Do you have any concerns about drawing conclusions from that plot?
# Scatter plot of highway against city mileage, one point per row of mpg.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point()

# Correlation between the two mileage columns (cor() defaults to Pearson).
with(mpg, cor(cty, hwy))
[1] 0.9559159
  2. What does ggplot(mpg, aes(model, manufacturer)) + geom_point() show? Is it useful? How could you modify the data to make it more informative?
# Manufacturer against model, drawn once at the exact positions and once
# with random jitter so that overlapping rows become visible.
# NOTE(review): both layers plot every row, so each observation appears
# twice; confirm whether geom_jitter() alone was intended.
ggplot(data = mpg, mapping = aes(x = model, y = manufacturer)) +
  geom_point() +
  geom_jitter()

  3. Describe the data, aesthetic mappings and layers used for each of the following plots. You’ll need to guess a little because you haven’t seen all the datasets and functions yet, but use your common sense! See if you can predict what the plot will look like before running the code.

     1. ggplot(mpg, aes(cty, hwy)) + geom_point()

     2. ggplot(diamonds, aes(carat, price)) + geom_point()

     3. ggplot(economics, aes(date, unemploy)) + geom_line()

     4. ggplot(mpg, aes(cty)) + geom_histogram()

# mpg: city vs highway mileage as a scatter plot.
ggplot(data = mpg, mapping = aes(x = cty, y = hwy)) +
  geom_point()

# diamonds: price against carat as a scatter plot.
ggplot(data = diamonds, mapping = aes(x = carat, y = price)) +
  geom_point()

# economics: unemploy over date as a line.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# mpg: histogram of city mileage (default binning).
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram()

Instead of trying to make one very complex plot that shows everything at once, see if you can create a series of simple plots that tell a story, leading the reader from ignorance to knowledge.

LS0tCnRpdGxlOiAiZ2dwbG90MiIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIyMgR2V0dGluZyBzdGFydGVkIHdpdGggZ2dwbG90MgoKYGBge3J9CmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKCmBgYHtyfQptcGcKYGBgCgojIyMgMi4yIEV4ZXJjaXNlcwoKMS4gTGlzdCBmaXZlIGZ1bmN0aW9ucyB0aGF0IHlvdSBjb3VsZCB1c2UgdG8gZ2V0IG1vcmUgaW5mb3JtYXRpb24gYWJvdXQgdGhlIG1wZyBkYXRhc2V0LgoKYGBge3J9Cmxpc3QoCiAgdmFyaWFibGVzID0gbGVuZ3RoKG1wZyksCiAgb2JzZXJ2YXRpb25zID0gbnJvdyhtcGcpLAogIHN5bW1hcnkgPSBzdW1tYXJ5KG1wZyksCiAgY2xhc3MgPSBjbGFzcyhtcGcpCikKYGBgCgoyLiBIb3cgY2FuIHlvdSBmaW5kIG91dCB3aGF0IG90aGVyIGRhdGFzZXRzIGFyZSBpbmNsdWRlZCB3aXRoIGdncGxvdDI/CgogIC0gW2RhdGFzZXRzIGJ1bmRsZWQgd2l0aCBnZ3Bsb3QyXShodHRwczovL2dncGxvdDIudGlkeXZlcnNlLm9yZy9yZWZlcmVuY2UvaW5kZXguaHRtbCNzZWN0aW9uLWRhdGEpCiAgCiAgCjMuIEFwYXJ0IGZyb20gdGhlIFVTLCBtb3N0IGNvdW50cmllcyB1c2UgZnVlbCBjb25zdW1wdGlvbiAoZnVlbCBjb25zdW1lZCBvdmVyIGZpeGVkIGRpc3RhbmNlKSByYXRoZXIgdGhhbiBmdWVsIGVjb25vbXkgKGRpc3RhbmNlIHRyYXZlbGxlZCB3aXRoIGZpeGVkIGFtb3VudCBvZiBmdWVsKS4gSG93IGNvdWxkIHlvdSBjb252ZXJ0IGN0eSBhbmQgaHd5IGludG8gdGhlIEV1cm9wZWFuIHN0YW5kYXJkIG9mIGwvMTAwa20/CiAgCiAgLSBgY3R5YCBjaXR5IG1pbGVzIHBlciBnYWxsb24KICAtIGBod3lgIGhpZ2h3YXkgbWlsZXMgcGVyIGdhbGxvbgogIApgYGB7cn0KCnRpYmJsZSgKICBjdHkgPSBtcGckY3R5LAogIGh3eSA9IG1wZyRod3ksCiAgZ3BtZm9yY3R5ID0gIG1hcF9kYmwobXBnJGN0eSwgZnVuY3Rpb24oeCkgMSAvIHgpLAogIGdwbWZvcmh3eSA9ICBtYXBfZGJsKG1wZyRod3ksIGZ1bmN0aW9uKHgpIDEgLyB4KQopCgpgYGAKCjQuIFdoaWNoIG1hbnVmYWN0dXJlciBoYXMgdGhlIG1vc3QgbW9kZWxzIGluIHRoaXMgZGF0YXNldD8gV2hpY2ggbW9kZWwgaGFzIHRoZSBtb3N0IHZhcmlhdGlvbnM/IERvZXMgeW91ciBhbnN3ZXIgY2hhbmdlIGlmIHlvdSByZW1vdmUgdGhlIHJlZHVuZGFudCBzcGVjaWZpY2F0aW9uIG9mIGRyaXZlIHRyYWluIChlLmcuIOKAnHBhdGhmaW5kZXIgNHdk4oCdLCDigJxhNCBxdWF0dHJv4oCdKSBmcm9tIHRoZSBtb2RlbCBuYW1lPwoKICAKYGBge3J9CiMgVE9ETwpgYGAKCgpgYGB7cn0KZ2dwbG90KG1wZywgYWVzKHggPSBkaXNwbCwgeSA9IGh3eSkpICsgCiAgZ2VvbV9wb2ludChhZXMoY29sb3IgPSBjbGFzcykpICsgCmBgYAoKCiMjIyAyLjMgRXhlcmNpc2VzCgoxLiBIb3cgd291bGQgeW91IGRlc2NyaWJlIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBjdHkgYW5kIGh3eT8gRG8geW91IGhhdmUgYW55IGNvbmNlcm5zIGFib3V0IGRyYXdpbmcgY29uY2x1c2lv
bnMgZnJvbSB0aGF0IHBsb3Q/CgpgYGB7cn0KZ2dwbG90KG1wZywgYWVzKHggPSBjdHksIHkgPSBod3kpKSArCiAgZ2VvbV9wb2ludCgpCgpjb3IobXBnJGN0eSwgbXBnJGh3eSkgIyAwLjk1NTkxNTkKYGBgCgoyLiBXaGF0IGRvZXMgZ2dwbG90KG1wZywgYWVzKG1vZGVsLCBtYW51ZmFjdHVyZXIpKSArIGdlb21fcG9pbnQoKSBzaG93PyBJcyBpdCB1c2VmdWw/IEhvdyBjb3VsZCB5b3UgbW9kaWZ5IHRoZSBkYXRhIHRvIG1ha2UgaXQgbW9yZSBpbmZvcm1hdGl2ZT8KCmBgYHtyfQpnZ3Bsb3QobXBnLCBhZXMobW9kZWwsIG1hbnVmYWN0dXJlcikpICsgCiAgZ2VvbV9wb2ludCgpICsKICBnZW9tX2ppdHRlcigpCmBgYAoKMy4gRGVzY3JpYmUgdGhlIGRhdGEsIGFlc3RoZXRpYyBtYXBwaW5ncyBhbmQgbGF5ZXJzIHVzZWQgZm9yIGVhY2ggb2YgdGhlIGZvbGxvd2luZyBwbG90cy4gWW914oCZbGwgbmVlZCB0byBndWVzcyBhIGxpdHRsZSBiZWNhdXNlIHlvdSBoYXZlbuKAmXQgc2VlbiBhbGwgdGhlIGRhdGFzZXRzIGFuZCBmdW5jdGlvbnMgeWV0LCBidXQgdXNlIHlvdXIgY29tbW9uIHNlbnNlISBTZWUgaWYgeW91IGNhbiBwcmVkaWN0IHdoYXQgdGhlIHBsb3Qgd2lsbCBsb29rIGxpa2UgYmVmb3JlIHJ1bm5pbmcgdGhlIGNvZGUuCgogIDEuIGdncGxvdChtcGcsIGFlcyhjdHksIGh3eSkpICsgZ2VvbV9wb2ludCgpCiAgMi4gZ2dwbG90KGRpYW1vbmRzLCBhZXMoY2FyYXQsIHByaWNlKSkgKyBnZW9tX3BvaW50KCkKICAzLiBnZ3Bsb3QoZWNvbm9taWNzLCBhZXMoZGF0ZSwgdW5lbXBsb3kpKSArIGdlb21fbGluZSgpCiAgNC4gZ2dwbG90KG1wZywgYWVzKGN0eSkpICsgZ2VvbV9oaXN0b2dyYW0oKQoKYGBge3J9CmdncGxvdChtcGcsIGFlcyhjdHksIGh3eSkpICsgZ2VvbV9wb2ludCgpCmdncGxvdChkaWFtb25kcywgYWVzKGNhcmF0LCBwcmljZSkpICsgZ2VvbV9wb2ludCgpCmdncGxvdChlY29ub21pY3MsIGFlcyhkYXRlLCB1bmVtcGxveSkpICsgZ2VvbV9saW5lKCkKZ2dwbG90KG1wZywgYWVzKGN0eSkpICsgZ2VvbV9oaXN0b2dyYW0oKQpgYGAKCj4gSW5zdGVhZCBvZiB0cnlpbmcgdG8gbWFrZSBvbmUgdmVyeSBjb21wbGV4IHBsb3QgdGhhdCBzaG93cyBldmVyeXRoaW5nIGF0IG9uY2UsIHNlZSBpZiB5b3UgY2FuIGNyZWF0ZSBhIHNlcmllcyBvZiBzaW1wbGUgcGxvdHMgdGhhdCB0ZWxsIGEgc3RvcnksIGxlYWRpbmcgdGhlIHJlYWRlciBmcm9tIGlnbm9yYW5jZSB0byBrbm93bGVkZ2UuCgo=