The English statistician Frank Anscombe designed four short datasets with the aim of demonstrating the importance of visualising data and the dangers of relying on simple summary statistics alone.
His original paper, "Graphs in Statistical Analysis", can be retrieved from the JSTOR website.
# Draw 1000 samples from the standard normal distribution and show them.
x <- rnorm(n = 1000)
x
## [1] 0.448456951 -1.144926149 0.774313946 -0.351243769 1.466452963
## [6] 0.809274119 0.388809542 0.844816406 -1.908873816 0.114630009
## [11] -0.349978294 0.867896752 -0.456897752 -0.104370321 1.432036294
## [16] 1.089690684 -2.358035231 -0.510653220 1.665276333 0.008021918
## [21] -0.572789014 -0.846165211 0.282014758 1.657878371 0.403178948
## [26] -0.825430334 -0.058643863 0.825070087 -0.796128775 2.153777205
## [31] 0.672949961 1.372970296 1.727462486 1.951216462 0.370116111
## [36] 0.764021265 0.648104388 -0.499386688 -1.061971519 0.393517655
## [41] 1.713897489 0.667398736 -1.353305213 -0.115070471 -0.415747200
## [46] -0.605395381 1.192974548 0.450541319 0.138518956 1.084955014
## [51] -0.262223227 -0.267605429 -0.678003944 -0.597539545 -0.643654568
## [56] -1.757897572 -0.320875400 -0.204660792 0.186901760 -0.959956907
## [61] -1.313367305 -0.460342212 1.621052796 1.109910347 1.151645393
## [66] 0.535815518 -0.951274638 1.024397445 0.106534357 1.297448512
## [71] 0.489879703 0.091459649 -1.189211240 0.009424147 0.026959992
## [76] 0.116246051 -1.060806814 -0.312326482 0.487936771 1.531520418
## [81] 1.559552928 -0.581668223 0.162537702 0.190111743 -0.703706074
## [86] 0.220555565 1.140645247 -0.201800043 1.447874553 -1.156359301
## [91] -0.101288804 -1.509395728 -0.866651951 -0.066492803 0.089276864
## [96] 2.340955478 0.181120982 -1.063438720 -0.825138387 0.819547543
## [101] 0.015860867 -0.160683314 0.585132823 0.959527777 0.244138107
## [106] 0.431777945 -1.054097427 0.641954570 -0.173361771 1.406417050
## [111] 1.074990176 -0.219147972 0.564041937 1.528831476 -0.780081375
## [116] -0.968585765 -0.186334644 -0.493983076 1.196191122 -0.588922212
## [121] 0.294875481 0.695606061 1.521471021 -1.057272926 2.619837138
## [126] -1.439563502 1.675270316 0.223058594 1.846061859 -0.579759508
## [131] 0.669306662 -1.574086685 -2.087668269 -0.221235603 -0.553560082
## [136] -1.008932895 1.343109335 -0.434491564 -0.127747098 0.478526133
## [141] -0.892087251 -0.814743746 -0.206091373 -2.127419610 1.202465309
## [146] 0.441045925 -0.288935807 0.815924932 -0.426706960 -0.561634187
## [151] -0.306982974 0.311785403 2.450296315 -0.252088774 0.785901037
## [156] -1.225196880 -0.111146339 -1.015828977 -0.322638580 0.075161836
## [161] 1.219672167 -0.515435128 0.847354230 0.670645135 1.740925756
## [166] 1.395793976 1.093945186 -0.233511012 0.777789828 1.208128625
## [171] -1.579933030 -0.721043072 -0.827091341 -1.671248620 -0.348564296
## [176] 2.080029547 -0.679487103 -0.708002559 -0.949764600 0.327769004
## [181] -0.345097742 -0.513461778 1.843263762 -0.700944975 1.160910226
## [186] 0.755561364 -0.542966750 0.766864784 1.975266133 -0.879742660
## [191] -1.564470024 -0.555747316 1.606194204 -0.944350052 0.791255382
## [196] -0.239104490 0.077323305 0.072754951 -0.618806222 -0.283485549
## [201] 0.957736796 -0.621908466 2.302632144 -1.119635747 -0.452503148
## [206] -0.837399732 -1.022411086 -1.326189572 -0.649580527 -1.341096008
## [211] -0.018346097 -0.118661420 0.032250664 -0.635566554 -0.556440890
## [216] -0.573579971 -0.760686419 0.176134405 0.396030348 0.086913094
## [221] -0.995180698 0.954021023 -0.861111030 -1.011141286 -0.319644062
## [226] 0.497599452 -0.846704407 -0.614084457 -1.086431551 0.416539807
## [231] 0.761146230 0.476475286 -0.015760323 0.287893021 -3.276002983
## [236] -1.619484191 -0.095136525 1.297662607 1.428545191 2.364529955
## [241] -1.670547403 0.638899223 0.469127013 -0.133615036 -0.636457543
## [246] -1.856018021 -0.843327352 -1.420451180 -0.218803681 0.370489685
## [251] 1.015071526 0.215297831 -0.442259716 1.914029264 1.519221712
## [256] -0.045220637 -0.877788924 1.306804943 0.418975615 0.027642709
## [261] -1.248381670 -1.464767835 -0.735006375 0.407219361 2.303405971
## [266] 0.926862982 1.412641856 -1.142870768 0.345343422 0.353856222
## [271] -1.005231528 1.355442022 1.225557424 -0.150303935 -1.724536945
## [276] -0.625599696 0.715800934 -1.129474442 1.467957361 -0.440897315
## [281] -0.391040082 0.035334059 1.945048979 1.501377929 0.465176434
## [286] -0.467544792 -0.470122343 1.023270102 0.772405156 0.559393318
## [291] -0.951563346 1.242054353 -0.302033225 1.593687330 -1.558636950
## [296] -2.091339931 -0.133205315 0.885213341 -0.516181215 0.325361575
## [301] -0.771355807 1.136219568 -1.075762023 -1.221434067 -0.735703045
## [306] -1.381884654 0.464761257 -0.971295397 -1.559386218 1.362167363
## [311] -1.107840976 -0.803680179 -0.396001816 0.097029209 1.083382053
## [316] -0.281241776 0.110465958 -0.378578262 -1.058500110 -1.004736705
## [321] -2.482752645 -1.715507977 -1.228229565 -1.350460308 1.049798266
## [326] -0.615606737 -0.123998102 -0.473473145 0.689817798 0.748271810
## [331] -0.438909708 -1.309178557 0.135450376 1.203567869 -1.253833425
## [336] 2.077713064 -0.090838422 -1.734777090 0.846325111 -0.855183401
## [341] 0.097133165 -1.074911320 0.228739175 0.248961862 -0.059913976
## [346] 0.037558999 -1.911334772 -0.541108275 0.275511600 1.422855043
## [351] 0.202456753 -0.043862587 -0.766130073 0.320333000 0.414930129
## [356] -1.637898052 -0.665330568 -0.807474329 -2.331976455 -0.519293332
## [361] 2.261516112 2.541945903 0.735764099 -1.572717516 -0.216960232
## [366] -1.386225520 0.098056221 -0.278453710 -1.243748260 0.305916821
## [371] -0.709927009 -0.419418492 0.161683569 0.102163607 -0.968448157
## [376] 0.402981522 2.000433798 0.012613351 0.309047468 -1.142629306
## [381] -1.831866354 0.374051084 0.810583022 -0.462904382 -0.056013858
## [386] 0.226598772 -0.053271766 -0.243134671 0.409780597 0.885270549
## [391] -0.731706183 1.404423333 -1.639574884 -0.517931625 -0.239502118
## [396] -0.933583020 1.190221828 -1.585790129 0.957276783 0.416564812
## [401] 1.145672612 0.243031257 0.903258470 1.052341725 -1.344247000
## [406] -0.284941152 0.864547584 -0.795227347 -1.568499317 0.396455536
## [411] -0.189870253 1.150337150 0.162611201 0.138561697 -0.107721296
## [416] -0.405550760 -1.817582033 -0.337548783 0.909731633 0.338579470
## [421] -0.667427208 -1.043777514 -0.584441908 0.394706803 -0.765263672
## [426] -0.507130977 -0.471380735 0.533891339 -1.346288198 -0.667821836
## [431] 0.890616202 1.418208544 -0.990379246 -0.214846681 -1.844011779
## [436] 0.222389729 -1.360498660 -0.015222300 0.316602463 0.097162342
## [441] -0.405768866 -0.237101262 -0.041933016 0.221589014 3.169117689
## [446] 2.078588663 0.030760611 -1.893750789 -2.058143121 -0.505855829
## [451] 0.143908426 0.145313656 -0.064120247 1.355066938 2.985237868
## [456] -0.384899261 0.379209687 -0.002990724 -1.784154697 -0.377011164
## [461] 1.509975277 -0.952310388 0.489543223 -1.492082153 -0.299199689
## [466] -0.089770795 -1.865615675 1.140488535 -1.473035044 0.050689866
## [471] -0.552888374 0.048661851 0.646574244 -0.477374635 -0.018307030
## [476] -3.026307585 0.266978945 0.518798635 0.979770389 -0.584920250
## [481] -0.051549551 1.612081332 0.093215057 -0.503392778 0.499837221
## [486] 1.452645858 -0.147811051 0.311692397 -0.973092364 1.331279709
## [491] -1.343361232 0.238724227 0.047822798 1.535267484 0.311368554
## [496] -0.729863098 -0.301835396 0.348630935 -0.718302906 -0.821921718
## [501] -1.138590361 -0.518194935 -0.601549801 -0.634991398 -0.166650563
## [506] -0.421340586 -0.026941420 1.640037238 -1.451562399 0.185738910
## [511] -0.332832362 -2.257667954 -2.223560384 -0.581000495 0.784932529
## [516] 1.966506102 0.058143829 -1.593287348 -0.563915667 1.811607154
## [521] 1.019821443 -0.564820749 -2.002609033 0.199999806 -0.972944505
## [526] 0.255850298 0.759842099 0.556860878 -0.996948692 -0.505725946
## [531] 0.166801911 -1.972310499 0.162777528 0.811816243 -2.486988410
## [536] 0.492574542 -0.265704267 -0.491881563 -0.323805748 0.527116817
## [541] -0.021759088 0.054264642 -1.074898422 -0.023236729 1.725655554
## [546] -0.033214541 1.619362749 -1.072043200 0.151098106 -0.304886694
## [551] 0.513537064 2.250682503 0.675998551 -1.334538891 0.361555645
## [556] -0.703781478 0.429706392 1.232052178 -1.902698736 0.530881870
## [561] -0.218150095 0.828352321 0.575284172 0.573889101 -0.795178922
## [566] 0.384977263 -0.709640244 -1.624198381 1.662838000 -0.197799421
## [571] -0.724018149 0.596254499 0.045298224 -0.948659362 0.520204312
## [576] -1.119587826 0.421009106 1.532754027 -0.022196589 0.573198840
## [581] 2.048931944 -1.109321470 0.665681824 0.761214048 -0.711458958
## [586] -1.034783845 -0.223517039 0.489221669 0.914435598 0.025845786
## [591] -0.146223557 0.744134684 0.738665035 -1.258926438 0.884853510
## [596] 2.336509952 -1.323459008 -0.598320804 0.552683313 0.737015237
## [601] -0.096583311 -0.830807747 -0.173118388 1.588860392 -1.831397042
## [606] -2.519475576 -0.067899583 1.498300465 1.544597461 -1.124532896
## [611] 0.858998521 -0.476264256 -0.038144102 -0.677977351 0.218827033
## [616] 1.385609671 -0.519059955 0.645291645 0.709761429 1.043250871
## [621] -0.011737046 1.393102529 1.402613332 0.425478110 1.384643263
## [626] 0.810953643 -0.197261493 0.077755076 -0.354966073 -0.968433616
## [631] -2.337990801 0.716778724 0.322563818 -1.104199886 0.317954237
## [636] 1.169621502 0.134434946 0.717403654 -1.381898391 -0.773727031
## [641] -1.137465554 -0.009677904 -0.834634485 0.292622716 -0.270822845
## [646] 0.344963153 0.072110645 -0.042941040 2.001513593 0.899244902
## [651] 0.126155247 0.145141961 0.824009948 1.347058953 -0.354574518
## [656] 0.259213392 -0.670495809 -0.365363150 0.858575516 0.062248403
## [661] -1.048566415 -2.008744001 0.553085529 -0.708866672 -0.073112790
## [666] 0.662421233 0.339811142 -0.694680231 -0.039903900 -0.564618290
## [671] -0.891548844 0.159193975 -0.271843414 -0.284331013 1.051165873
## [676] 1.273344003 -0.049274556 0.805533419 -0.192922614 -0.236122835
## [681] -0.945615231 -0.305073480 0.048220514 1.190630039 -0.804701748
## [686] 0.584718208 -0.878890328 0.066854596 0.698276904 1.103269261
## [691] -1.376635412 -1.887907320 -1.054857794 -0.227396222 0.684381357
## [696] 0.165040752 3.572901761 -1.256216657 -0.554589985 0.253787975
## [701] 0.342986750 -1.223340091 -0.307722195 0.170840083 -1.171958799
## [706] -0.678764840 -0.028714990 -0.336095565 0.581866292 -1.632458923
## [711] 0.941843704 1.671560265 -0.322175665 -0.065424802 0.039459403
## [716] -1.050637146 0.041385478 0.868364735 -0.139860604 0.031959108
## [721] -0.154933723 -0.119645553 0.496355756 -0.144418229 0.371736448
## [726] -0.800277387 -1.274092030 -1.594508797 0.071005499 -1.383828551
## [731] 0.545524022 1.506105180 0.753731605 0.241798264 -0.744487035
## [736] -1.894803628 -0.295203934 -0.656956750 0.854446300 -1.457808602
## [741] -0.632500579 -0.949500272 -0.972386490 0.381157033 -0.079429015
## [746] 1.263789713 -0.617892646 -0.927187029 -0.028591107 0.430445856
## [751] -1.400342258 1.470866693 0.894839895 -0.335450588 -0.405499377
## [756] 0.714338597 0.402744026 0.334080284 0.508296087 0.577568194
## [761] -0.500750181 -0.579178948 -0.972339542 0.760922613 -1.847821582
## [766] -0.048610704 1.877451031 -1.000167545 -0.959491338 1.102631317
## [771] -0.159255338 0.564394817 -0.667797415 -0.900950848 1.296770941
## [776] 0.454721360 0.226142145 0.813741842 0.266837109 1.717593380
## [781] -0.151620307 -0.107628642 2.636720557 -2.159946802 -0.538355589
## [786] 1.502155757 -1.171214460 -0.107630258 -1.324479188 0.293697141
## [791] -0.995154284 -0.592786864 -0.953493021 0.361384521 0.007257653
## [796] -1.003746030 0.510819738 -0.037642314 0.110412493 -1.116842288
## [801] 0.111064771 -0.398786573 0.611832192 -0.423735481 0.339485614
## [806] -2.145385235 0.018780906 -1.016735378 0.494029020 -0.189480564
## [811] -1.291442021 -0.434057804 2.191975678 -0.576549311 -0.311567754
## [816] 0.040487362 -0.370018409 -0.885667225 -0.246578419 0.592295683
## [821] -0.754597096 -1.653848242 0.033615477 -0.180914743 -0.897188990
## [826] -1.335654312 -0.254570709 -1.653597400 -3.050261552 0.497324634
## [831] 0.357857399 0.323870767 0.610644574 -0.241047706 0.119386763
## [836] 0.630624124 0.247129391 0.184280399 0.816466733 0.212732901
## [841] -0.992048010 0.261421739 1.459417058 -0.535101904 -2.072999280
## [846] 0.047955159 0.252212201 1.953391514 1.751792153 -1.281182700
## [851] -1.318098840 1.391578778 1.237496064 0.550970662 0.908839329
## [856] 0.256344944 0.341933197 -0.831043510 -1.758731091 1.294110258
## [861] 1.312419168 -0.868836448 0.499364023 -0.660173475 -2.030909225
## [866] 0.643092991 -1.641504362 -0.580315917 0.905210496 0.176182250
## [871] -0.076472349 0.486585619 0.383484017 -0.102026537 -0.804819274
## [876] 1.092342464 0.318699671 -0.461701528 -2.003515177 -0.621206084
## [881] -1.258585504 1.270991847 -0.115650719 0.025131172 -0.528368454
## [886] 0.254598233 1.066532591 -0.263843798 0.916044192 1.124427376
## [891] -0.666868183 0.055589661 0.909520533 1.261431007 0.312367186
## [896] 0.415445695 0.360417052 -1.547776690 0.594356678 0.595166381
## [901] 0.439324828 -0.997141282 0.148241965 1.138512817 -0.409542194
## [906] -1.284163581 0.198269331 1.001146814 0.032540586 -0.367005415
## [911] -1.064720117 1.151067304 0.230501331 -0.559035426 -0.425674462
## [916] 1.456913592 0.751606903 -0.349291776 0.230090440 -0.497019935
## [921] 1.125815012 1.310221307 -1.118829698 -0.554279909 0.407778048
## [926] 1.232503994 -0.411908359 -0.805713621 -0.162691198 0.576064317
## [931] 1.851716270 0.417318937 0.296729967 -1.193685062 0.324119750
## [936] 2.787734960 -1.373008982 -1.628546877 -0.961762363 0.453529225
## [941] 0.593353764 0.865898061 0.179117957 -0.126408702 0.444725072
## [946] 1.192063994 -0.552324421 -1.260549567 1.773572665 -0.693887793
## [951] -1.534356063 0.688187115 -1.351705085 -0.827241576 0.783785995
## [956] -2.099429977 -1.464254019 -2.634748518 -0.451910484 -0.289690728
## [961] -1.158426806 -0.256813826 -1.417603302 -1.301332277 -1.134191641
## [966] 0.490551401 -0.757177144 0.197353374 -1.184729568 -2.700836990
## [971] -0.563973631 2.031541163 0.009811435 -0.432092778 0.710700608
## [976] 1.512323826 1.259201722 1.674000018 1.379520313 0.159845752
## [981] 1.048084405 0.764312846 0.781423766 -2.071155731 -1.243737763
## [986] -0.546462404 -2.493635855 -1.103967778 -0.650231029 1.121460096
## [991] 0.150944929 1.460885902 1.795755809 -0.516597990 0.229836352
## [996] -0.908495912 -0.629386070 0.984447692 -1.983989164 -0.489806334
Like many classic datasets, the quartet is included in R's datasets package. First, load the required libraries and the data, and print the full table:
library(ggplot2)
library(gridExtra)
# Load the quartet explicitly from the `datasets` package so that a data set
# of the same name in another attached package or in ./data cannot be picked
# up by mistake, then print the whole table.
data("anscombe", package = "datasets")
anscombe
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
The summary of the four datasets shows how similar they are in terms of the mean:
# Per-column minimum, quartiles, median, mean and maximum for all eight
# x/y columns of the quartet.
summary(anscombe)
## x1 x2 x3 x4
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19
## y1 y2 y3 y4
## Min. : 4.260 Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.: 6.315 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median : 7.580 Median :8.140 Median : 7.11 Median : 7.040
## Mean : 7.501 Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.: 8.570 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :10.840 Max. :9.260 Max. :12.74 Max. :12.500
It is also easy to see the similarities in terms of the variance, correlation coefficient and linear regression:
# correlation between x<i> and y<i> for each of the four datasets.
# Columns are looked up by name instead of by the positional offset `i + 4`,
# and vapply() guarantees a plain numeric vector with one value per dataset.
vapply(
  1:4,
  function(i) cor(anscombe[[paste0("x", i)]], anscombe[[paste0("y", i)]]),
  numeric(1)
)
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# variance of each response column y1..y4.
# The columns are selected by name rather than by the positions 5:8, and
# vapply() fixes the result type; USE.NAMES = FALSE keeps the vector unnamed.
vapply(
  paste0("y", 1:4),
  function(col) var(anscombe[[col]]),
  numeric(1),
  USE.NAMES = FALSE
)
## [1] 4.127269 4.127629 4.122620 4.123249
# linear regression: least-squares fit of y1 on x1 (dataset 1)
lm(formula = y1 ~ x1, data = anscombe)
##
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
##
## Coefficients:
## (Intercept) x1
## 3.0001 0.5001
# Least-squares fit of y2 on x2 (dataset 2)
lm(formula = y2 ~ x2, data = anscombe)
##
## Call:
## lm(formula = y2 ~ x2, data = anscombe)
##
## Coefficients:
## (Intercept) x2
## 3.001 0.500
# Least-squares fit of y3 on x3 (dataset 3)
lm(formula = y3 ~ x3, data = anscombe)
##
## Call:
## lm(formula = y3 ~ x3, data = anscombe)
##
## Coefficients:
## (Intercept) x3
## 3.0025 0.4997
# Least-squares fit of y4 on x4 (dataset 4)
lm(formula = y4 ~ x4, data = anscombe)
##
## Call:
## lm(formula = y4 ~ x4, data = anscombe)
##
## Coefficients:
## (Intercept) x4
## 3.0017 0.4999
# Build the scatter plot for one dataset of the quartet.
#
# set_id           : 1-4; selects the column pair x<set_id> / y<set_id>.
# data             : data frame holding those columns (defaults to anscombe).
# intercept, slope : reference line drawn over the points. The defaults are
#                    the rounded coefficients that the four lm() fits share.
#
# Returns a ggplot object titled "dataset <set_id>".
plot_anscombe_set <- function(set_id, data = anscombe,
                              intercept = 3, slope = 0.5) {
  x_col <- paste0("x", set_id)
  y_col <- paste0("y", set_id)

  ggplot(data) +
    geom_point(
      aes(.data[[x_col]], .data[[y_col]]),
      color = "darkred", size = 3
    ) +
    scale_x_continuous(breaks = seq(0, 20, 2)) +
    scale_y_continuous(breaks = seq(0, 12, 2)) +
    geom_abline(intercept = intercept, slope = slope, color = "darkblue") +
    # Make sure the origin is included on both axes.
    expand_limits(x = 0, y = 0) +
    # Axis labels are set explicitly so they read "x1"/"y1" etc. instead of
    # being derived from the `.data[[...]]` expressions.
    labs(title = paste("dataset", set_id), x = x_col, y = y_col)
}

# One plot per dataset; the individual objects are kept for later use.
p1 <- plot_anscombe_set(1)
p2 <- plot_anscombe_set(2)
p3 <- plot_anscombe_set(3)
p4 <- plot_anscombe_set(4)
p <- list(p1, p2, p3, p4)

# Draw the four panels in a two-column grid.
grid.arrange(grobs = p, ncol = 2)