Introduction


English statistician Frank Anscombe designed four short datasets with the aim of demonstrating the importance of visualising data and the dangers of relying on simple summary statistics alone.

His original paper, "Graphs in Statistical Analysis", can be retrieved from the JSTOR website.

1. Dataset


# Draw 1000 values from the standard normal distribution and print them.
# No seed is set before this call in the visible document, so the values
# differ from run to run.
x <- rnorm(n = 1000)
x
##    [1]  0.448456951 -1.144926149  0.774313946 -0.351243769  1.466452963
##    [6]  0.809274119  0.388809542  0.844816406 -1.908873816  0.114630009
##   [11] -0.349978294  0.867896752 -0.456897752 -0.104370321  1.432036294
##   [16]  1.089690684 -2.358035231 -0.510653220  1.665276333  0.008021918
##   [21] -0.572789014 -0.846165211  0.282014758  1.657878371  0.403178948
##   [26] -0.825430334 -0.058643863  0.825070087 -0.796128775  2.153777205
##   [31]  0.672949961  1.372970296  1.727462486  1.951216462  0.370116111
##   [36]  0.764021265  0.648104388 -0.499386688 -1.061971519  0.393517655
##   [41]  1.713897489  0.667398736 -1.353305213 -0.115070471 -0.415747200
##   [46] -0.605395381  1.192974548  0.450541319  0.138518956  1.084955014
##   [51] -0.262223227 -0.267605429 -0.678003944 -0.597539545 -0.643654568
##   [56] -1.757897572 -0.320875400 -0.204660792  0.186901760 -0.959956907
##   [61] -1.313367305 -0.460342212  1.621052796  1.109910347  1.151645393
##   [66]  0.535815518 -0.951274638  1.024397445  0.106534357  1.297448512
##   [71]  0.489879703  0.091459649 -1.189211240  0.009424147  0.026959992
##   [76]  0.116246051 -1.060806814 -0.312326482  0.487936771  1.531520418
##   [81]  1.559552928 -0.581668223  0.162537702  0.190111743 -0.703706074
##   [86]  0.220555565  1.140645247 -0.201800043  1.447874553 -1.156359301
##   [91] -0.101288804 -1.509395728 -0.866651951 -0.066492803  0.089276864
##   [96]  2.340955478  0.181120982 -1.063438720 -0.825138387  0.819547543
##  [101]  0.015860867 -0.160683314  0.585132823  0.959527777  0.244138107
##  [106]  0.431777945 -1.054097427  0.641954570 -0.173361771  1.406417050
##  [111]  1.074990176 -0.219147972  0.564041937  1.528831476 -0.780081375
##  [116] -0.968585765 -0.186334644 -0.493983076  1.196191122 -0.588922212
##  [121]  0.294875481  0.695606061  1.521471021 -1.057272926  2.619837138
##  [126] -1.439563502  1.675270316  0.223058594  1.846061859 -0.579759508
##  [131]  0.669306662 -1.574086685 -2.087668269 -0.221235603 -0.553560082
##  [136] -1.008932895  1.343109335 -0.434491564 -0.127747098  0.478526133
##  [141] -0.892087251 -0.814743746 -0.206091373 -2.127419610  1.202465309
##  [146]  0.441045925 -0.288935807  0.815924932 -0.426706960 -0.561634187
##  [151] -0.306982974  0.311785403  2.450296315 -0.252088774  0.785901037
##  [156] -1.225196880 -0.111146339 -1.015828977 -0.322638580  0.075161836
##  [161]  1.219672167 -0.515435128  0.847354230  0.670645135  1.740925756
##  [166]  1.395793976  1.093945186 -0.233511012  0.777789828  1.208128625
##  [171] -1.579933030 -0.721043072 -0.827091341 -1.671248620 -0.348564296
##  [176]  2.080029547 -0.679487103 -0.708002559 -0.949764600  0.327769004
##  [181] -0.345097742 -0.513461778  1.843263762 -0.700944975  1.160910226
##  [186]  0.755561364 -0.542966750  0.766864784  1.975266133 -0.879742660
##  [191] -1.564470024 -0.555747316  1.606194204 -0.944350052  0.791255382
##  [196] -0.239104490  0.077323305  0.072754951 -0.618806222 -0.283485549
##  [201]  0.957736796 -0.621908466  2.302632144 -1.119635747 -0.452503148
##  [206] -0.837399732 -1.022411086 -1.326189572 -0.649580527 -1.341096008
##  [211] -0.018346097 -0.118661420  0.032250664 -0.635566554 -0.556440890
##  [216] -0.573579971 -0.760686419  0.176134405  0.396030348  0.086913094
##  [221] -0.995180698  0.954021023 -0.861111030 -1.011141286 -0.319644062
##  [226]  0.497599452 -0.846704407 -0.614084457 -1.086431551  0.416539807
##  [231]  0.761146230  0.476475286 -0.015760323  0.287893021 -3.276002983
##  [236] -1.619484191 -0.095136525  1.297662607  1.428545191  2.364529955
##  [241] -1.670547403  0.638899223  0.469127013 -0.133615036 -0.636457543
##  [246] -1.856018021 -0.843327352 -1.420451180 -0.218803681  0.370489685
##  [251]  1.015071526  0.215297831 -0.442259716  1.914029264  1.519221712
##  [256] -0.045220637 -0.877788924  1.306804943  0.418975615  0.027642709
##  [261] -1.248381670 -1.464767835 -0.735006375  0.407219361  2.303405971
##  [266]  0.926862982  1.412641856 -1.142870768  0.345343422  0.353856222
##  [271] -1.005231528  1.355442022  1.225557424 -0.150303935 -1.724536945
##  [276] -0.625599696  0.715800934 -1.129474442  1.467957361 -0.440897315
##  [281] -0.391040082  0.035334059  1.945048979  1.501377929  0.465176434
##  [286] -0.467544792 -0.470122343  1.023270102  0.772405156  0.559393318
##  [291] -0.951563346  1.242054353 -0.302033225  1.593687330 -1.558636950
##  [296] -2.091339931 -0.133205315  0.885213341 -0.516181215  0.325361575
##  [301] -0.771355807  1.136219568 -1.075762023 -1.221434067 -0.735703045
##  [306] -1.381884654  0.464761257 -0.971295397 -1.559386218  1.362167363
##  [311] -1.107840976 -0.803680179 -0.396001816  0.097029209  1.083382053
##  [316] -0.281241776  0.110465958 -0.378578262 -1.058500110 -1.004736705
##  [321] -2.482752645 -1.715507977 -1.228229565 -1.350460308  1.049798266
##  [326] -0.615606737 -0.123998102 -0.473473145  0.689817798  0.748271810
##  [331] -0.438909708 -1.309178557  0.135450376  1.203567869 -1.253833425
##  [336]  2.077713064 -0.090838422 -1.734777090  0.846325111 -0.855183401
##  [341]  0.097133165 -1.074911320  0.228739175  0.248961862 -0.059913976
##  [346]  0.037558999 -1.911334772 -0.541108275  0.275511600  1.422855043
##  [351]  0.202456753 -0.043862587 -0.766130073  0.320333000  0.414930129
##  [356] -1.637898052 -0.665330568 -0.807474329 -2.331976455 -0.519293332
##  [361]  2.261516112  2.541945903  0.735764099 -1.572717516 -0.216960232
##  [366] -1.386225520  0.098056221 -0.278453710 -1.243748260  0.305916821
##  [371] -0.709927009 -0.419418492  0.161683569  0.102163607 -0.968448157
##  [376]  0.402981522  2.000433798  0.012613351  0.309047468 -1.142629306
##  [381] -1.831866354  0.374051084  0.810583022 -0.462904382 -0.056013858
##  [386]  0.226598772 -0.053271766 -0.243134671  0.409780597  0.885270549
##  [391] -0.731706183  1.404423333 -1.639574884 -0.517931625 -0.239502118
##  [396] -0.933583020  1.190221828 -1.585790129  0.957276783  0.416564812
##  [401]  1.145672612  0.243031257  0.903258470  1.052341725 -1.344247000
##  [406] -0.284941152  0.864547584 -0.795227347 -1.568499317  0.396455536
##  [411] -0.189870253  1.150337150  0.162611201  0.138561697 -0.107721296
##  [416] -0.405550760 -1.817582033 -0.337548783  0.909731633  0.338579470
##  [421] -0.667427208 -1.043777514 -0.584441908  0.394706803 -0.765263672
##  [426] -0.507130977 -0.471380735  0.533891339 -1.346288198 -0.667821836
##  [431]  0.890616202  1.418208544 -0.990379246 -0.214846681 -1.844011779
##  [436]  0.222389729 -1.360498660 -0.015222300  0.316602463  0.097162342
##  [441] -0.405768866 -0.237101262 -0.041933016  0.221589014  3.169117689
##  [446]  2.078588663  0.030760611 -1.893750789 -2.058143121 -0.505855829
##  [451]  0.143908426  0.145313656 -0.064120247  1.355066938  2.985237868
##  [456] -0.384899261  0.379209687 -0.002990724 -1.784154697 -0.377011164
##  [461]  1.509975277 -0.952310388  0.489543223 -1.492082153 -0.299199689
##  [466] -0.089770795 -1.865615675  1.140488535 -1.473035044  0.050689866
##  [471] -0.552888374  0.048661851  0.646574244 -0.477374635 -0.018307030
##  [476] -3.026307585  0.266978945  0.518798635  0.979770389 -0.584920250
##  [481] -0.051549551  1.612081332  0.093215057 -0.503392778  0.499837221
##  [486]  1.452645858 -0.147811051  0.311692397 -0.973092364  1.331279709
##  [491] -1.343361232  0.238724227  0.047822798  1.535267484  0.311368554
##  [496] -0.729863098 -0.301835396  0.348630935 -0.718302906 -0.821921718
##  [501] -1.138590361 -0.518194935 -0.601549801 -0.634991398 -0.166650563
##  [506] -0.421340586 -0.026941420  1.640037238 -1.451562399  0.185738910
##  [511] -0.332832362 -2.257667954 -2.223560384 -0.581000495  0.784932529
##  [516]  1.966506102  0.058143829 -1.593287348 -0.563915667  1.811607154
##  [521]  1.019821443 -0.564820749 -2.002609033  0.199999806 -0.972944505
##  [526]  0.255850298  0.759842099  0.556860878 -0.996948692 -0.505725946
##  [531]  0.166801911 -1.972310499  0.162777528  0.811816243 -2.486988410
##  [536]  0.492574542 -0.265704267 -0.491881563 -0.323805748  0.527116817
##  [541] -0.021759088  0.054264642 -1.074898422 -0.023236729  1.725655554
##  [546] -0.033214541  1.619362749 -1.072043200  0.151098106 -0.304886694
##  [551]  0.513537064  2.250682503  0.675998551 -1.334538891  0.361555645
##  [556] -0.703781478  0.429706392  1.232052178 -1.902698736  0.530881870
##  [561] -0.218150095  0.828352321  0.575284172  0.573889101 -0.795178922
##  [566]  0.384977263 -0.709640244 -1.624198381  1.662838000 -0.197799421
##  [571] -0.724018149  0.596254499  0.045298224 -0.948659362  0.520204312
##  [576] -1.119587826  0.421009106  1.532754027 -0.022196589  0.573198840
##  [581]  2.048931944 -1.109321470  0.665681824  0.761214048 -0.711458958
##  [586] -1.034783845 -0.223517039  0.489221669  0.914435598  0.025845786
##  [591] -0.146223557  0.744134684  0.738665035 -1.258926438  0.884853510
##  [596]  2.336509952 -1.323459008 -0.598320804  0.552683313  0.737015237
##  [601] -0.096583311 -0.830807747 -0.173118388  1.588860392 -1.831397042
##  [606] -2.519475576 -0.067899583  1.498300465  1.544597461 -1.124532896
##  [611]  0.858998521 -0.476264256 -0.038144102 -0.677977351  0.218827033
##  [616]  1.385609671 -0.519059955  0.645291645  0.709761429  1.043250871
##  [621] -0.011737046  1.393102529  1.402613332  0.425478110  1.384643263
##  [626]  0.810953643 -0.197261493  0.077755076 -0.354966073 -0.968433616
##  [631] -2.337990801  0.716778724  0.322563818 -1.104199886  0.317954237
##  [636]  1.169621502  0.134434946  0.717403654 -1.381898391 -0.773727031
##  [641] -1.137465554 -0.009677904 -0.834634485  0.292622716 -0.270822845
##  [646]  0.344963153  0.072110645 -0.042941040  2.001513593  0.899244902
##  [651]  0.126155247  0.145141961  0.824009948  1.347058953 -0.354574518
##  [656]  0.259213392 -0.670495809 -0.365363150  0.858575516  0.062248403
##  [661] -1.048566415 -2.008744001  0.553085529 -0.708866672 -0.073112790
##  [666]  0.662421233  0.339811142 -0.694680231 -0.039903900 -0.564618290
##  [671] -0.891548844  0.159193975 -0.271843414 -0.284331013  1.051165873
##  [676]  1.273344003 -0.049274556  0.805533419 -0.192922614 -0.236122835
##  [681] -0.945615231 -0.305073480  0.048220514  1.190630039 -0.804701748
##  [686]  0.584718208 -0.878890328  0.066854596  0.698276904  1.103269261
##  [691] -1.376635412 -1.887907320 -1.054857794 -0.227396222  0.684381357
##  [696]  0.165040752  3.572901761 -1.256216657 -0.554589985  0.253787975
##  [701]  0.342986750 -1.223340091 -0.307722195  0.170840083 -1.171958799
##  [706] -0.678764840 -0.028714990 -0.336095565  0.581866292 -1.632458923
##  [711]  0.941843704  1.671560265 -0.322175665 -0.065424802  0.039459403
##  [716] -1.050637146  0.041385478  0.868364735 -0.139860604  0.031959108
##  [721] -0.154933723 -0.119645553  0.496355756 -0.144418229  0.371736448
##  [726] -0.800277387 -1.274092030 -1.594508797  0.071005499 -1.383828551
##  [731]  0.545524022  1.506105180  0.753731605  0.241798264 -0.744487035
##  [736] -1.894803628 -0.295203934 -0.656956750  0.854446300 -1.457808602
##  [741] -0.632500579 -0.949500272 -0.972386490  0.381157033 -0.079429015
##  [746]  1.263789713 -0.617892646 -0.927187029 -0.028591107  0.430445856
##  [751] -1.400342258  1.470866693  0.894839895 -0.335450588 -0.405499377
##  [756]  0.714338597  0.402744026  0.334080284  0.508296087  0.577568194
##  [761] -0.500750181 -0.579178948 -0.972339542  0.760922613 -1.847821582
##  [766] -0.048610704  1.877451031 -1.000167545 -0.959491338  1.102631317
##  [771] -0.159255338  0.564394817 -0.667797415 -0.900950848  1.296770941
##  [776]  0.454721360  0.226142145  0.813741842  0.266837109  1.717593380
##  [781] -0.151620307 -0.107628642  2.636720557 -2.159946802 -0.538355589
##  [786]  1.502155757 -1.171214460 -0.107630258 -1.324479188  0.293697141
##  [791] -0.995154284 -0.592786864 -0.953493021  0.361384521  0.007257653
##  [796] -1.003746030  0.510819738 -0.037642314  0.110412493 -1.116842288
##  [801]  0.111064771 -0.398786573  0.611832192 -0.423735481  0.339485614
##  [806] -2.145385235  0.018780906 -1.016735378  0.494029020 -0.189480564
##  [811] -1.291442021 -0.434057804  2.191975678 -0.576549311 -0.311567754
##  [816]  0.040487362 -0.370018409 -0.885667225 -0.246578419  0.592295683
##  [821] -0.754597096 -1.653848242  0.033615477 -0.180914743 -0.897188990
##  [826] -1.335654312 -0.254570709 -1.653597400 -3.050261552  0.497324634
##  [831]  0.357857399  0.323870767  0.610644574 -0.241047706  0.119386763
##  [836]  0.630624124  0.247129391  0.184280399  0.816466733  0.212732901
##  [841] -0.992048010  0.261421739  1.459417058 -0.535101904 -2.072999280
##  [846]  0.047955159  0.252212201  1.953391514  1.751792153 -1.281182700
##  [851] -1.318098840  1.391578778  1.237496064  0.550970662  0.908839329
##  [856]  0.256344944  0.341933197 -0.831043510 -1.758731091  1.294110258
##  [861]  1.312419168 -0.868836448  0.499364023 -0.660173475 -2.030909225
##  [866]  0.643092991 -1.641504362 -0.580315917  0.905210496  0.176182250
##  [871] -0.076472349  0.486585619  0.383484017 -0.102026537 -0.804819274
##  [876]  1.092342464  0.318699671 -0.461701528 -2.003515177 -0.621206084
##  [881] -1.258585504  1.270991847 -0.115650719  0.025131172 -0.528368454
##  [886]  0.254598233  1.066532591 -0.263843798  0.916044192  1.124427376
##  [891] -0.666868183  0.055589661  0.909520533  1.261431007  0.312367186
##  [896]  0.415445695  0.360417052 -1.547776690  0.594356678  0.595166381
##  [901]  0.439324828 -0.997141282  0.148241965  1.138512817 -0.409542194
##  [906] -1.284163581  0.198269331  1.001146814  0.032540586 -0.367005415
##  [911] -1.064720117  1.151067304  0.230501331 -0.559035426 -0.425674462
##  [916]  1.456913592  0.751606903 -0.349291776  0.230090440 -0.497019935
##  [921]  1.125815012  1.310221307 -1.118829698 -0.554279909  0.407778048
##  [926]  1.232503994 -0.411908359 -0.805713621 -0.162691198  0.576064317
##  [931]  1.851716270  0.417318937  0.296729967 -1.193685062  0.324119750
##  [936]  2.787734960 -1.373008982 -1.628546877 -0.961762363  0.453529225
##  [941]  0.593353764  0.865898061  0.179117957 -0.126408702  0.444725072
##  [946]  1.192063994 -0.552324421 -1.260549567  1.773572665 -0.693887793
##  [951] -1.534356063  0.688187115 -1.351705085 -0.827241576  0.783785995
##  [956] -2.099429977 -1.464254019 -2.634748518 -0.451910484 -0.289690728
##  [961] -1.158426806 -0.256813826 -1.417603302 -1.301332277 -1.134191641
##  [966]  0.490551401 -0.757177144  0.197353374 -1.184729568 -2.700836990
##  [971] -0.563973631  2.031541163  0.009811435 -0.432092778  0.710700608
##  [976]  1.512323826  1.259201722  1.674000018  1.379520313  0.159845752
##  [981]  1.048084405  0.764312846  0.781423766 -2.071155731 -1.243737763
##  [986] -0.546462404 -2.493635855 -1.103967778 -0.650231029  1.121460096
##  [991]  0.150944929  1.460885902  1.795755809 -0.516597990  0.229836352
##  [996] -0.908495912 -0.629386070  0.984447692 -1.983989164 -0.489806334

Like many classic datasets, the quartet is included in R's datasets package. First, load the required libraries and the data, and print the data frame:

library(ggplot2)
library(gridExtra)
# Load the built-in Anscombe quartet into the workspace and print it:
# columns x1..x4 hold the predictors, y1..y4 the matching responses.
data("anscombe")
anscombe
##    x1 x2 x3 x4    y1   y2    y3    y4
## 1  10 10 10  8  8.04 9.14  7.46  6.58
## 2   8  8  8  8  6.95 8.14  6.77  5.76
## 3  13 13 13  8  7.58 8.74 12.74  7.71
## 4   9  9  9  8  8.81 8.77  7.11  8.84
## 5  11 11 11  8  8.33 9.26  7.81  8.47
## 6  14 14 14  8  9.96 8.10  8.84  7.04
## 7   6  6  6  8  7.24 6.13  6.08  5.25
## 8   4  4  4 19  4.26 3.10  5.39 12.50
## 9  12 12 12  8 10.84 9.13  8.15  5.56
## 10  7  7  7  8  4.82 7.26  6.42  7.91
## 11  5  5  5  8  5.68 4.74  5.73  6.89

2. Statistics Summary


The summary of the four datasets shows how similar they are in terms of the mean:

# Per-column summary (min, quartiles, median, mean, max) of all eight
# quartet columns, so the x and y columns can be compared side by side.
summary(anscombe)
##        x1             x2             x3             x4    
##  Min.   : 4.0   Min.   : 4.0   Min.   : 4.0   Min.   : 8  
##  1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 8  
##  Median : 9.0   Median : 9.0   Median : 9.0   Median : 8  
##  Mean   : 9.0   Mean   : 9.0   Mean   : 9.0   Mean   : 9  
##  3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.: 8  
##  Max.   :14.0   Max.   :14.0   Max.   :14.0   Max.   :19  
##        y1               y2              y3              y4        
##  Min.   : 4.260   Min.   :3.100   Min.   : 5.39   Min.   : 5.250  
##  1st Qu.: 6.315   1st Qu.:6.695   1st Qu.: 6.25   1st Qu.: 6.170  
##  Median : 7.580   Median :8.140   Median : 7.11   Median : 7.040  
##  Mean   : 7.501   Mean   :7.501   Mean   : 7.50   Mean   : 7.501  
##  3rd Qu.: 8.570   3rd Qu.:8.950   3rd Qu.: 7.98   3rd Qu.: 8.190  
##  Max.   :10.840   Max.   :9.260   Max.   :12.74   Max.   :12.500

It is also easy to see the similarities in terms of the variance, correlation coefficient and linear regression:

# correlation between each predictor and its response (x1/y1 ... x4/y4):
# column i holds x_i and column i + 4 holds the matching y_i.
# vapply() guarantees exactly one numeric value per dataset, whereas the
# return type of sapply() depends on its input.
vapply(
  1:4,
  function(i) cor(anscombe[[i]], anscombe[[i + 4]]),
  numeric(1)
)
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# variance of each response column (y1 ... y4 are columns 5 to 8).
# vapply() pins the result to one numeric value per column; iterating over
# the indices keeps the result unnamed, so it prints exactly as before.
vapply(
  5:8,
  function(j) var(anscombe[[j]]),
  numeric(1)
)
## [1] 4.127269 4.127629 4.122620 4.123249
# linear regression: fit y_i ~ x_i separately for each of the four datasets.
# The `##` lines after each call are its printed result; every fit reports
# an intercept of about 3.00 and a slope of about 0.50.
lm(y1 ~ x1, data = anscombe)
## 
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
## 
## Coefficients:
## (Intercept)           x1  
##      3.0001       0.5001
lm(y2 ~ x2, data = anscombe)
## 
## Call:
## lm(formula = y2 ~ x2, data = anscombe)
## 
## Coefficients:
## (Intercept)           x2  
##       3.001        0.500
lm(y3 ~ x3, data = anscombe)
## 
## Call:
## lm(formula = y3 ~ x3, data = anscombe)
## 
## Coefficients:
## (Intercept)           x3  
##      3.0025       0.4997
lm(y4 ~ x4, data = anscombe)
## 
## Call:
## lm(formula = y4 ~ x4, data = anscombe)
## 
## Coefficients:
## (Intercept)           x4  
##      3.0017       0.4999

3. Plotting the quartet with the ggplot2 package


# Build the scatter plot for one dataset of the quartet.
#
# i:    dataset number (1 to 4); selects columns x<i> and y<i>.
# data: data frame holding the quartet columns (defaults to `anscombe`).
#
# Returns a ggplot object showing the points in dark red together with the
# reference line y = 3 + 0.5 * x in dark blue, on axes that always include
# the origin so the four panels are directly comparable.
plot_quartet_panel <- function(i, data = anscombe) {
  x_name <- paste0("x", i)
  y_name <- paste0("y", i)
  panel_data <- data.frame(x = data[[x_name]], y = data[[y_name]])

  ggplot(panel_data) +
    geom_point(aes(x, y), color = "darkred", size = 3) +
    scale_x_continuous(breaks = seq(0, 20, 2)) +
    scale_y_continuous(breaks = seq(0, 12, 2)) +
    geom_abline(intercept = 3, slope = 0.5, color = "darkblue") +
    expand_limits(x = 0, y = 0) +
    # Axis labels are set explicitly so they still read x1/y1, x2/y2, ...
    labs(title = paste("dataset", i), x = x_name, y = y_name)
}

# One panel per dataset; the individual names are kept for later use.
p1 <- plot_quartet_panel(1)
p2 <- plot_quartet_panel(2)
p3 <- plot_quartet_panel(3)
p4 <- plot_quartet_panel(4)

p <- list(p1, p2, p3, p4)

# Arrange the four panels in a 2 x 2 grid.
grid.arrange(grobs = p, ncol = 2)