Introduction


English statistician Frank Anscombe designed four short datasets with the aim of demonstrating the importance of visualising data and the dangers of relying on simple summary statistics.

His original paper, "Graphs in Statistical Analysis", can be retrieved from the JSTOR website.

1. Dataset


# Draw 1000 values from the standard normal distribution and display them.
x <- rnorm(n = 1000)
x
##    [1]  0.699977289 -0.840098382  0.980859776  0.343609445 -0.546704762
##    [6]  0.453940018 -0.510453576 -1.593244307  1.402584974 -1.186147658
##   [11] -1.121666802 -0.655157488 -0.073966949  1.686959098 -0.098396226
##   [16]  0.274593380  1.091434169 -1.068998286  0.941319866  1.328871467
##   [21] -0.285040806 -0.419497107 -1.418293833  1.308661120 -0.989357150
##   [26]  0.100615545 -0.357478657  0.070409878  1.050988195 -0.575883101
##   [31]  0.229709078  0.134120762 -1.526498116  2.308404002 -0.891230543
##   [36]  0.409732496  1.301752246 -0.198746292 -0.287630759  1.254659005
##   [41]  0.893599565 -0.898583346 -0.032878854 -1.705950631  1.358264685
##   [46]  1.015084889 -1.628512117  0.430888949  0.273395701  1.515395059
##   [51] -0.642466397  0.148831014 -1.276744897  0.633321601  0.822224435
##   [56] -0.380472215 -0.179050586  0.506429917 -1.067348100 -0.449627892
##   [61] -0.276520724 -0.018973684  0.135310221  0.917687373  0.964710312
##   [66]  0.154263116 -0.820507376  2.234776447  0.021081546  0.688900907
##   [71] -0.246062949  0.950410897 -0.225957301 -0.659536490 -2.498542288
##   [76] -1.697888354  0.495290821 -0.969800023 -0.580590754  1.897705413
##   [81] -1.732961451 -0.593243732 -1.489489454  0.346984674 -2.460092785
##   [86] -1.728183671  0.339808274 -1.195802725 -0.434847171 -0.306597187
##   [91] -0.802266695  0.231480330  1.206627297 -0.913058369 -0.289451016
##   [96] -0.963279597  0.652614597 -1.011417308 -0.708054158 -0.679483875
##  [101] -0.559515749  0.157205777  0.604344583 -1.296955537  3.448405319
##  [106]  1.042828149 -0.641737574 -0.644760143  1.046355523 -2.062015814
##  [111] -0.056944161  0.806307151  0.340620430  1.384957473  0.072015391
##  [116]  0.737054189  1.157644793 -1.374397623 -1.193574121 -0.319939591
##  [121]  0.598263173 -0.380073095  0.321713852  2.439531015 -1.605432058
##  [126]  0.969537409  0.340958411  0.099848122  1.316175678  0.893831618
##  [131]  1.022017831  0.615081633 -1.004039886  0.343071411 -0.601800531
##  [136] -1.159905323  0.963455120 -0.039858498  1.196122597 -0.441658855
##  [141]  0.393053607  1.164158742  1.316991652 -0.407310170 -0.454119410
##  [146] -0.979679494 -0.843811856  0.197647197  0.191771912  0.035021143
##  [151]  0.816440390  0.736623949  0.876268632 -0.866990606  0.202342940
##  [156]  0.318570906 -1.279557856  0.667446023  0.445181050 -0.307179275
##  [161]  1.511230327 -1.840249164  3.027644303 -0.946257448 -0.620677713
##  [166] -3.034647774 -0.793423500  1.678525184  0.168840867 -0.367970285
##  [171] -1.727463832  0.737291101  0.344370947 -0.367146659 -0.589955350
##  [176]  1.306742030 -0.020174507 -1.303021540  1.083241695 -1.244637189
##  [181] -0.333304564  0.058217109 -1.468557943  1.172734833  1.914603957
##  [186]  0.308798481  0.094650700 -1.352561223  0.735030117 -1.811481469
##  [191] -0.166768705  1.783378618  1.054764032  0.218547444 -0.406116690
##  [196]  0.851041621  1.417443511  0.165592423 -0.349321391  0.773635902
##  [201]  0.881101483 -0.160995027 -0.005374032  0.287612107  0.843278544
##  [206] -0.324868231 -1.014793799 -0.203163923  0.778663437  1.489189667
##  [211]  0.179228562  0.443369972  0.024027936  0.217731452 -0.620703805
##  [216]  0.644053356  0.353962853  0.331456127 -0.860478317  0.629207369
##  [221]  1.276475371  0.391446005 -1.243298115 -1.064018852 -0.050130331
##  [226]  0.709921228  1.310529067  0.662410738  0.651715623  0.843366859
##  [231]  0.728267269  0.421326204  0.041286243  1.414875777 -0.701352082
##  [236] -0.404737426  1.803643609  0.421745371 -0.972956443  1.894695170
##  [241]  0.155264227 -0.937820695  0.287209072 -0.752037927  0.248116358
##  [246]  0.606995255 -0.590902496 -0.093428726 -1.313364531  0.794750594
##  [251] -1.245808264  0.115493449  0.022108792 -0.419491435 -1.530825271
##  [256]  0.124647975  0.460532421  0.560853858  0.229817852  0.394217052
##  [261] -0.208686706 -0.451882571  0.517777517  1.116649145  1.813112844
##  [266]  0.178081282 -3.302487741  0.488188429  0.789594796  0.082657550
##  [271]  0.709480900 -2.074049810  0.075664869  0.710358118  1.385222770
##  [276]  1.515784989  0.445461356  1.469159113  1.087193590 -0.883225538
##  [281] -0.109419836 -0.113971140  0.961753667  1.714342738  0.385017655
##  [286] -1.073625547 -0.572547265 -1.615449539 -0.100977269  0.829479947
##  [291] -0.592929429 -0.822048619 -0.453552410  0.082885936 -0.096463443
##  [296] -0.405760775 -0.128269066  0.019021721  0.359520069 -1.570087469
##  [301] -0.144370055 -1.345125980 -0.337204478  0.439557493  0.658880589
##  [306] -1.475866653  0.471047583 -0.435168168 -0.754348437  1.293223019
##  [311] -0.439448019  1.544869809 -0.351343856  1.764118944 -0.591561439
##  [316] -0.474846910  2.558586627 -0.151703206  1.164387575 -0.978307032
##  [321]  0.408789670  1.420501908 -0.181430614  1.213063614  2.414091168
##  [326] -3.415147785 -1.285767793  0.768506456  1.820180329  0.269047817
##  [331] -1.870736446 -0.960217942  0.560881701 -1.421987256  0.078442486
##  [336] -0.603078661 -0.473044092 -0.578238872  1.198682768 -0.048866647
##  [341] -0.595527186  0.819163856  0.757455325 -0.080703878 -0.841553880
##  [346]  0.802052650  0.942654619 -0.363302338 -2.971856481  0.154368483
##  [351] -0.022330540  1.026293038  1.226846238  0.227959137 -2.143683727
##  [356]  0.072403181 -0.678319125  1.817374063  0.875896779 -0.174982517
##  [361]  0.384725707 -0.031587416 -1.293203818  0.476084103 -0.607029513
##  [366] -1.608522847 -0.649341059  0.177247331  1.301404040 -0.834512831
##  [371]  0.695062595 -0.846804551  2.237157711 -0.979796437 -0.856474017
##  [376] -0.896979328  0.072822049 -1.661918529  0.620651697 -2.060682000
##  [381] -1.908784114  0.849173563  0.447463427  0.568802775 -1.264330809
##  [386]  0.175589442 -1.780020416 -1.859783024 -1.862828049 -2.101742569
##  [391] -0.034763979  0.410642686  0.298087148  1.129392539 -0.447175983
##  [396]  0.429276917  0.930694342 -1.610982409  1.204942600 -0.411793407
##  [401]  0.333423810  2.169456453  1.166518845 -1.494950312  0.868006766
##  [406]  0.410319431  0.293881632 -0.251146660  0.691572021  0.995404555
##  [411] -0.582844371  2.917868943  0.380300836 -0.850168113  0.111602779
##  [416]  0.681028545 -0.394252257 -2.047035606 -0.065578791  0.665616527
##  [421]  1.154171753 -0.955797235  0.738052206  0.894970920 -1.491848475
##  [426] -0.801547387 -1.357061762  0.182841714  0.670663193 -0.105401349
##  [431]  0.073477895 -0.308976871  0.411790871  0.299457308  0.142768277
##  [436] -1.123451308 -0.794723504 -0.265466951 -1.741546843 -1.182731259
##  [441] -0.041153696 -0.270565105 -2.364435238 -0.768090242  1.035301209
##  [446]  0.067054260 -0.887040859 -0.741057113  0.478705958 -0.692600214
##  [451] -0.345952319  1.047647662 -0.280757873 -0.873116801 -0.746647469
##  [456] -0.153187599  1.154403171 -0.342632996 -1.411307009  0.578471311
##  [461] -1.614229560  0.938144978 -0.895923883 -2.078044488 -0.757509568
##  [466] -0.440633508 -1.683632823 -0.279129944  0.058261672  0.201629897
##  [471] -0.249105156  0.306763114 -1.205493685 -0.610373109 -1.071242196
##  [476]  0.103647739  0.181914804  0.722072979  1.425299688  0.473573135
##  [481]  0.199908682  1.005621518 -2.272478285 -0.849922947 -1.429284360
##  [486]  0.476275576  0.248063975  0.170827836  1.418306874 -1.442332654
##  [491]  0.568269524  0.240808868  1.669405681  0.535621295  0.892294070
##  [496] -1.112498133 -1.243351730 -1.644544942 -0.680381542  0.009075171
##  [501]  0.378238600  0.928629830  0.718282408  0.563978435 -1.775507150
##  [506]  0.211838800  0.192627318  1.369580863 -0.007547020 -0.819277926
##  [511] -0.763610544 -1.086272039 -0.460013700 -1.248112142  1.946314652
##  [516] -1.065093653  0.007282063  0.105627269 -1.000027909  0.558175427
##  [521] -0.521759781 -1.278289327 -0.941305077  0.875681840 -1.157321713
##  [526] -0.869917275  0.637919949  0.245877574  1.839430000  0.204313300
##  [531] -0.922689395 -2.025411730 -1.893757931 -0.184827601  0.011262577
##  [536] -0.941848890 -0.240014425 -1.578232895 -0.812110742 -1.428637595
##  [541]  2.778261005  0.366092725 -1.366725647 -0.483061655 -0.603184516
##  [546]  0.587798762 -0.283231046  0.862847266  1.147581960 -0.715453511
##  [551]  0.165147565  0.787281828  1.181557178  1.088783209  0.105820435
##  [556]  1.695414029 -0.354635445 -0.233910277  0.429237861 -0.096391018
##  [561] -0.845547117  0.183228910 -0.273078518 -0.054439548  0.260962061
##  [566] -0.399081562  0.555559707 -1.357246927 -0.344066076  0.302485232
##  [571]  0.221284626  0.750861177  0.165075123 -1.140537863  0.109059226
##  [576] -0.229481302  0.080529866  0.342919287 -0.160489328  1.679352202
##  [581] -0.142221188  2.605828955 -0.137604334 -0.681565097 -1.095495666
##  [586] -0.030343651  0.378512214  0.958788875  1.280067040  1.144590736
##  [591] -1.598979554 -0.744373850  1.123665869 -1.427301729 -0.404028965
##  [596] -1.346406204  1.401257496 -0.679881907  0.699661757  0.110766922
##  [601]  0.323926383  0.093424232  1.272067320  0.194188485 -2.342089839
##  [606]  0.364488452  1.129349772  0.037018296  1.371353204  0.806215157
##  [611] -0.550405773  1.682893779  0.029960167  0.786464277  1.232654370
##  [616] -1.602851117 -0.702582658 -0.733214529 -0.622258366 -1.421811915
##  [621] -1.402392152 -1.017460233  0.485809510 -1.082588923  0.931474458
##  [626] -0.108636881 -0.459957955 -0.529803012 -1.249162894 -0.138069582
##  [631] -0.265827882 -1.191666542 -0.614471693  0.078139974  0.080015541
##  [636] -0.358643978 -0.073599530 -0.551134848 -1.017124231 -2.412927561
##  [641]  2.127411463 -0.485648753  0.448393495  1.259976639  1.475053845
##  [646]  0.422729816 -0.473341287  0.566656637  1.318005055 -0.531242930
##  [651]  0.463514775 -0.806744538  0.758056906  0.632215749 -2.120168087
##  [656] -0.804816085 -0.080207859  0.128851583 -1.497449021 -0.613791266
##  [661] -1.517494312 -0.032269313 -0.060184445 -0.876305527 -0.287318796
##  [666] -1.330627557  1.034229468  0.842425361 -0.652180228  0.849657096
##  [671] -0.093805813  0.087208190 -1.419832163  0.522217497  0.086829732
##  [676]  0.474731267  0.411273605  0.909809299  0.598151783  1.081509836
##  [681] -0.818024023 -0.543653919  1.276277867 -1.750291730 -1.516529040
##  [686]  0.201659356 -0.482242297  0.129056429  0.001440538 -1.081810044
##  [691] -2.684560044  0.748849559  0.715136290 -0.685717864 -0.668848235
##  [696] -0.918726315  1.432664909  2.076653174 -0.552355648 -0.018831491
##  [701]  0.256856146 -0.703783906  1.526448620  0.129441058 -0.106236272
##  [706] -0.360094210  2.319864037  0.744064352 -0.871735167 -0.036731798
##  [711] -0.819170107  1.699377194 -0.316553762 -0.303510093 -0.530574910
##  [716]  0.671110540 -0.483112487  0.030283039  1.382357643 -1.344673519
##  [721]  0.524138008  0.174953070  1.641786051 -0.448653124 -0.792133410
##  [726]  0.222296043  0.106072504  1.672751030 -2.799506421 -2.370942083
##  [731] -0.511600239 -0.655866947 -0.082326584  0.352006478 -0.003078283
##  [736] -0.010692294 -0.079161321  0.717012834  0.618636955  1.295184496
##  [741] -0.921609039 -0.087587855  0.351966023  0.272131232 -0.308909132
##  [746]  0.553182875  0.416128975  0.137743132  0.800686851 -0.433510718
##  [751]  0.922900661 -0.833464006 -2.119007753 -0.559666785  0.261786109
##  [756]  0.033343531  0.082016450 -0.480340357  0.673014087  1.424872876
##  [761] -0.063607659  0.776007910 -1.517511858 -0.639333893 -1.555534643
##  [766] -1.091902227 -0.148932074  1.659477228  0.741298654 -1.137405342
##  [771]  0.397129427  0.763599572 -0.616594112 -0.294563063  0.358380164
##  [776]  0.390500586  1.227093618 -0.353623311  0.990695758 -1.376523482
##  [781]  1.746476420  0.206175654 -0.393191928 -0.249541179  1.020022824
##  [786]  0.949221898 -2.195678644 -0.116241112 -2.663248195 -0.456907759
##  [791] -0.503827519 -0.490820890 -1.613149844  0.235482971  0.717380558
##  [796] -0.839161862  0.637013319 -0.664712081  1.346219453 -0.829625092
##  [801] -0.234887691 -0.645143862  0.414463536 -0.659071261  0.770963989
##  [806] -1.679990184  0.994678186 -0.420977604  1.610722667 -0.956086987
##  [811] -2.086412260 -0.759414618 -0.695711902  1.613115947 -0.190428038
##  [816] -1.385819952 -0.396978285  0.342872843 -0.433839211  0.921753872
##  [821] -1.718587932 -0.301132337 -1.637573712 -1.169827834 -0.908102856
##  [826] -1.236459673  1.914752824 -0.290086739  0.914512573  2.575046981
##  [831]  0.065227237  0.854067247  0.894326254  0.686649945 -1.275120500
##  [836]  0.695261755  1.987119433  0.629575295 -0.087414054  2.024544269
##  [841] -0.474378553  0.756232412 -0.603170980 -0.144266759 -0.403768444
##  [846]  0.042432218  1.944419018  1.150314153 -0.043009546 -2.361074898
##  [851]  0.828035756 -0.885265772  0.396133185 -0.070347789  0.844973216
##  [856] -0.228585936 -1.564384930 -2.168094674  0.024649728  0.053935405
##  [861]  0.101233016 -1.013122575 -0.613680788 -0.542240688 -1.555446066
##  [866]  0.573186875  0.538937582  0.935955643 -0.727156571 -0.815625015
##  [871] -0.999306953  1.043485293  1.721079843 -0.325400536 -0.580514456
##  [876]  0.383586370 -0.879616203  0.327754392  1.376527531 -0.703110718
##  [881] -2.461022051  0.659719974  0.039987269 -0.134994898 -1.350703268
##  [886]  1.087364833 -1.904028679  0.927031304 -1.042421952  0.799589703
##  [891]  1.007004664  1.189197510  0.586489653 -1.135921122  1.597183086
##  [896]  0.023140712  0.368325813  1.300300141 -0.143448665 -0.225477457
##  [901]  0.529161379 -0.822917212  1.156526038 -1.725447423  0.541754553
##  [906] -0.191609820 -2.167581897  0.344232958  0.289576091 -0.149269394
##  [911]  1.696990350  1.664225298  0.714535748  1.022355446  1.580487505
##  [916]  0.699989491 -0.451631818 -1.109535903  1.133309493 -0.172982788
##  [921]  1.751559440  1.158280571 -1.393349559 -1.301389623 -1.501173498
##  [926]  0.784498759 -1.067279395  1.449727689 -0.345738822 -1.433399325
##  [931] -0.209026368 -0.134124208 -0.299254723  2.118694722 -1.507456801
##  [936] -1.021227998 -0.662986765 -0.813843742 -0.983807558 -0.946887650
##  [941] -1.068560308  0.418947250 -3.152763854  0.064319644  0.826927625
##  [946]  1.055467463  0.985417007  0.623409549 -1.992580477  0.441484073
##  [951] -0.445032737 -0.631396848 -1.006564970 -0.513999295  1.573925197
##  [956] -0.025384905 -1.186939867  0.789673532 -0.270728084  0.007967770
##  [961]  0.221576327 -2.090018412 -0.112290371  0.040842861  0.013467587
##  [966]  1.486777167  0.506518765  0.403080694 -1.342270996 -1.027115996
##  [971] -1.108451169 -1.398056813  0.410236884 -1.189395502  0.749700641
##  [976] -0.822734172  0.092024337  0.753830526  0.624274669 -1.122117545
##  [981] -0.558370556 -0.043501336  1.590334452  0.056323351  1.954624730
##  [986] -0.393956236 -0.607800375  0.930702137 -0.819200476  2.116773401
##  [991]  0.858271400  1.459642160  0.604104288  1.991716144  0.094260069
##  [996] -0.801750043 -1.418953618 -1.680349599 -0.021479605 -1.329357208

Now a histogram:

# Plot a histogram of the values in x.
hist(x)

As with all classic datasets, the quartet is included in the R datasets package. First, load required libraries and data, and visualize them:

library(ggplot2)
library(gridExtra)
# Load the built-in Anscombe quartet from the datasets package and display it.
data("anscombe", package = "datasets")
anscombe
##    x1 x2 x3 x4    y1   y2    y3    y4
## 1  10 10 10  8  8.04 9.14  7.46  6.58
## 2   8  8  8  8  6.95 8.14  6.77  5.76
## 3  13 13 13  8  7.58 8.74 12.74  7.71
## 4   9  9  9  8  8.81 8.77  7.11  8.84
## 5  11 11 11  8  8.33 9.26  7.81  8.47
## 6  14 14 14  8  9.96 8.10  8.84  7.04
## 7   6  6  6  8  7.24 6.13  6.08  5.25
## 8   4  4  4 19  4.26 3.10  5.39 12.50
## 9  12 12 12  8 10.84 9.13  8.15  5.56
## 10  7  7  7  8  4.82 7.26  6.42  7.91
## 11  5  5  5  8  5.68 4.74  5.73  6.89

2. Statistics Summary


The summary of the four datasets shows how similar they are in terms of the mean:

# Per-column summary statistics (min, quartiles, median, mean, max) of the quartet.
summary(anscombe)
##        x1             x2             x3             x4    
##  Min.   : 4.0   Min.   : 4.0   Min.   : 4.0   Min.   : 8  
##  1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 6.5   1st Qu.: 8  
##  Median : 9.0   Median : 9.0   Median : 9.0   Median : 8  
##  Mean   : 9.0   Mean   : 9.0   Mean   : 9.0   Mean   : 9  
##  3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.:11.5   3rd Qu.: 8  
##  Max.   :14.0   Max.   :14.0   Max.   :14.0   Max.   :19  
##        y1               y2              y3              y4        
##  Min.   : 4.260   Min.   :3.100   Min.   : 5.39   Min.   : 5.250  
##  1st Qu.: 6.315   1st Qu.:6.695   1st Qu.: 6.25   1st Qu.: 6.170  
##  Median : 7.580   Median :8.140   Median : 7.11   Median : 7.040  
##  Mean   : 7.501   Mean   :7.501   Mean   : 7.50   Mean   : 7.501  
##  3rd Qu.: 8.570   3rd Qu.:8.950   3rd Qu.: 7.98   3rd Qu.: 8.190  
##  Max.   :10.840   Max.   :9.260   Max.   :12.74   Max.   :12.500

It is also easy to see the similarities in terms of the variance, correlation coefficient and linear regression:

# Correlation between each x column (1-4) and its paired y column (5-8)
vapply(
    1:4,
    function(i) cor(anscombe[[i]], anscombe[[i + 4]]),
    numeric(1)
)
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# Sample variance of each y column (columns 5-8)
vapply(5:8, function(j) var(anscombe[[j]]), numeric(1))
## [1] 4.127269 4.127629 4.122620 4.123249
# Linear regression of y on x, fitted separately for each dataset: dataset 1
lm(formula = y1 ~ x1, data = anscombe)
## 
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
## 
## Coefficients:
## (Intercept)           x1  
##      3.0001       0.5001
# Linear regression for dataset 2
lm(formula = y2 ~ x2, data = anscombe)
## 
## Call:
## lm(formula = y2 ~ x2, data = anscombe)
## 
## Coefficients:
## (Intercept)           x2  
##       3.001        0.500
# Linear regression for dataset 3
lm(formula = y3 ~ x3, data = anscombe)
## 
## Call:
## lm(formula = y3 ~ x3, data = anscombe)
## 
## Coefficients:
## (Intercept)           x3  
##      3.0025       0.4997
# Linear regression for dataset 4
lm(formula = y4 ~ x4, data = anscombe)
## 
## Call:
## lm(formula = y4 ~ x4, data = anscombe)
## 
## Coefficients:
## (Intercept)           x4  
##      3.0017       0.4999

3. Plotting the quartet with the ggplot2 package


# Build the scatter plot for one dataset of the Anscombe quartet.
#
# idx: integer in 1:4 selecting the column pair x<idx> / y<idx> of `anscombe`.
#
# Returns a ggplot object with the points in dark red, the reference line
# y = 3 + 0.5 * x in dark blue, identical axis breaks for every dataset, and
# axis limits expanded to include the origin so the four panels are comparable.
plot_anscombe <- function(idx) {
    x_col <- paste0("x", idx)
    y_col <- paste0("y", idx)

    ggplot(anscombe) +
        # .data[[name]] selects a column by its name held in a string
        geom_point(aes(.data[[x_col]], .data[[y_col]]),
                   color = "darkred", size = 3) +
        #theme_bw() +
        scale_x_continuous(breaks = seq(0, 20, 2)) +
        scale_y_continuous(breaks = seq(0, 12, 2)) +
        geom_abline(intercept = 3, slope = 0.5, color = "darkblue") +
        expand_limits(x = 0, y = 0) +
        # Set the axis labels explicitly so they read "x1", "y1", ...
        labs(title = paste("dataset", idx), x = x_col, y = y_col)
}

# One plot per dataset, in order
p <- lapply(1:4, plot_anscombe)

# Keep the individual plot objects available under their original names
p1 <- p[[1]]
p2 <- p[[2]]
p3 <- p[[3]]
p4 <- p[[4]]

# Arrange the four plots in a 2 x 2 grid
do.call(grid.arrange, c(p, list(ncol = 2)))