English statistician Frank Anscombe designed four short datasets with the aim of demonstrating the importance of visualising data and the dangers of relying on simple summary statistics.
His original paper, "Graphs in Statistical Analysis", can be retrieved from the JSTOR website.
# Draw 1000 values from the standard normal distribution and print them.
# No seed is set in this chunk, so the values differ from run to run.
x <- rnorm(n = 1000)
x
## [1] 0.699977289 -0.840098382 0.980859776 0.343609445 -0.546704762
## [6] 0.453940018 -0.510453576 -1.593244307 1.402584974 -1.186147658
## [11] -1.121666802 -0.655157488 -0.073966949 1.686959098 -0.098396226
## [16] 0.274593380 1.091434169 -1.068998286 0.941319866 1.328871467
## [21] -0.285040806 -0.419497107 -1.418293833 1.308661120 -0.989357150
## [26] 0.100615545 -0.357478657 0.070409878 1.050988195 -0.575883101
## [31] 0.229709078 0.134120762 -1.526498116 2.308404002 -0.891230543
## [36] 0.409732496 1.301752246 -0.198746292 -0.287630759 1.254659005
## [41] 0.893599565 -0.898583346 -0.032878854 -1.705950631 1.358264685
## [46] 1.015084889 -1.628512117 0.430888949 0.273395701 1.515395059
## [51] -0.642466397 0.148831014 -1.276744897 0.633321601 0.822224435
## [56] -0.380472215 -0.179050586 0.506429917 -1.067348100 -0.449627892
## [61] -0.276520724 -0.018973684 0.135310221 0.917687373 0.964710312
## [66] 0.154263116 -0.820507376 2.234776447 0.021081546 0.688900907
## [71] -0.246062949 0.950410897 -0.225957301 -0.659536490 -2.498542288
## [76] -1.697888354 0.495290821 -0.969800023 -0.580590754 1.897705413
## [81] -1.732961451 -0.593243732 -1.489489454 0.346984674 -2.460092785
## [86] -1.728183671 0.339808274 -1.195802725 -0.434847171 -0.306597187
## [91] -0.802266695 0.231480330 1.206627297 -0.913058369 -0.289451016
## [96] -0.963279597 0.652614597 -1.011417308 -0.708054158 -0.679483875
## [101] -0.559515749 0.157205777 0.604344583 -1.296955537 3.448405319
## [106] 1.042828149 -0.641737574 -0.644760143 1.046355523 -2.062015814
## [111] -0.056944161 0.806307151 0.340620430 1.384957473 0.072015391
## [116] 0.737054189 1.157644793 -1.374397623 -1.193574121 -0.319939591
## [121] 0.598263173 -0.380073095 0.321713852 2.439531015 -1.605432058
## [126] 0.969537409 0.340958411 0.099848122 1.316175678 0.893831618
## [131] 1.022017831 0.615081633 -1.004039886 0.343071411 -0.601800531
## [136] -1.159905323 0.963455120 -0.039858498 1.196122597 -0.441658855
## [141] 0.393053607 1.164158742 1.316991652 -0.407310170 -0.454119410
## [146] -0.979679494 -0.843811856 0.197647197 0.191771912 0.035021143
## [151] 0.816440390 0.736623949 0.876268632 -0.866990606 0.202342940
## [156] 0.318570906 -1.279557856 0.667446023 0.445181050 -0.307179275
## [161] 1.511230327 -1.840249164 3.027644303 -0.946257448 -0.620677713
## [166] -3.034647774 -0.793423500 1.678525184 0.168840867 -0.367970285
## [171] -1.727463832 0.737291101 0.344370947 -0.367146659 -0.589955350
## [176] 1.306742030 -0.020174507 -1.303021540 1.083241695 -1.244637189
## [181] -0.333304564 0.058217109 -1.468557943 1.172734833 1.914603957
## [186] 0.308798481 0.094650700 -1.352561223 0.735030117 -1.811481469
## [191] -0.166768705 1.783378618 1.054764032 0.218547444 -0.406116690
## [196] 0.851041621 1.417443511 0.165592423 -0.349321391 0.773635902
## [201] 0.881101483 -0.160995027 -0.005374032 0.287612107 0.843278544
## [206] -0.324868231 -1.014793799 -0.203163923 0.778663437 1.489189667
## [211] 0.179228562 0.443369972 0.024027936 0.217731452 -0.620703805
## [216] 0.644053356 0.353962853 0.331456127 -0.860478317 0.629207369
## [221] 1.276475371 0.391446005 -1.243298115 -1.064018852 -0.050130331
## [226] 0.709921228 1.310529067 0.662410738 0.651715623 0.843366859
## [231] 0.728267269 0.421326204 0.041286243 1.414875777 -0.701352082
## [236] -0.404737426 1.803643609 0.421745371 -0.972956443 1.894695170
## [241] 0.155264227 -0.937820695 0.287209072 -0.752037927 0.248116358
## [246] 0.606995255 -0.590902496 -0.093428726 -1.313364531 0.794750594
## [251] -1.245808264 0.115493449 0.022108792 -0.419491435 -1.530825271
## [256] 0.124647975 0.460532421 0.560853858 0.229817852 0.394217052
## [261] -0.208686706 -0.451882571 0.517777517 1.116649145 1.813112844
## [266] 0.178081282 -3.302487741 0.488188429 0.789594796 0.082657550
## [271] 0.709480900 -2.074049810 0.075664869 0.710358118 1.385222770
## [276] 1.515784989 0.445461356 1.469159113 1.087193590 -0.883225538
## [281] -0.109419836 -0.113971140 0.961753667 1.714342738 0.385017655
## [286] -1.073625547 -0.572547265 -1.615449539 -0.100977269 0.829479947
## [291] -0.592929429 -0.822048619 -0.453552410 0.082885936 -0.096463443
## [296] -0.405760775 -0.128269066 0.019021721 0.359520069 -1.570087469
## [301] -0.144370055 -1.345125980 -0.337204478 0.439557493 0.658880589
## [306] -1.475866653 0.471047583 -0.435168168 -0.754348437 1.293223019
## [311] -0.439448019 1.544869809 -0.351343856 1.764118944 -0.591561439
## [316] -0.474846910 2.558586627 -0.151703206 1.164387575 -0.978307032
## [321] 0.408789670 1.420501908 -0.181430614 1.213063614 2.414091168
## [326] -3.415147785 -1.285767793 0.768506456 1.820180329 0.269047817
## [331] -1.870736446 -0.960217942 0.560881701 -1.421987256 0.078442486
## [336] -0.603078661 -0.473044092 -0.578238872 1.198682768 -0.048866647
## [341] -0.595527186 0.819163856 0.757455325 -0.080703878 -0.841553880
## [346] 0.802052650 0.942654619 -0.363302338 -2.971856481 0.154368483
## [351] -0.022330540 1.026293038 1.226846238 0.227959137 -2.143683727
## [356] 0.072403181 -0.678319125 1.817374063 0.875896779 -0.174982517
## [361] 0.384725707 -0.031587416 -1.293203818 0.476084103 -0.607029513
## [366] -1.608522847 -0.649341059 0.177247331 1.301404040 -0.834512831
## [371] 0.695062595 -0.846804551 2.237157711 -0.979796437 -0.856474017
## [376] -0.896979328 0.072822049 -1.661918529 0.620651697 -2.060682000
## [381] -1.908784114 0.849173563 0.447463427 0.568802775 -1.264330809
## [386] 0.175589442 -1.780020416 -1.859783024 -1.862828049 -2.101742569
## [391] -0.034763979 0.410642686 0.298087148 1.129392539 -0.447175983
## [396] 0.429276917 0.930694342 -1.610982409 1.204942600 -0.411793407
## [401] 0.333423810 2.169456453 1.166518845 -1.494950312 0.868006766
## [406] 0.410319431 0.293881632 -0.251146660 0.691572021 0.995404555
## [411] -0.582844371 2.917868943 0.380300836 -0.850168113 0.111602779
## [416] 0.681028545 -0.394252257 -2.047035606 -0.065578791 0.665616527
## [421] 1.154171753 -0.955797235 0.738052206 0.894970920 -1.491848475
## [426] -0.801547387 -1.357061762 0.182841714 0.670663193 -0.105401349
## [431] 0.073477895 -0.308976871 0.411790871 0.299457308 0.142768277
## [436] -1.123451308 -0.794723504 -0.265466951 -1.741546843 -1.182731259
## [441] -0.041153696 -0.270565105 -2.364435238 -0.768090242 1.035301209
## [446] 0.067054260 -0.887040859 -0.741057113 0.478705958 -0.692600214
## [451] -0.345952319 1.047647662 -0.280757873 -0.873116801 -0.746647469
## [456] -0.153187599 1.154403171 -0.342632996 -1.411307009 0.578471311
## [461] -1.614229560 0.938144978 -0.895923883 -2.078044488 -0.757509568
## [466] -0.440633508 -1.683632823 -0.279129944 0.058261672 0.201629897
## [471] -0.249105156 0.306763114 -1.205493685 -0.610373109 -1.071242196
## [476] 0.103647739 0.181914804 0.722072979 1.425299688 0.473573135
## [481] 0.199908682 1.005621518 -2.272478285 -0.849922947 -1.429284360
## [486] 0.476275576 0.248063975 0.170827836 1.418306874 -1.442332654
## [491] 0.568269524 0.240808868 1.669405681 0.535621295 0.892294070
## [496] -1.112498133 -1.243351730 -1.644544942 -0.680381542 0.009075171
## [501] 0.378238600 0.928629830 0.718282408 0.563978435 -1.775507150
## [506] 0.211838800 0.192627318 1.369580863 -0.007547020 -0.819277926
## [511] -0.763610544 -1.086272039 -0.460013700 -1.248112142 1.946314652
## [516] -1.065093653 0.007282063 0.105627269 -1.000027909 0.558175427
## [521] -0.521759781 -1.278289327 -0.941305077 0.875681840 -1.157321713
## [526] -0.869917275 0.637919949 0.245877574 1.839430000 0.204313300
## [531] -0.922689395 -2.025411730 -1.893757931 -0.184827601 0.011262577
## [536] -0.941848890 -0.240014425 -1.578232895 -0.812110742 -1.428637595
## [541] 2.778261005 0.366092725 -1.366725647 -0.483061655 -0.603184516
## [546] 0.587798762 -0.283231046 0.862847266 1.147581960 -0.715453511
## [551] 0.165147565 0.787281828 1.181557178 1.088783209 0.105820435
## [556] 1.695414029 -0.354635445 -0.233910277 0.429237861 -0.096391018
## [561] -0.845547117 0.183228910 -0.273078518 -0.054439548 0.260962061
## [566] -0.399081562 0.555559707 -1.357246927 -0.344066076 0.302485232
## [571] 0.221284626 0.750861177 0.165075123 -1.140537863 0.109059226
## [576] -0.229481302 0.080529866 0.342919287 -0.160489328 1.679352202
## [581] -0.142221188 2.605828955 -0.137604334 -0.681565097 -1.095495666
## [586] -0.030343651 0.378512214 0.958788875 1.280067040 1.144590736
## [591] -1.598979554 -0.744373850 1.123665869 -1.427301729 -0.404028965
## [596] -1.346406204 1.401257496 -0.679881907 0.699661757 0.110766922
## [601] 0.323926383 0.093424232 1.272067320 0.194188485 -2.342089839
## [606] 0.364488452 1.129349772 0.037018296 1.371353204 0.806215157
## [611] -0.550405773 1.682893779 0.029960167 0.786464277 1.232654370
## [616] -1.602851117 -0.702582658 -0.733214529 -0.622258366 -1.421811915
## [621] -1.402392152 -1.017460233 0.485809510 -1.082588923 0.931474458
## [626] -0.108636881 -0.459957955 -0.529803012 -1.249162894 -0.138069582
## [631] -0.265827882 -1.191666542 -0.614471693 0.078139974 0.080015541
## [636] -0.358643978 -0.073599530 -0.551134848 -1.017124231 -2.412927561
## [641] 2.127411463 -0.485648753 0.448393495 1.259976639 1.475053845
## [646] 0.422729816 -0.473341287 0.566656637 1.318005055 -0.531242930
## [651] 0.463514775 -0.806744538 0.758056906 0.632215749 -2.120168087
## [656] -0.804816085 -0.080207859 0.128851583 -1.497449021 -0.613791266
## [661] -1.517494312 -0.032269313 -0.060184445 -0.876305527 -0.287318796
## [666] -1.330627557 1.034229468 0.842425361 -0.652180228 0.849657096
## [671] -0.093805813 0.087208190 -1.419832163 0.522217497 0.086829732
## [676] 0.474731267 0.411273605 0.909809299 0.598151783 1.081509836
## [681] -0.818024023 -0.543653919 1.276277867 -1.750291730 -1.516529040
## [686] 0.201659356 -0.482242297 0.129056429 0.001440538 -1.081810044
## [691] -2.684560044 0.748849559 0.715136290 -0.685717864 -0.668848235
## [696] -0.918726315 1.432664909 2.076653174 -0.552355648 -0.018831491
## [701] 0.256856146 -0.703783906 1.526448620 0.129441058 -0.106236272
## [706] -0.360094210 2.319864037 0.744064352 -0.871735167 -0.036731798
## [711] -0.819170107 1.699377194 -0.316553762 -0.303510093 -0.530574910
## [716] 0.671110540 -0.483112487 0.030283039 1.382357643 -1.344673519
## [721] 0.524138008 0.174953070 1.641786051 -0.448653124 -0.792133410
## [726] 0.222296043 0.106072504 1.672751030 -2.799506421 -2.370942083
## [731] -0.511600239 -0.655866947 -0.082326584 0.352006478 -0.003078283
## [736] -0.010692294 -0.079161321 0.717012834 0.618636955 1.295184496
## [741] -0.921609039 -0.087587855 0.351966023 0.272131232 -0.308909132
## [746] 0.553182875 0.416128975 0.137743132 0.800686851 -0.433510718
## [751] 0.922900661 -0.833464006 -2.119007753 -0.559666785 0.261786109
## [756] 0.033343531 0.082016450 -0.480340357 0.673014087 1.424872876
## [761] -0.063607659 0.776007910 -1.517511858 -0.639333893 -1.555534643
## [766] -1.091902227 -0.148932074 1.659477228 0.741298654 -1.137405342
## [771] 0.397129427 0.763599572 -0.616594112 -0.294563063 0.358380164
## [776] 0.390500586 1.227093618 -0.353623311 0.990695758 -1.376523482
## [781] 1.746476420 0.206175654 -0.393191928 -0.249541179 1.020022824
## [786] 0.949221898 -2.195678644 -0.116241112 -2.663248195 -0.456907759
## [791] -0.503827519 -0.490820890 -1.613149844 0.235482971 0.717380558
## [796] -0.839161862 0.637013319 -0.664712081 1.346219453 -0.829625092
## [801] -0.234887691 -0.645143862 0.414463536 -0.659071261 0.770963989
## [806] -1.679990184 0.994678186 -0.420977604 1.610722667 -0.956086987
## [811] -2.086412260 -0.759414618 -0.695711902 1.613115947 -0.190428038
## [816] -1.385819952 -0.396978285 0.342872843 -0.433839211 0.921753872
## [821] -1.718587932 -0.301132337 -1.637573712 -1.169827834 -0.908102856
## [826] -1.236459673 1.914752824 -0.290086739 0.914512573 2.575046981
## [831] 0.065227237 0.854067247 0.894326254 0.686649945 -1.275120500
## [836] 0.695261755 1.987119433 0.629575295 -0.087414054 2.024544269
## [841] -0.474378553 0.756232412 -0.603170980 -0.144266759 -0.403768444
## [846] 0.042432218 1.944419018 1.150314153 -0.043009546 -2.361074898
## [851] 0.828035756 -0.885265772 0.396133185 -0.070347789 0.844973216
## [856] -0.228585936 -1.564384930 -2.168094674 0.024649728 0.053935405
## [861] 0.101233016 -1.013122575 -0.613680788 -0.542240688 -1.555446066
## [866] 0.573186875 0.538937582 0.935955643 -0.727156571 -0.815625015
## [871] -0.999306953 1.043485293 1.721079843 -0.325400536 -0.580514456
## [876] 0.383586370 -0.879616203 0.327754392 1.376527531 -0.703110718
## [881] -2.461022051 0.659719974 0.039987269 -0.134994898 -1.350703268
## [886] 1.087364833 -1.904028679 0.927031304 -1.042421952 0.799589703
## [891] 1.007004664 1.189197510 0.586489653 -1.135921122 1.597183086
## [896] 0.023140712 0.368325813 1.300300141 -0.143448665 -0.225477457
## [901] 0.529161379 -0.822917212 1.156526038 -1.725447423 0.541754553
## [906] -0.191609820 -2.167581897 0.344232958 0.289576091 -0.149269394
## [911] 1.696990350 1.664225298 0.714535748 1.022355446 1.580487505
## [916] 0.699989491 -0.451631818 -1.109535903 1.133309493 -0.172982788
## [921] 1.751559440 1.158280571 -1.393349559 -1.301389623 -1.501173498
## [926] 0.784498759 -1.067279395 1.449727689 -0.345738822 -1.433399325
## [931] -0.209026368 -0.134124208 -0.299254723 2.118694722 -1.507456801
## [936] -1.021227998 -0.662986765 -0.813843742 -0.983807558 -0.946887650
## [941] -1.068560308 0.418947250 -3.152763854 0.064319644 0.826927625
## [946] 1.055467463 0.985417007 0.623409549 -1.992580477 0.441484073
## [951] -0.445032737 -0.631396848 -1.006564970 -0.513999295 1.573925197
## [956] -0.025384905 -1.186939867 0.789673532 -0.270728084 0.007967770
## [961] 0.221576327 -2.090018412 -0.112290371 0.040842861 0.013467587
## [966] 1.486777167 0.506518765 0.403080694 -1.342270996 -1.027115996
## [971] -1.108451169 -1.398056813 0.410236884 -1.189395502 0.749700641
## [976] -0.822734172 0.092024337 0.753830526 0.624274669 -1.122117545
## [981] -0.558370556 -0.043501336 1.590334452 0.056323351 1.954624730
## [986] -0.393956236 -0.607800375 0.930702137 -0.819200476 2.116773401
## [991] 0.858271400 1.459642160 0.604104288 1.991716144 0.094260069
## [996] -0.801750043 -1.418953618 -1.680349599 -0.021479605 -1.329357208
Now a histogram:
# Histogram of the sample stored in `x`, using the default binning.
graphics::hist(x)
As with all classic datasets, the quartet is included in the R datasets package. First, load required libraries and data, and visualize them:
library(ggplot2)
library(gridExtra)
# Load Anscombe's quartet from the built-in datasets package and print it.
data("anscombe", package = "datasets")
anscombe
## x1 x2 x3 x4 y1 y2 y3 y4
## 1 10 10 10 8 8.04 9.14 7.46 6.58
## 2 8 8 8 8 6.95 8.14 6.77 5.76
## 3 13 13 13 8 7.58 8.74 12.74 7.71
## 4 9 9 9 8 8.81 8.77 7.11 8.84
## 5 11 11 11 8 8.33 9.26 7.81 8.47
## 6 14 14 14 8 9.96 8.10 8.84 7.04
## 7 6 6 6 8 7.24 6.13 6.08 5.25
## 8 4 4 4 19 4.26 3.10 5.39 12.50
## 9 12 12 12 8 10.84 9.13 8.15 5.56
## 10 7 7 7 8 4.82 7.26 6.42 7.91
## 11 5 5 5 8 5.68 4.74 5.73 6.89
The summary of the four datasets shows the similarities between them in terms of the mean:
# Per-column five-number summary plus mean for all eight quartet columns.
summary(object = anscombe)
## x1 x2 x3 x4
## Min. : 4.0 Min. : 4.0 Min. : 4.0 Min. : 8
## 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 6.5 1st Qu.: 8
## Median : 9.0 Median : 9.0 Median : 9.0 Median : 8
## Mean : 9.0 Mean : 9.0 Mean : 9.0 Mean : 9
## 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.:11.5 3rd Qu.: 8
## Max. :14.0 Max. :14.0 Max. :14.0 Max. :19
## y1 y2 y3 y4
## Min. : 4.260 Min. :3.100 Min. : 5.39 Min. : 5.250
## 1st Qu.: 6.315 1st Qu.:6.695 1st Qu.: 6.25 1st Qu.: 6.170
## Median : 7.580 Median :8.140 Median : 7.11 Median : 7.040
## Mean : 7.501 Mean :7.501 Mean : 7.50 Mean : 7.501
## 3rd Qu.: 8.570 3rd Qu.:8.950 3rd Qu.: 7.98 3rd Qu.: 8.190
## Max. :10.840 Max. :9.260 Max. :12.74 Max. :12.500
It is also easy to see the similarities in terms of the variance, correlation coefficient and linear regression:
# correlation
# Pearson correlation of each x column (1-4) with its matching y column (5-8).
# vapply() pins the result to exactly one double per pair, and the index is
# named `i` so it does not shadow the sample vector `x`.
vapply(
  1:4,
  function(i) cor(anscombe[[i]], anscombe[[i + 4]]),
  numeric(1)
)
## [1] 0.8164205 0.8162365 0.8162867 0.8165214
# variance
# Sample variance of each y column (columns 5-8 of `anscombe`).
# vapply() pins the result to exactly one double per column, and the index is
# named `i` so it does not shadow the sample vector `x`.
vapply(5:8, function(i) var(anscombe[[i]]), numeric(1))
## [1] 4.127269 4.127629 4.122620 4.123249
# linear regression
# Ordinary least-squares fit of y1 on x1 (first dataset of the quartet).
lm(formula = y1 ~ x1, data = anscombe)
##
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
##
## Coefficients:
## (Intercept) x1
## 3.0001 0.5001
# Ordinary least-squares fit of y2 on x2 (second dataset of the quartet).
lm(formula = y2 ~ x2, data = anscombe)
##
## Call:
## lm(formula = y2 ~ x2, data = anscombe)
##
## Coefficients:
## (Intercept) x2
## 3.001 0.500
# Ordinary least-squares fit of y3 on x3 (third dataset of the quartet).
lm(formula = y3 ~ x3, data = anscombe)
##
## Call:
## lm(formula = y3 ~ x3, data = anscombe)
##
## Coefficients:
## (Intercept) x3
## 3.0025 0.4997
# Ordinary least-squares fit of y4 on x4 (fourth dataset of the quartet).
lm(formula = y4 ~ x4, data = anscombe)
##
## Call:
## lm(formula = y4 ~ x4, data = anscombe)
##
## Coefficients:
## (Intercept) x4
## 3.0017 0.4999
# Build the scatter plot for one dataset of Anscombe's quartet.
#
# Args:
#   idx:       Dataset number (1-4); selects columns `x<idx>` and `y<idx>`.
#   data:      Data frame holding the quartet columns (defaults to `anscombe`).
#   intercept: Intercept of the reference line drawn over the points.
#   slope:     Slope of the reference line. The defaults (3 and 0.5) are the
#              rounded coefficients that all four `lm()` fits share.
#
# Returns:
#   A ggplot object with dark-red points, a dark-blue reference line, axes
#   forced to include the origin, and the title "dataset <idx>".
plot_anscombe_set <- function(idx, data = anscombe,
                              intercept = 3, slope = 0.5) {
  x_col <- paste0("x", idx)
  y_col <- paste0("y", idx)
  # Copy the selected pair into fixed column names so one aes() mapping serves
  # every dataset; the original column names are restored as axis labels below.
  points <- data.frame(x = data[[x_col]], y = data[[y_col]])

  ggplot(points) +
    geom_point(aes(x, y), color = "darkred", size = 3) +
    scale_x_continuous(breaks = seq(0, 20, 2)) +
    scale_y_continuous(breaks = seq(0, 12, 2)) +
    geom_abline(intercept = intercept, slope = slope, color = "darkblue") +
    expand_limits(x = 0, y = 0) +
    labs(title = paste("dataset", idx), x = x_col, y = y_col)
}

# One panel per dataset; the individual names are kept for later reuse.
p1 <- plot_anscombe_set(1)
p2 <- plot_anscombe_set(2)
p3 <- plot_anscombe_set(3)
p4 <- plot_anscombe_set(4)
p <- list(p1, p2, p3, p4)

# Lay the four panels out on a 2 x 2 grid.
grid.arrange(grobs = p, ncol = 2)