TidyDataUpdatedDataProcessing_tidyrdplyrinR_ MinervaSingh_Udemy
TidyDataUpdatedDataProcessing_tidyrdplyrinR_ MinervaSingh_Udemy
Source file ⇒ TidyDataUpdatedDataProcessing_tidyrdplyrinR.rmd
Read in Data From Different Sources
setwd("C:/Users/HP/Desktop/TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy/rfiles")
# Read in the wine-quality CSV data from the UCI Machine Learning Repository:
# https://archive.ics.uci.edu/ml/datasets/Wine+Quality
# header = TRUE reads the first line of the file as column names. TRUE is
# spelled out because `T` is an ordinary variable that can be reassigned.
# No `sep` is given here, so read.csv() falls back to its default separator
# (","); the printed rows below show all values landing in a single column.
winer1 <- read.csv("winequality-red.csv", header = TRUE)
head(winer1)
## fixed.acidity.volatile.acidity.citric.acid.residual.sugar.chlorides.free.sulfur.dioxide.total.sulfur.dioxide.density.pH.sulphates.alcohol.quality
## 1 7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
## 2 7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5
## 3 7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5
## 4 11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6
## 5 7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
## 6 7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5
## fixed.acidity.volatile.acidity.citric.acid.residual.sugar.chlorides.free.sulfur.dioxide.total.sulfur.dioxide.density.pH.sulphates.alcohol.quality
## 6.7;0.46;0.24;1.7;0.077;18;34;0.9948;3.39;0.6;10.6;6 : 4
## 7.2;0.36;0.46;2.1;0.074;24;44;0.99534;3.4;0.85;11;7 : 4
## 7.2;0.695;0.13;2;0.076;12;20;0.99546;3.29;0.54;10.1;5 : 4
## 7.5;0.51;0.02;1.7;0.084;13;31;0.99538;3.36;0.54;10.5;6: 4
## 11.5;0.18;0.51;4;0.104;4;23;0.9996;3.28;0.97;10.1;6 : 3
## 6.4;0.64;0.21;1.8;0.081;14;31;0.99689;3.59;0.66;9.8;5 : 3
## (Other) :1577
# Same read, now passing sep = "," explicitly. header = TRUE reads the first
# line as column names (TRUE rather than the reassignable `T`).
# The printed rows below are still a single ";"-joined column, so "," is not
# the separator used in this file.
winer1 <- read.csv("winequality-red.csv", header = TRUE, sep = ",")
head(winer1)
## fixed.acidity.volatile.acidity.citric.acid.residual.sugar.chlorides.free.sulfur.dioxide.total.sulfur.dioxide.density.pH.sulphates.alcohol.quality
## 1 7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
## 2 7.8;0.88;0;2.6;0.098;25;67;0.9968;3.2;0.68;9.8;5
## 3 7.8;0.76;0.04;2.3;0.092;15;54;0.997;3.26;0.65;9.8;5
## 4 11.2;0.28;0.56;1.9;0.075;17;60;0.998;3.16;0.58;9.8;6
## 5 7.4;0.7;0;1.9;0.076;11;34;0.9978;3.51;0.56;9.4;5
## 6 7.4;0.66;0;1.8;0.075;13;40;0.9978;3.51;0.56;9.4;5
## fixed.acidity.volatile.acidity.citric.acid.residual.sugar.chlorides.free.sulfur.dioxide.total.sulfur.dioxide.density.pH.sulphates.alcohol.quality
## 6.7;0.46;0.24;1.7;0.077;18;34;0.9948;3.39;0.6;10.6;6 : 4
## 7.2;0.36;0.46;2.1;0.074;24;44;0.99534;3.4;0.85;11;7 : 4
## 7.2;0.695;0.13;2;0.076;12;20;0.99546;3.29;0.54;10.1;5 : 4
## 7.5;0.51;0.02;1.7;0.084;13;31;0.99538;3.36;0.54;10.5;6: 4
## 11.5;0.18;0.51;4;0.104;4;23;0.9996;3.28;0.97;10.1;6 : 3
## 6.4;0.64;0.21;1.8;0.081;14;31;0.99689;3.59;0.66;9.8;5 : 3
## (Other) :1577
# Specify the correct separator: the fields in this file are split by ";".
# header = TRUE reads the first line as column names (TRUE rather than the
# reassignable `T`).
winer <- read.table("winequality-red.csv", header = TRUE, sep = ";")
head(winer)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 1 3.5 0.56 9.4
## 2 25 67 1 3.2 0.68 9.8
## 3 15 54 1 3.3 0.65 9.8
## 4 17 60 1 3.2 0.58 9.8
## 5 11 34 1 3.5 0.56 9.4
## 6 13 40 1 3.5 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.6 Min. :0.12 Min. :0.00 Min. : 0.9
## 1st Qu.: 7.1 1st Qu.:0.39 1st Qu.:0.09 1st Qu.: 1.9
## Median : 7.9 Median :0.52 Median :0.26 Median : 2.2
## Mean : 8.3 Mean :0.53 Mean :0.27 Mean : 2.5
## 3rd Qu.: 9.2 3rd Qu.:0.64 3rd Qu.:0.42 3rd Qu.: 2.6
## Max. :15.9 Max. :1.58 Max. :1.00 Max. :15.5
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01 Min. : 1 Min. : 6 Min. :0.99
## 1st Qu.:0.07 1st Qu.: 7 1st Qu.: 22 1st Qu.:1.00
## Median :0.08 Median :14 Median : 38 Median :1.00
## Mean :0.09 Mean :16 Mean : 46 Mean :1.00
## 3rd Qu.:0.09 3rd Qu.:21 3rd Qu.: 62 3rd Qu.:1.00
## Max. :0.61 Max. :72 Max. :289 Max. :1.00
## pH sulphates alcohol quality
## Min. :2.7 Min. :0.33 Min. : 8.4 Min. :3.0
## 1st Qu.:3.2 1st Qu.:0.55 1st Qu.: 9.5 1st Qu.:5.0
## Median :3.3 Median :0.62 Median :10.2 Median :6.0
## Mean :3.3 Mean :0.66 Mean :10.4 Mean :5.6
## 3rd Qu.:3.4 3rd Qu.:0.73 3rd Qu.:11.1 3rd Qu.:6.0
## Max. :4.0 Max. :2.00 Max. :14.9 Max. :8.0
## Read in Excel data
#summary(boston1)
library(readxl)
# Load the workbook "boston1.xls" from the current working directory.
# The preview printed below shows three extra columns (X__1, X__2, X__3)
# next to the numeric variables; X__3 carries free-text notes.
dfb <- read_excel(path = "boston1.xls")
head(dfb)
## # A tibble: 6 x 10
## MV INDUS NOX RM TAX PT LSTAT X__1 X__2 X__3
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <lgl> <chr>
## 1 24 2.31 53.8 6.58 296 15.3 4.98 NA NA Subset of Boston h~
## 2 21.6 7.07 46.9 6.42 242 17.8 9.14 NA NA data of Harrison a~
## 3 34.7 7.07 46.9 7.18 242 17.8 4.03 NA NA (1978). Each case~
## 4 33.4 2.18 45.8 7.00 222 18.7 2.94 NA NA Census tract in th~
## 5 36.2 2.18 45.8 7.15 222 18.7 5.33 NA NA <NA>
## 6 28.7 2.18 45.8 6.43 222 18.7 5.21 NA NA <NA>
## MV INDUS NOX RM TAX
## Min. : 5 Min. : 0.5 Min. :38 Min. :3.6 Min. :187
## 1st Qu.:17 1st Qu.: 5.2 1st Qu.:45 1st Qu.:5.9 1st Qu.:279
## Median :21 Median : 9.7 Median :54 Median :6.2 Median :330
## Mean :23 Mean :11.1 Mean :55 Mean :6.3 Mean :408
## 3rd Qu.:25 3rd Qu.:18.1 3rd Qu.:62 3rd Qu.:6.6 3rd Qu.:666
## Max. :50 Max. :27.7 Max. :87 Max. :8.8 Max. :711
## PT LSTAT X__1 X__2
## Min. :12.6 Min. : 2 Mode:logical Mode:logical
## 1st Qu.:17.4 1st Qu.: 7 NA's:506 NA's:506
## Median :19.1 Median :11
## Mean :18.5 Mean :13
## 3rd Qu.:20.2 3rd Qu.:17
## Max. :22.0 Max. :38
## X__3
## Length:506
## Class :character
## Mode :character
##
##
##
##################################################################
### Read in data from Wikipedia HTML tables
library(rvest)
# Summer Olympics medal tally
url <- "https://en.wikipedia.org/wiki/2016_Summer_Olympics_medal_table"
# How to obtain the XPath used below:
#   open the table on Wikipedia -> right-click -> Inspect -> in the HTML pane
#   select the table tag (this highlights the table on the page) ->
#   right-click -> Copy XPath
# XPath: //*[@id="mw-content-text"]/div/table[2]
medal_tally <- url %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table(fill = TRUE)
# html_table() is applied to a node set here, so take the first element of
# the result to get the table itself.
medal_tally <- medal_tally[[1]]
head(medal_tally)
## Rank NOC Gold Silver Bronze Total
## 1 1 United States (USA) 46 37 38 121
## 2 2 Great Britain (GBR) 27 23 17 67
## 3 3 China (CHN) 26 18 26 70
## 4 4 Russia (RUS) 19 17 20 56
## 5 5 Germany (GER) 17 10 15 42
## 6 6 Japan (JPN) 12 8 21 41
# World Heritage Sites (WHS) in the UK
url2 <- "https://en.wikipedia.org/wiki/List_of_World_Heritage_Sites_in_the_United_Kingdom_and_the_British_Overseas_Territories"
# Select the table addressed by the XPath below and parse it.
whsuk <- url2 %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[3]') %>%
  html_table(fill = TRUE)
# Keep the first (only requested) table from the parsed result.
whsuk <- whsuk[[1]]
head(whsuk)
## Name Image
## 1 Blaenavon Industrial Landscape NA
## 2 Blenheim Palace NA
## 3 Canterbury Cathedral, St Augustine's Abbey, and St Martin's Church NA
## 4 Castles and Town Walls of King Edward in Gwynedd NA
## 5 City of Bath NA
## 6 Cornwall and West Devon Mining Landscape NA
## Location
## 1 Blaenavon, Wales51°47'N 3°05'W<U+FEFF> / <U+FEFF>51.78°N 3.08°W<U+FEFF> / 51.78; -3.08<U+FEFF> (Blaenavon Industrial Landscape)[14]
## 2 Woodstock, Oxfordshire, England51°50'28<U+2033>N 1°21'40<U+2033>W<U+FEFF> / <U+FEFF>51.841°N 1.361°W<U+FEFF> / 51.841; -1.361<U+FEFF> (Blenheim Palace)[15]
## 3 Canterbury, Kent, England51°17'N 1°05'E<U+FEFF> / <U+FEFF>51.28°N 1.08°E<U+FEFF> / 51.28; 1.08<U+FEFF> (Canterbury Cathedral, St Augustine's Abbey, and St Martin's Church)[16]
## 4 Conwy, Isle of Anglesey and Gwynedd, Wales53°08'20<U+2033>N 4°16'34<U+2033>W<U+FEFF> / <U+FEFF>53.139°N 4.276°W<U+FEFF> / 53.139; -4.276<U+FEFF> (Castles and Town Walls of King Edward in Gwynedd)[19]
## 5 Bath, Somerset, England51°22'48<U+2033>N 2°21'36<U+2033>W<U+FEFF> / <U+FEFF>51.380°N 2.360°W<U+FEFF> / 51.380; -2.360<U+FEFF> (City of Bath)[21]
## 6 Cornwall and Devon, England50°08'N 5°23'W<U+FEFF> / <U+FEFF>50.13°N 5.38°W<U+FEFF> / 50.13; -5.38<U+FEFF> (Cornwall and West Devon Mining Landscape)[22]
## Date UNESCO data
## 1 19th century[14] 984; 2000;iii, iv[14]
## 2 17051722[15] 425; 1987;ii, iv[15]
## 3 11th century[16] 496; 1988;i, ii, vi[16]
## 4 13th14th centuries[19] 374; 1986;i, iii, iv[19]
## 5 1st19th centuries[21] 428; 1987;i, ii, iv[21]
## 6 18th and 19th centuries[22] 1,215; 2006;ii, iii, iv[22]
## Description
## 1 In the 19th century, Wales was the world's foremost producer of iron and coal. Blaenavon is an example of the landscape created by the industrial processes associated with the production of these materials. The site includes quarries, public buildings, workers' housing, and a railway.[14]
## 2 Blenheim Palace, the residence of John Churchill, 1st Duke of Marlborough, was designed by architects John Vanbrugh and Nicholas Hawksmoor. The associated park was landscaped by Capability Brown. The palace celebrated victory over the French and is significant for establishing English Romantic Architecture as a separate entity from French Classical Architecture.[15]
## 3 St Martin's Church is the oldest church in England. The church and St Augustine's Abbey were founded during the early stages of the introduction of Christianity to the Anglo-Saxons. The cathedral exhibits Romanesque and Gothic architecture, and is the seat of the Church of England.[16][17][18]
## 4 During the reign of Edward I of England (12721307), a series of castles was constructed in Wales with the purpose of subduing the population and establishing English colonies in Wales. The World Heritage Site covers many castles including Beaumaris, Caernarfon, Conwy, and Harlech. The castles of Edward I are considered the pinnacle of military architecture by military historians.[19][20]
## 5 Founded by the Romans as a spa, an important centre of the wool industry in the medieval period, and a spa town in the 18th century, Bath has a varied history. The city is preserved for its Roman remains and Palladian architecture.[21]
## 6 Tin and copper mining in Devon and Cornwall boomed in the 18th and 19th centuries, and at its peak the area produced two-thirds of the world's copper. The techniques and technology involved in deep mining developed in Devon and Cornwall were used around the world.[22]
##################################################
##### Extract data from an SQLite database
## A database is a collection of tables
library(RSQLite)
library(DBI)
# Open the connection returned by RSQLite::datasetsDb().
db <- RSQLite::datasetsDb()
# List all the tables in the database
dbListTables(db)
## [1] "BOD" "CO2" "ChickWeight"
## [4] "DNase" "Formaldehyde" "Indometh"
## [7] "InsectSprays" "LifeCycleSavings" "Loblolly"
## [10] "Orange" "OrchardSprays" "PlantGrowth"
## [13] "Puromycin" "Theoph" "ToothGrowth"
## [16] "USArrests" "USJudgeRatings" "airquality"
## [19] "anscombe" "attenu" "attitude"
## [22] "cars" "chickwts" "esoph"
## [25] "faithful" "freeny" "infert"
## [28] "iris" "longley" "morley"
## [31] "mtcars" "npk" "pressure"
## [34] "quakes" "randu" "rock"
## [37] "sleep" "stackloss" "swiss"
## [40] "trees" "warpbreaks" "women"
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
## 7 Qn1 Quebec nonchilled 1000 39.7
## 8 Qn2 Quebec nonchilled 95 13.6
## 9 Qn2 Quebec nonchilled 175 27.3
## 10 Qn2 Quebec nonchilled 250 37.1
## 11 Qn2 Quebec nonchilled 350 41.8
## 12 Qn2 Quebec nonchilled 500 40.6
## 13 Qn2 Quebec nonchilled 675 41.4
## 14 Qn2 Quebec nonchilled 1000 44.3
## 15 Qn3 Quebec nonchilled 95 16.2
## 16 Qn3 Quebec nonchilled 175 32.4
## 17 Qn3 Quebec nonchilled 250 40.3
## 18 Qn3 Quebec nonchilled 350 42.1
## 19 Qn3 Quebec nonchilled 500 42.9
## 20 Qn3 Quebec nonchilled 675 43.9
## 21 Qn3 Quebec nonchilled 1000 45.5
## 22 Qc1 Quebec chilled 95 14.2
## 23 Qc1 Quebec chilled 175 24.1
## 24 Qc1 Quebec chilled 250 30.3
## 25 Qc1 Quebec chilled 350 34.6
## 26 Qc1 Quebec chilled 500 32.5
## 27 Qc1 Quebec chilled 675 35.4
## 28 Qc1 Quebec chilled 1000 38.7
## 29 Qc2 Quebec chilled 95 9.3
## 30 Qc2 Quebec chilled 175 27.3
## 31 Qc2 Quebec chilled 250 35.0
## 32 Qc2 Quebec chilled 350 38.8
## 33 Qc2 Quebec chilled 500 38.6
## 34 Qc2 Quebec chilled 675 37.5
## 35 Qc2 Quebec chilled 1000 42.4
## 36 Qc3 Quebec chilled 95 15.1
## 37 Qc3 Quebec chilled 175 21.0
## 38 Qc3 Quebec chilled 250 38.1
## 39 Qc3 Quebec chilled 350 34.0
## 40 Qc3 Quebec chilled 500 38.9
## 41 Qc3 Quebec chilled 675 39.6
## 42 Qc3 Quebec chilled 1000 41.4
## 43 Mn1 Mississippi nonchilled 95 10.6
## 44 Mn1 Mississippi nonchilled 175 19.2
## 45 Mn1 Mississippi nonchilled 250 26.2
## 46 Mn1 Mississippi nonchilled 350 30.0
## 47 Mn1 Mississippi nonchilled 500 30.9
## 48 Mn1 Mississippi nonchilled 675 32.4
## 49 Mn1 Mississippi nonchilled 1000 35.5
## 50 Mn2 Mississippi nonchilled 95 12.0
## 51 Mn2 Mississippi nonchilled 175 22.0
## 52 Mn2 Mississippi nonchilled 250 30.6
## 53 Mn2 Mississippi nonchilled 350 31.8
## 54 Mn2 Mississippi nonchilled 500 32.4
## 55 Mn2 Mississippi nonchilled 675 31.1
## 56 Mn2 Mississippi nonchilled 1000 31.5
## 57 Mn3 Mississippi nonchilled 95 11.3
## 58 Mn3 Mississippi nonchilled 175 19.4
## 59 Mn3 Mississippi nonchilled 250 25.8
## 60 Mn3 Mississippi nonchilled 350 27.9
## 61 Mn3 Mississippi nonchilled 500 28.5
## 62 Mn3 Mississippi nonchilled 675 28.1
## 63 Mn3 Mississippi nonchilled 1000 27.8
## 64 Mc1 Mississippi chilled 95 10.5
## 65 Mc1 Mississippi chilled 175 14.9
## 66 Mc1 Mississippi chilled 250 18.1
## 67 Mc1 Mississippi chilled 350 18.9
## 68 Mc1 Mississippi chilled 500 19.5
## 69 Mc1 Mississippi chilled 675 22.2
## 70 Mc1 Mississippi chilled 1000 21.9
## 71 Mc2 Mississippi chilled 95 7.7
## 72 Mc2 Mississippi chilled 175 11.4
## 73 Mc2 Mississippi chilled 250 12.3
## 74 Mc2 Mississippi chilled 350 13.0
## 75 Mc2 Mississippi chilled 500 12.5
## 76 Mc2 Mississippi chilled 675 13.7
## 77 Mc2 Mississippi chilled 1000 14.4
## 78 Mc3 Mississippi chilled 95 10.6
## 79 Mc3 Mississippi chilled 175 18.0
## 80 Mc3 Mississippi chilled 250 17.9
## 81 Mc3 Mississippi chilled 350 17.9
## 82 Mc3 Mississippi chilled 500 17.9
## 83 Mc3 Mississippi chilled 675 18.9
## 84 Mc3 Mississippi chilled 1000 19.9
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn2 Quebec nonchilled 95 13.6
## 3 Qn3 Quebec nonchilled 95 16.2
## 4 Qc1 Quebec chilled 95 14.2
## 5 Qc2 Quebec chilled 95 9.3
## 6 Qc3 Quebec chilled 95 15.1
## 7 Mn1 Mississippi nonchilled 95 10.6
## 8 Mn2 Mississippi nonchilled 95 12.0
## 9 Mn3 Mississippi nonchilled 95 11.3
## 10 Mc1 Mississippi chilled 95 10.5
## 11 Mc2 Mississippi chilled 95 7.7
## 12 Mc3 Mississippi chilled 95 10.6
# Close the connection that `db` currently refers to before reusing the name.
dbDisconnect(db)
## Amazon food reviews
# NOTE(review): this path ends in "database.sqlite"; setwd() needs a
# directory, so confirm that a folder with that name exists.
setwd("C:\\Users\\HP\\Desktop\\TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy\\section2_\\database.sqlite")
# The data are stored in the file database.sqlite; dbConnect() opens a
# connection to it through the SQLite driver.
db <- dbConnect(drv = dbDriver("SQLite"), dbname = "database.sqlite")
# Names of every table in the database; only the first few are printed.
alltables <- dbListTables(db)
head(alltables)
## [1] "Reviews"
## Id ProductId UserId ProfileName
## 1 1 B001E4KFG0 A3SGXH7AUHU8GW delmartian
## 2 2 B00813GRG4 A1D87F6ZCVE5NK dll pa
## 3 3 B000LQOCH0 ABXLMWJIXXAIN Natalia Corres "Natalia Corres"
## 4 4 B000UA0QIQ A395BORC6FGVXV Karl
## 5 5 B006K2ZZ7K A1UQRSCLF8GW1T Michael D. Bigham "M. Wassir"
## 6 6 B006K2ZZ7K ADT0SRK1MGOEU Twoapennything
## HelpfulnessNumerator HelpfulnessDenominator Score Time
## 1 1 1 5 1303862400
## 2 0 0 1 1346976000
## 3 1 1 4 1219017600
## 4 3 3 2 1307923200
## 5 0 0 5 1350777600
## 6 0 0 4 1342051200
## Summary
## 1 Good Quality Dog Food
## 2 Not as Advertised
## 3 "Delight" says it all
## 4 Cough Medicine
## 5 Great taffy
## 6 Nice Taffy
## Text
## 1 I have bought several of the Vitality canned dog food products and have found them all to be of good quality. The product looks more like a stew than a processed meat and it smells better. My Labrador is finicky and she appreciates this product better than most.
## 2 Product arrived labeled as Jumbo Salted Peanuts...the peanuts were actually small sized unsalted. Not sure if this was an error or if the vendor intended to represent the product as "Jumbo".
## 3 This is a confection that has been around a few centuries. It is a light, pillowy citrus gelatin with nuts - in this case Filberts. And it is cut into tiny squares and then liberally coated with powdered sugar. And it is a tiny mouthful of heaven. Not too chewy, and very flavorful. I highly recommend this yummy treat. If you are familiar with the story of C.S. Lewis' "The Lion, The Witch, and The Wardrobe" - this is the treat that seduces Edmund into selling out his Brother and Sisters to the Witch.
## 4 If you are looking for the secret ingredient in Robitussin I believe I have found it. I got this in addition to the Root Beer Extract I ordered (which was good) and made some cherry soda. The flavor is very medicinal.
## 5 Great taffy at a great price. There was a wide assortment of yummy taffy. Delivery was very quick. If your a taffy lover, this is a deal.
## 6 I got a wild hair for taffy and ordered this five pound bag. The taffy was all very enjoyable with many flavors: watermelon, root beer, melon, peppermint, grape, etc. My only complaint is there was a bit too much red/black licorice-flavored pieces (just not my particular favorites). Between me, my kids, and my husband, this lasted only two weeks! I would recommend this brand of taffy -- it was a delightful treat.
## IPL database
# `db` is about to be rebound to a new connection. Close the connection it
# currently refers to first, so that handle is not left open with nothing
# pointing at it. dbIsValid() skips the call if it was already closed.
if (dbIsValid(db)) {
  dbDisconnect(db)
}
# NOTE(review): this path ends in "database.sqlite"; setwd() needs a
# directory, so confirm that a folder with that name exists.
setwd("C:\\Users\\HP\\Desktop\\TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy\\section2_\\ipl\\database.sqlite")
# The data are stored in the file database.sqlite; dbConnect() opens a
# connection to it through the SQLite driver.
db <- dbConnect(dbDriver("SQLite"), "database.sqlite")
# Names of every table in the database; only the first few are printed.
alltables <- dbListTables(db)
head(alltables)
## [1] "Ball_by_Ball" "Batsman_Scored" "Batting_Style" "Bowling_Style"
## [5] "City" "Country"
## Country_Id Country_Name
## 1 1 India
## 2 2 South Africa
## 3 3 U.A.E
## 4 4 New Zealand
## 5 5 Australia
## 6 6 Pakistan
## 7 7 Sri Lanka
## 8 8 West Indies
## 9 9 Zimbabwea
## 10 10 England
## 11 11 Bangladesh
## 12 12 Netherlands
## Match_Id Over_Id Ball_Id Runs_Scored Innings_No
## 1 335987 1 1 0 1
## 2 335987 1 1 1 2
## 3 335987 1 2 0 1
## 4 335987 1 3 0 2
## 5 335987 1 4 0 1
## 6 335987 1 4 1 2
## Match_Id Over_Id Ball_Id Runs_Scored Innings_No
## Min. :335987 Min. : 1.0 Min. :1.0 Min. :0.0 Min. :1.0
## 1st Qu.:419141 1st Qu.: 5.0 1st Qu.:2.0 1st Qu.:0.0 1st Qu.:1.0
## Median :548354 Median :10.0 Median :4.0 Median :1.0 Median :1.0
## Mean :590912 Mean :10.2 Mean :3.6 Mean :1.2 Mean :1.5
## 3rd Qu.:734000 3rd Qu.:15.0 3rd Qu.:5.0 3rd Qu.:1.0 3rd Qu.:2.0
## Max. :981024 Max. :20.0 Max. :9.0 Max. :6.0 Max. :4.0
## 'data.frame': 133097 obs. of 5 variables:
## $ Match_Id : int 335987 335987 335987 335987 335987 335987 335987 335987 335987 335987 ...
## $ Over_Id : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Ball_Id : int 1 1 2 3 4 4 5 5 6 6 ...
## $ Runs_Scored: int 0 1 0 0 0 1 0 1 0 0 ...
## $ Innings_No : int 1 2 1 2 1 2 1 2 1 2 ...
dbDisconnect(db)
###############################
### JSON --> JavaScript Object Notation
setwd("C:/Users/HP/Desktop/TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy/rfiles")
library(rjson)
# Name/URL of the JSON resource. The query string carries four parameters
# joined by "&": per_page, region, lendingtype and format. The "&region"
# parameter must be written out literally; an "&reg" sequence that gets
# decoded as an HTML entity turns into the "®" character and breaks the query.
json_file <- "http://api.worldbank.org/country?per_page=10&region=OED&lendingtype=LNX&format=json"
# The parsed JSON is stored in json_data
json_data <- fromJSON(file = json_file)
# The parsed result has two objects in the outermost list; the first one
# (printed below) holds the paging information.
json_data[[1]]
## $page
## [1] 1
##
## $pages
## [1] 4
##
## $per_page
## [1] "10"
##
## $total
## [1] 32
## [[1]]
## [[1]]$id
## [1] "AUS"
##
## [[1]]$iso2Code
## [1] "AU"
##
## [[1]]$name
## [1] "Australia"
##
## [[1]]$region
## [[1]]$region$id
## [1] "EAS"
##
## [[1]]$region$value
## [1] "East Asia & Pacific"
##
##
## [[1]]$adminregion
## [[1]]$adminregion$id
## [1] ""
##
## [[1]]$adminregion$value
## [1] ""
##
##
## [[1]]$incomeLevel
## [[1]]$incomeLevel$id
## [1] "HIC"
##
## [[1]]$incomeLevel$value
## [1] "High income"
##
##
## [[1]]$lendingType
## [[1]]$lendingType$id
## [1] "LNX"
##
## [[1]]$lendingType$value
## [1] "Not classified"
##
##
## [[1]]$capitalCity
## [1] "Canberra"
##
## [[1]]$longitude
## [1] "149.129"
##
## [[1]]$latitude
## [1] "-35.282"
##
##
## [[2]]
## [[2]]$id
## [1] "AUT"
##
## [[2]]$iso2Code
## [1] "AT"
##
## [[2]]$name
## [1] "Austria"
##
## [[2]]$region
## [[2]]$region$id
## [1] "ECS"
##
## [[2]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[2]]$adminregion
## [[2]]$adminregion$id
## [1] ""
##
## [[2]]$adminregion$value
## [1] ""
##
##
## [[2]]$incomeLevel
## [[2]]$incomeLevel$id
## [1] "HIC"
##
## [[2]]$incomeLevel$value
## [1] "High income"
##
##
## [[2]]$lendingType
## [[2]]$lendingType$id
## [1] "LNX"
##
## [[2]]$lendingType$value
## [1] "Not classified"
##
##
## [[2]]$capitalCity
## [1] "Vienna"
##
## [[2]]$longitude
## [1] "16.3798"
##
## [[2]]$latitude
## [1] "48.2201"
##
##
## [[3]]
## [[3]]$id
## [1] "BEL"
##
## [[3]]$iso2Code
## [1] "BE"
##
## [[3]]$name
## [1] "Belgium"
##
## [[3]]$region
## [[3]]$region$id
## [1] "ECS"
##
## [[3]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[3]]$adminregion
## [[3]]$adminregion$id
## [1] ""
##
## [[3]]$adminregion$value
## [1] ""
##
##
## [[3]]$incomeLevel
## [[3]]$incomeLevel$id
## [1] "HIC"
##
## [[3]]$incomeLevel$value
## [1] "High income"
##
##
## [[3]]$lendingType
## [[3]]$lendingType$id
## [1] "LNX"
##
## [[3]]$lendingType$value
## [1] "Not classified"
##
##
## [[3]]$capitalCity
## [1] "Brussels"
##
## [[3]]$longitude
## [1] "4.36761"
##
## [[3]]$latitude
## [1] "50.8371"
##
##
## [[4]]
## [[4]]$id
## [1] "CAN"
##
## [[4]]$iso2Code
## [1] "CA"
##
## [[4]]$name
## [1] "Canada"
##
## [[4]]$region
## [[4]]$region$id
## [1] "NAC"
##
## [[4]]$region$value
## [1] "North America"
##
##
## [[4]]$adminregion
## [[4]]$adminregion$id
## [1] ""
##
## [[4]]$adminregion$value
## [1] ""
##
##
## [[4]]$incomeLevel
## [[4]]$incomeLevel$id
## [1] "HIC"
##
## [[4]]$incomeLevel$value
## [1] "High income"
##
##
## [[4]]$lendingType
## [[4]]$lendingType$id
## [1] "LNX"
##
## [[4]]$lendingType$value
## [1] "Not classified"
##
##
## [[4]]$capitalCity
## [1] "Ottawa"
##
## [[4]]$longitude
## [1] "-75.6919"
##
## [[4]]$latitude
## [1] "45.4215"
##
##
## [[5]]
## [[5]]$id
## [1] "CHE"
##
## [[5]]$iso2Code
## [1] "CH"
##
## [[5]]$name
## [1] "Switzerland"
##
## [[5]]$region
## [[5]]$region$id
## [1] "ECS"
##
## [[5]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[5]]$adminregion
## [[5]]$adminregion$id
## [1] ""
##
## [[5]]$adminregion$value
## [1] ""
##
##
## [[5]]$incomeLevel
## [[5]]$incomeLevel$id
## [1] "HIC"
##
## [[5]]$incomeLevel$value
## [1] "High income"
##
##
## [[5]]$lendingType
## [[5]]$lendingType$id
## [1] "LNX"
##
## [[5]]$lendingType$value
## [1] "Not classified"
##
##
## [[5]]$capitalCity
## [1] "Bern"
##
## [[5]]$longitude
## [1] "7.44821"
##
## [[5]]$latitude
## [1] "46.948"
##
##
## [[6]]
## [[6]]$id
## [1] "CZE"
##
## [[6]]$iso2Code
## [1] "CZ"
##
## [[6]]$name
## [1] "Czech Republic"
##
## [[6]]$region
## [[6]]$region$id
## [1] "ECS"
##
## [[6]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[6]]$adminregion
## [[6]]$adminregion$id
## [1] ""
##
## [[6]]$adminregion$value
## [1] ""
##
##
## [[6]]$incomeLevel
## [[6]]$incomeLevel$id
## [1] "HIC"
##
## [[6]]$incomeLevel$value
## [1] "High income"
##
##
## [[6]]$lendingType
## [[6]]$lendingType$id
## [1] "LNX"
##
## [[6]]$lendingType$value
## [1] "Not classified"
##
##
## [[6]]$capitalCity
## [1] "Prague"
##
## [[6]]$longitude
## [1] "14.4205"
##
## [[6]]$latitude
## [1] "50.0878"
##
##
## [[7]]
## [[7]]$id
## [1] "DEU"
##
## [[7]]$iso2Code
## [1] "DE"
##
## [[7]]$name
## [1] "Germany"
##
## [[7]]$region
## [[7]]$region$id
## [1] "ECS"
##
## [[7]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[7]]$adminregion
## [[7]]$adminregion$id
## [1] ""
##
## [[7]]$adminregion$value
## [1] ""
##
##
## [[7]]$incomeLevel
## [[7]]$incomeLevel$id
## [1] "HIC"
##
## [[7]]$incomeLevel$value
## [1] "High income"
##
##
## [[7]]$lendingType
## [[7]]$lendingType$id
## [1] "LNX"
##
## [[7]]$lendingType$value
## [1] "Not classified"
##
##
## [[7]]$capitalCity
## [1] "Berlin"
##
## [[7]]$longitude
## [1] "13.4115"
##
## [[7]]$latitude
## [1] "52.5235"
##
##
## [[8]]
## [[8]]$id
## [1] "DNK"
##
## [[8]]$iso2Code
## [1] "DK"
##
## [[8]]$name
## [1] "Denmark"
##
## [[8]]$region
## [[8]]$region$id
## [1] "ECS"
##
## [[8]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[8]]$adminregion
## [[8]]$adminregion$id
## [1] ""
##
## [[8]]$adminregion$value
## [1] ""
##
##
## [[8]]$incomeLevel
## [[8]]$incomeLevel$id
## [1] "HIC"
##
## [[8]]$incomeLevel$value
## [1] "High income"
##
##
## [[8]]$lendingType
## [[8]]$lendingType$id
## [1] "LNX"
##
## [[8]]$lendingType$value
## [1] "Not classified"
##
##
## [[8]]$capitalCity
## [1] "Copenhagen"
##
## [[8]]$longitude
## [1] "12.5681"
##
## [[8]]$latitude
## [1] "55.6763"
##
##
## [[9]]
## [[9]]$id
## [1] "ESP"
##
## [[9]]$iso2Code
## [1] "ES"
##
## [[9]]$name
## [1] "Spain"
##
## [[9]]$region
## [[9]]$region$id
## [1] "ECS"
##
## [[9]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[9]]$adminregion
## [[9]]$adminregion$id
## [1] ""
##
## [[9]]$adminregion$value
## [1] ""
##
##
## [[9]]$incomeLevel
## [[9]]$incomeLevel$id
## [1] "HIC"
##
## [[9]]$incomeLevel$value
## [1] "High income"
##
##
## [[9]]$lendingType
## [[9]]$lendingType$id
## [1] "LNX"
##
## [[9]]$lendingType$value
## [1] "Not classified"
##
##
## [[9]]$capitalCity
## [1] "Madrid"
##
## [[9]]$longitude
## [1] "-3.70327"
##
## [[9]]$latitude
## [1] "40.4167"
##
##
## [[10]]
## [[10]]$id
## [1] "EST"
##
## [[10]]$iso2Code
## [1] "EE"
##
## [[10]]$name
## [1] "Estonia"
##
## [[10]]$region
## [[10]]$region$id
## [1] "ECS"
##
## [[10]]$region$value
## [1] "Europe & Central Asia"
##
##
## [[10]]$adminregion
## [[10]]$adminregion$id
## [1] ""
##
## [[10]]$adminregion$value
## [1] ""
##
##
## [[10]]$incomeLevel
## [[10]]$incomeLevel$id
## [1] "HIC"
##
## [[10]]$incomeLevel$value
## [1] "High income"
##
##
## [[10]]$lendingType
## [[10]]$lendingType$id
## [1] "LNX"
##
## [[10]]$lendingType$value
## [1] "Not classified"
##
##
## [[10]]$capitalCity
## [1] "Tallinn"
##
## [[10]]$longitude
## [1] "24.7586"
##
## [[10]]$latitude
## [1] "59.4392"
# You can pull any particular field out of the JSON data as shown below.
# json_data[[2]] holds one list per country: keep the "id" and "iso2Code"
# entries of each, then stack the per-country results row-wise.
d3 <- do.call(
  rbind,
  lapply(json_data[[2]], function(country) country[c("id", "iso2Code")])
)
d3
## id iso2Code
## [1,] "AUS" "AU"
## [2,] "AUT" "AT"
## [3,] "BEL" "BE"
## [4,] "CAN" "CA"
## [5,] "CHE" "CH"
## [6,] "CZE" "CZ"
## [7,] "DEU" "DE"
## [8,] "DNK" "DK"
## [9,] "ESP" "ES"
## [10,] "EST" "EE"
# Same idea with more fields: the country's id and iso2Code, the id and value
# of its nested "region" entry, and its capitalCity. The result therefore has
# two columns named "id" (country id first, region id second).
d4 <- do.call(
  rbind,
  lapply(json_data[[2]], function(country) {
    region <- country$region
    c(
      country[c("id", "iso2Code")],
      region[c("id", "value")],
      country["capitalCity"]
    )
  })
)
d4
## id iso2Code id value capitalCity
## [1,] "AUS" "AU" "EAS" "East Asia & Pacific" "Canberra"
## [2,] "AUT" "AT" "ECS" "Europe & Central Asia" "Vienna"
## [3,] "BEL" "BE" "ECS" "Europe & Central Asia" "Brussels"
## [4,] "CAN" "CA" "NAC" "North America" "Ottawa"
## [5,] "CHE" "CH" "ECS" "Europe & Central Asia" "Bern"
## [6,] "CZE" "CZ" "ECS" "Europe & Central Asia" "Prague"
## [7,] "DEU" "DE" "ECS" "Europe & Central Asia" "Berlin"
## [8,] "DNK" "DK" "ECS" "Europe & Central Asia" "Copenhagen"
## [9,] "ESP" "ES" "ECS" "Europe & Central Asia" "Madrid"
## [10,] "EST" "EE" "ECS" "Europe & Central Asia" "Tallinn"
# Another example: a JSON file read from the working directory
json_file <- "skorea.json"
# The parsed JSON is stored in json_data (replacing the previous contents)
json_data <- fromJSON(file = json_file)
# Individual records are accessed by position, as shown below
json_data[[1]]
## $Description
## [1] ""
##
## $Image
## [1] "/wiki/File:MuryeongsTomb.jpg"
##
## $Criteria
## [1] "Cultural: (ii)(iii)"
##
## $Site
## [1] "Baekje Historic Areas"
##
## $`Area ha (acre)`
## [1] "135 (330)"
##
## $Location
## [1] "South Chungcheong, North Jeolla"
##
## $Year
## [1] "2015"
## $Description
## [1] ""
##
## $Image
## [1] "/wiki/File:Korea-Gwangju-Gochang_Dolmens_5350-06.JPG"
##
## $Criteria
## [1] "Cultural: (iii)"
##
## $Site
## [1] "Gochang, Hwasun and Ganghwa Dolmen Sites"
##
## $`Area ha (acre)`
## [1] ""
##
## $Location
## [1] "Incheon, North Jeolla, South Jeolla"
##
## $Year
## [1] "2000"
## $Description
## [1] ""
##
## $Image
## [1] "/wiki/File:Haeinsa_Temple_(6222053899).jpg"
##
## $Criteria
## [1] "Cultural: (iv)(vi)"
##
## $Site
## [1] "Haeinsa Temple Janggyeong Panjeon, the Depositories for the Tripitaka Koreana Woodblocks"
##
## $`Area ha (acre)`
## [1] ""
##
## $Location
## [1] "South Gyeongsang"
##
## $Year
## [1] "1995"
# Alternatively, keep only a few useful fields from each record and stack
# them row-wise, giving one row per record and one column per field.
d <- do.call(
  rbind,
  lapply(json_data, function(site) {
    site[c("Image", "Criteria", "Site", "Area ha (acre)")]
  })
)
d
## Image
## [1,] "/wiki/File:MuryeongsTomb.jpg"
## [2,] "/wiki/File:Korea-Gwangju-Gochang_Dolmens_5350-06.JPG"
## [3,] "/wiki/File:Juhamnu,_Changdeokgung_-_Seoul,_Korea.JPG"
## [4,] "/wiki/File:Korea-Gyeongju-Bunhwangsa-Lanterns-03.jpg"
## [5,] "/wiki/File:Haeinsa_Temple_(6222053899).jpg"
## [6,] "/wiki/File:Hahoe_8784.jpg"
## [7,] "/wiki/File:Hwaseong2.jpg"
## [8,] "/wiki/File:KOCIS_Halla_Mountain_in_Jeju-do_(6387785543).jpg"
## [9,] "/wiki/File:Chongmyo_repository_(1509268349).jpg"
## [10,] "/wiki/File:Khitai5.jpg"
## [11,] "/wiki/File:Sejong_tomb_1.jpg"
## [12,] "/wiki/File:Bulguk_Tempel.jpg"
## Criteria
## [1,] "Cultural: (ii)(iii)"
## [2,] "Cultural: (iii)"
## [3,] "Cultural: (ii)(iii)(iv)"
## [4,] "Cultural: (ii)(iii)"
## [5,] "Cultural: (iv)(vi)"
## [6,] "Cultural: (iii)(iv)"
## [7,] "Cultural: (ii)(iii)"
## [8,] "Natural: (vii)(viii)"
## [9,] "Cultural: (iv)"
## [10,] "Cultural: (ii)(iv)"
## [11,] "Cultural: (iii)(iv)(vi)"
## [12,] "Cultural: (i)(iv)"
## Site
## [1,] "Baekje Historic Areas"
## [2,] "Gochang, Hwasun and Ganghwa Dolmen Sites"
## [3,] "Changdeokgung Palace Complex"
## [4,] "Gyeongju Historic Areas"
## [5,] "Haeinsa Temple Janggyeong Panjeon, the Depositories for the Tripitaka Koreana Woodblocks"
## [6,] "Historic Villages of Korea: Hahoe and Yangdong"
## [7,] "Hwaseong Fortress"
## [8,] "Jeju Volcanic Island and Lava Tubes"
## [9,] "Jongmyo Shrine"
## [10,] "Namhansanseong"
## [11,] "Royal Tombs of the Joseon Dynasty"
## [12,] "Seokguram Grotto and Bulguksa Temple"
## Area ha (acre)
## [1,] "135 (330)"
## [2,] ""
## [3,] ""
## [4,] "2,880 (7,100)"
## [5,] ""
## [6,] "600 (1,500)"
## [7,] ""
## [8,] "9,475 (23,410)"
## [9,] "19 (47)"
## [10,] "409 (1,010)"
## [11,] "1,891 (4,670)"
## [12,] ""
## [,1] [,2] [,3] [,4]
## [1,] 1 2 3 4
## [2,] 1 2 3 4
## [3,] 1 2 3 4
## [4,] 1 2 3 4
## [5,] 1 2 3 4
## [6,] 1 2 3 4
## [7,] 1 2 3 4
## [8,] 1 2 3 4
## [9,] 1 2 3 4
## [10,] 1 2 3 4
## [11,] 1 2 3 4
## [12,] 1 2 3 4
## [[1]]
## [1] "/wiki/File:MuryeongsTomb.jpg"
##
## [[2]]
## [1] "/wiki/File:Korea-Gwangju-Gochang_Dolmens_5350-06.JPG"
##
## [[3]]
## [1] "/wiki/File:Juhamnu,_Changdeokgung_-_Seoul,_Korea.JPG"
##
## [[4]]
## [1] "/wiki/File:Korea-Gyeongju-Bunhwangsa-Lanterns-03.jpg"
##
## [[5]]
## [1] "/wiki/File:Haeinsa_Temple_(6222053899).jpg"
##
## [[6]]
## [1] "/wiki/File:Hahoe_8784.jpg"
##
## [[7]]
## [1] "/wiki/File:Hwaseong2.jpg"
##
## [[8]]
## [1] "/wiki/File:KOCIS_Halla_Mountain_in_Jeju-do_(6387785543).jpg"
##
## [[9]]
## [1] "/wiki/File:Chongmyo_repository_(1509268349).jpg"
##
## [[10]]
## [1] "/wiki/File:Khitai5.jpg"
##
## [[11]]
## [1] "/wiki/File:Sejong_tomb_1.jpg"
##
## [[12]]
## [1] "/wiki/File:Bulguk_Tempel.jpg"
## $Image
## [1] "/wiki/File:MuryeongsTomb.jpg"
##
## $Criteria
## [1] "Cultural: (ii)(iii)"
##
## $Site
## [1] "Baekje Historic Areas"
##
## $`Area ha (acre)`
## [1] "135 (330)"
Data Processing With dplyr
Pipe Operator %>%
################################################################
library(magrittr)
library(dplyr)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## %>% tells R to take the value on its left
## and pass it as the first argument to the function on its right
## Select Columns
iris %>% select(Species,Petal.Width)
## Species Petal.Width
## 1 setosa 0.2
## 2 setosa 0.2
## 3 setosa 0.2
## 4 setosa 0.2
## 5 setosa 0.2
## 6 setosa 0.4
## 7 setosa 0.3
## 8 setosa 0.2
## 9 setosa 0.2
## 10 setosa 0.1
## 11 setosa 0.2
## 12 setosa 0.2
## 13 setosa 0.1
## 14 setosa 0.1
## 15 setosa 0.2
## 16 setosa 0.4
## 17 setosa 0.4
## 18 setosa 0.3
## 19 setosa 0.3
## 20 setosa 0.3
## 21 setosa 0.2
## 22 setosa 0.4
## 23 setosa 0.2
## 24 setosa 0.5
## 25 setosa 0.2
## 26 setosa 0.2
## 27 setosa 0.4
## 28 setosa 0.2
## 29 setosa 0.2
## 30 setosa 0.2
## 31 setosa 0.2
## 32 setosa 0.4
## 33 setosa 0.1
## 34 setosa 0.2
## 35 setosa 0.2
## 36 setosa 0.2
## 37 setosa 0.2
## 38 setosa 0.1
## 39 setosa 0.2
## 40 setosa 0.2
## 41 setosa 0.3
## 42 setosa 0.3
## 43 setosa 0.2
## 44 setosa 0.6
## 45 setosa 0.4
## 46 setosa 0.3
## 47 setosa 0.2
## 48 setosa 0.2
## 49 setosa 0.2
## 50 setosa 0.2
## 51 versicolor 1.4
## 52 versicolor 1.5
## 53 versicolor 1.5
## 54 versicolor 1.3
## 55 versicolor 1.5
## 56 versicolor 1.3
## 57 versicolor 1.6
## 58 versicolor 1.0
## 59 versicolor 1.3
## 60 versicolor 1.4
## 61 versicolor 1.0
## 62 versicolor 1.5
## 63 versicolor 1.0
## 64 versicolor 1.4
## 65 versicolor 1.3
## 66 versicolor 1.4
## 67 versicolor 1.5
## 68 versicolor 1.0
## 69 versicolor 1.5
## 70 versicolor 1.1
## 71 versicolor 1.8
## 72 versicolor 1.3
## 73 versicolor 1.5
## 74 versicolor 1.2
## 75 versicolor 1.3
## 76 versicolor 1.4
## 77 versicolor 1.4
## 78 versicolor 1.7
## 79 versicolor 1.5
## 80 versicolor 1.0
## 81 versicolor 1.1
## 82 versicolor 1.0
## 83 versicolor 1.2
## 84 versicolor 1.6
## 85 versicolor 1.5
## 86 versicolor 1.6
## 87 versicolor 1.5
## 88 versicolor 1.3
## 89 versicolor 1.3
## 90 versicolor 1.3
## 91 versicolor 1.2
## 92 versicolor 1.4
## 93 versicolor 1.2
## 94 versicolor 1.0
## 95 versicolor 1.3
## 96 versicolor 1.2
## 97 versicolor 1.3
## 98 versicolor 1.3
## 99 versicolor 1.1
## 100 versicolor 1.3
## 101 virginica 2.5
## 102 virginica 1.9
## 103 virginica 2.1
## 104 virginica 1.8
## 105 virginica 2.2
## 106 virginica 2.1
## 107 virginica 1.7
## 108 virginica 1.8
## 109 virginica 1.8
## 110 virginica 2.5
## 111 virginica 2.0
## 112 virginica 1.9
## 113 virginica 2.1
## 114 virginica 2.0
## 115 virginica 2.4
## 116 virginica 2.3
## 117 virginica 1.8
## 118 virginica 2.2
## 119 virginica 2.3
## 120 virginica 1.5
## 121 virginica 2.3
## 122 virginica 2.0
## 123 virginica 2.0
## 124 virginica 1.8
## 125 virginica 2.1
## 126 virginica 1.8
## 127 virginica 1.8
## 128 virginica 1.8
## 129 virginica 2.1
## 130 virginica 1.6
## 131 virginica 1.9
## 132 virginica 2.0
## 133 virginica 2.2
## 134 virginica 1.5
## 135 virginica 1.4
## 136 virginica 2.3
## 137 virginica 2.4
## 138 virginica 1.8
## 139 virginica 1.8
## 140 virginica 2.1
## 141 virginica 2.4
## 142 virginica 2.3
## 143 virginica 1.9
## 144 virginica 2.3
## 145 virginica 2.5
## 146 virginica 2.3
## 147 virginica 1.9
## 148 virginica 2.0
## 149 virginica 2.3
## 150 virginica 1.8
## Species Petal.Width
## 1 setosa 0.2
## 2 setosa 0.2
## 3 setosa 0.2
## 4 setosa 0.2
## 5 setosa 0.2
## 6 setosa 0.4
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
## 7 4.6 3.4 1.4 0.3
## 8 5.0 3.4 1.5 0.2
## 9 4.4 2.9 1.4 0.2
## 10 4.9 3.1 1.5 0.1
## 11 5.4 3.7 1.5 0.2
## 12 4.8 3.4 1.6 0.2
## 13 4.8 3.0 1.4 0.1
## 14 4.3 3.0 1.1 0.1
## 15 5.8 4.0 1.2 0.2
## 16 5.7 4.4 1.5 0.4
## 17 5.4 3.9 1.3 0.4
## 18 5.1 3.5 1.4 0.3
## 19 5.7 3.8 1.7 0.3
## 20 5.1 3.8 1.5 0.3
## 21 5.4 3.4 1.7 0.2
## 22 5.1 3.7 1.5 0.4
## 23 4.6 3.6 1.0 0.2
## 24 5.1 3.3 1.7 0.5
## 25 4.8 3.4 1.9 0.2
## 26 5.0 3.0 1.6 0.2
## 27 5.0 3.4 1.6 0.4
## 28 5.2 3.5 1.5 0.2
## 29 5.2 3.4 1.4 0.2
## 30 4.7 3.2 1.6 0.2
## 31 4.8 3.1 1.6 0.2
## 32 5.4 3.4 1.5 0.4
## 33 5.2 4.1 1.5 0.1
## 34 5.5 4.2 1.4 0.2
## 35 4.9 3.1 1.5 0.2
## 36 5.0 3.2 1.2 0.2
## 37 5.5 3.5 1.3 0.2
## 38 4.9 3.6 1.4 0.1
## 39 4.4 3.0 1.3 0.2
## 40 5.1 3.4 1.5 0.2
## 41 5.0 3.5 1.3 0.3
## 42 4.5 2.3 1.3 0.3
## 43 4.4 3.2 1.3 0.2
## 44 5.0 3.5 1.6 0.6
## 45 5.1 3.8 1.9 0.4
## 46 4.8 3.0 1.4 0.3
## 47 5.1 3.8 1.6 0.2
## 48 4.6 3.2 1.4 0.2
## 49 5.3 3.7 1.5 0.2
## 50 5.0 3.3 1.4 0.2
## 51 7.0 3.2 4.7 1.4
## 52 6.4 3.2 4.5 1.5
## 53 6.9 3.1 4.9 1.5
## 54 5.5 2.3 4.0 1.3
## 55 6.5 2.8 4.6 1.5
## 56 5.7 2.8 4.5 1.3
## 57 6.3 3.3 4.7 1.6
## 58 4.9 2.4 3.3 1.0
## 59 6.6 2.9 4.6 1.3
## 60 5.2 2.7 3.9 1.4
## 61 5.0 2.0 3.5 1.0
## 62 5.9 3.0 4.2 1.5
## 63 6.0 2.2 4.0 1.0
## 64 6.1 2.9 4.7 1.4
## 65 5.6 2.9 3.6 1.3
## 66 6.7 3.1 4.4 1.4
## 67 5.6 3.0 4.5 1.5
## 68 5.8 2.7 4.1 1.0
## 69 6.2 2.2 4.5 1.5
## 70 5.6 2.5 3.9 1.1
## 71 5.9 3.2 4.8 1.8
## 72 6.1 2.8 4.0 1.3
## 73 6.3 2.5 4.9 1.5
## 74 6.1 2.8 4.7 1.2
## 75 6.4 2.9 4.3 1.3
## 76 6.6 3.0 4.4 1.4
## 77 6.8 2.8 4.8 1.4
## 78 6.7 3.0 5.0 1.7
## 79 6.0 2.9 4.5 1.5
## 80 5.7 2.6 3.5 1.0
## 81 5.5 2.4 3.8 1.1
## 82 5.5 2.4 3.7 1.0
## 83 5.8 2.7 3.9 1.2
## 84 6.0 2.7 5.1 1.6
## 85 5.4 3.0 4.5 1.5
## 86 6.0 3.4 4.5 1.6
## 87 6.7 3.1 4.7 1.5
## 88 6.3 2.3 4.4 1.3
## 89 5.6 3.0 4.1 1.3
## 90 5.5 2.5 4.0 1.3
## 91 5.5 2.6 4.4 1.2
## 92 6.1 3.0 4.6 1.4
## 93 5.8 2.6 4.0 1.2
## 94 5.0 2.3 3.3 1.0
## 95 5.6 2.7 4.2 1.3
## 96 5.7 3.0 4.2 1.2
## 97 5.7 2.9 4.2 1.3
## 98 6.2 2.9 4.3 1.3
## 99 5.1 2.5 3.0 1.1
## 100 5.7 2.8 4.1 1.3
## 101 6.3 3.3 6.0 2.5
## 102 5.8 2.7 5.1 1.9
## 103 7.1 3.0 5.9 2.1
## 104 6.3 2.9 5.6 1.8
## 105 6.5 3.0 5.8 2.2
## 106 7.6 3.0 6.6 2.1
## 107 4.9 2.5 4.5 1.7
## 108 7.3 2.9 6.3 1.8
## 109 6.7 2.5 5.8 1.8
## 110 7.2 3.6 6.1 2.5
## 111 6.5 3.2 5.1 2.0
## 112 6.4 2.7 5.3 1.9
## 113 6.8 3.0 5.5 2.1
## 114 5.7 2.5 5.0 2.0
## 115 5.8 2.8 5.1 2.4
## 116 6.4 3.2 5.3 2.3
## 117 6.5 3.0 5.5 1.8
## 118 7.7 3.8 6.7 2.2
## 119 7.7 2.6 6.9 2.3
## 120 6.0 2.2 5.0 1.5
## 121 6.9 3.2 5.7 2.3
## 122 5.6 2.8 4.9 2.0
## 123 7.7 2.8 6.7 2.0
## 124 6.3 2.7 4.9 1.8
## 125 6.7 3.3 5.7 2.1
## 126 7.2 3.2 6.0 1.8
## 127 6.2 2.8 4.8 1.8
## 128 6.1 3.0 4.9 1.8
## 129 6.4 2.8 5.6 2.1
## 130 7.2 3.0 5.8 1.6
## 131 7.4 2.8 6.1 1.9
## 132 7.9 3.8 6.4 2.0
## 133 6.4 2.8 5.6 2.2
## 134 6.3 2.8 5.1 1.5
## 135 6.1 2.6 5.6 1.4
## 136 7.7 3.0 6.1 2.3
## 137 6.3 3.4 5.6 2.4
## 138 6.4 3.1 5.5 1.8
## 139 6.0 3.0 4.8 1.8
## 140 6.9 3.1 5.4 2.1
## 141 6.7 3.1 5.6 2.4
## 142 6.9 3.1 5.1 2.3
## 143 5.8 2.7 5.1 1.9
## 144 6.8 3.2 5.9 2.3
## 145 6.7 3.3 5.7 2.5
## 146 6.7 3.0 5.2 2.3
## 147 6.3 2.5 5.0 1.9
## 148 6.5 3.0 5.2 2.0
## 149 6.2 3.4 5.4 2.3
## 150 5.9 3.0 5.1 1.8
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
## Sepal.Length Sepal.Width Petal.Length
## 1 5.1 3.5 1.4
## 2 4.9 3.0 1.4
## 3 4.7 3.2 1.3
## 4 4.6 3.1 1.5
## 5 5.0 3.6 1.4
## 6 5.4 3.9 1.7
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
## Petal.Width Species
## 1 0.2 setosa
## 2 0.2 setosa
## 3 0.2 setosa
## 4 0.2 setosa
## 5 0.2 setosa
## 6 0.4 setosa
## Sepal.Length Sepal.Width Species
## 1 5.1 3.5 setosa
## 2 4.9 3.0 setosa
## 3 4.7 3.2 setosa
## 4 4.6 3.1 setosa
## 5 5.0 3.6 setosa
## 6 5.4 3.9 setosa
## Species
## 1 setosa
## 2 setosa
## 3 setosa
## 4 setosa
## 5 setosa
## 6 setosa
## 7 setosa
## 8 setosa
## 9 setosa
## 10 setosa
## 11 setosa
## 12 setosa
## 13 setosa
## 14 setosa
## 15 setosa
## 16 setosa
## 17 setosa
## 18 setosa
## 19 setosa
## 20 setosa
## 21 setosa
## 22 setosa
## 23 setosa
## 24 setosa
## 25 setosa
## 26 setosa
## 27 setosa
## 28 setosa
## 29 setosa
## 30 setosa
## 31 setosa
## 32 setosa
## 33 setosa
## 34 setosa
## 35 setosa
## 36 setosa
## 37 setosa
## 38 setosa
## 39 setosa
## 40 setosa
## 41 setosa
## 42 setosa
## 43 setosa
## 44 setosa
## 45 setosa
## 46 setosa
## 47 setosa
## 48 setosa
## 49 setosa
## 50 setosa
## 51 versicolor
## 52 versicolor
## 53 versicolor
## 54 versicolor
## 55 versicolor
## 56 versicolor
## 57 versicolor
## 58 versicolor
## 59 versicolor
## 60 versicolor
## 61 versicolor
## 62 versicolor
## 63 versicolor
## 64 versicolor
## 65 versicolor
## 66 versicolor
## 67 versicolor
## 68 versicolor
## 69 versicolor
## 70 versicolor
## 71 versicolor
## 72 versicolor
## 73 versicolor
## 74 versicolor
## 75 versicolor
## 76 versicolor
## 77 versicolor
## 78 versicolor
## 79 versicolor
## 80 versicolor
## 81 versicolor
## 82 versicolor
## 83 versicolor
## 84 versicolor
## 85 versicolor
## 86 versicolor
## 87 versicolor
## 88 versicolor
## 89 versicolor
## 90 versicolor
## 91 versicolor
## 92 versicolor
## 93 versicolor
## 94 versicolor
## 95 versicolor
## 96 versicolor
## 97 versicolor
## 98 versicolor
## 99 versicolor
## 100 versicolor
## 101 virginica
## 102 virginica
## 103 virginica
## 104 virginica
## 105 virginica
## 106 virginica
## 107 virginica
## 108 virginica
## 109 virginica
## 110 virginica
## 111 virginica
## 112 virginica
## 113 virginica
## 114 virginica
## 115 virginica
## 116 virginica
## 117 virginica
## 118 virginica
## 119 virginica
## 120 virginica
## 121 virginica
## 122 virginica
## 123 virginica
## 124 virginica
## 125 virginica
## 126 virginica
## 127 virginica
## 128 virginica
## 129 virginica
## 130 virginica
## 131 virginica
## 132 virginica
## 133 virginica
## 134 virginica
## 135 virginica
## 136 virginica
## 137 virginica
## 138 virginica
## 139 virginica
## 140 virginica
## 141 virginica
## 142 virginica
## 143 virginica
## 144 virginica
## 145 virginica
## 146 virginica
## 147 virginica
## 148 virginica
## 149 virginica
## 150 virginica
## Sepal.Width Petal.Width
## 1 3.5 0.2
## 2 3.0 0.2
## 3 3.2 0.2
## 4 3.1 0.2
## 5 3.6 0.2
## 6 3.9 0.4
## Sepal.Length Petal.Length
## 1 5.1 1.4
## 2 4.9 1.4
## 3 4.7 1.3
## 4 4.6 1.5
## 5 5.0 1.4
## 6 5.4 1.7
Dplyr select
## Country_region Ladder SD_Ladder Positive_affect Negative_affect
## 1 Finland 1 4 41 10
## 2 Denmark 2 13 24 26
## 3 Norway 3 8 16 29
## 4 Iceland 4 9 3 3
## 5 Netherlands 5 1 12 25
## 6 Switzerland 6 11 44 21
## Social_support Freedom Corruption Generosity Log_GDP life_expectancy
## 1 2 5 4 47 22 27
## 2 4 6 3 22 14 23
## 3 3 3 8 11 7 12
## 4 1 7 45 3 15 13
## 5 15 19 12 7 12 18
## 6 13 11 7 16 8 4
## Observations: 156
## Variables: 11
## $ Country_region <fct> Finland, Denmark, Norway, Iceland, Netherlands...
## $ Ladder <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ SD_Ladder <int> 4, 13, 8, 9, 1, 11, 18, 15, 23, 10, 26, 62, 14...
## $ Positive_affect <int> 41, 24, 16, 3, 12, 44, 34, 22, 18, 64, 47, 4, ...
## $ Negative_affect <int> 10, 26, 29, 3, 25, 21, 8, 12, 49, 24, 37, 87, ...
## $ Social_support <int> 2, 4, 3, 1, 15, 13, 25, 5, 20, 31, 7, 42, 38, ...
## $ Freedom <int> 5, 6, 3, 7, 19, 11, 10, 8, 9, 26, 17, 16, 93, ...
## $ Corruption <int> 4, 3, 8, 45, 12, 7, 6, 5, 11, 19, 13, 58, 74, ...
## $ Generosity <int> 47, 22, 11, 3, 7, 16, 17, 8, 14, 25, 6, 75, 24...
## $ Log_GDP <int> 22, 14, 7, 15, 12, 8, 13, 26, 19, 16, 18, 67, ...
## $ life_expectancy <int> 27, 23, 12, 13, 18, 4, 17, 14, 8, 15, 10, 28, ...
## SD_Ladder
## 1 4
## 2 13
## 3 8
## 4 9
## 5 1
## 6 11
## 7 18
## 8 15
## 9 23
## 10 10
## 11 26
## 12 62
## 13 14
## 14 3
## 15 16
## 16 34
## 17 17
## 18 7
## 19 49
## 20 20
## 21 65
## 22 42
## 23 76
## 24 19
## 25 37
## 26 61
## 27 136
## 28 93
## 29 86
## 30 21
## 31 121
## 32 116
## 33 88
## 34 5
## 35 112
## 36 31
## 37 83
## 38 39
## 39 89
## 40 28
## 41 99
## 42 55
## 43 120
## 44 54
## 45 133
## 46 107
## 47 97
## 48 75
## 49 95
## 50 113
## 51 98
## 52 81
## 53 30
## 54 57
## 55 32
## 56 102
## 57 94
## 58 43
## 59 151
## 60 40
## 61 71
## 62 36
## 63 90
## 64 35
## 65 114
## 66 73
## 67 53
## 68 64
## 69 119
## 70 100
## 71 45
## 72 115
## 73 84
## 74 50
## 75 29
## 76 33
## 77 155
## 78 80
## 79 58
## 80 12
## 81 22
## 82 87
## 83 48
## 84 67
## 85 130
## 86 46
## 87 2
## 88 56
## 89 101
## 90 24
## 91 60
## 92 108
## 93 72
## 94 27
## 95 6
## 96 131
## 97 47
## 98 129
## 99 134
## 100 128
## 101 127
## 102 149
## 103 152
## 104 105
## 105 59
## 106 124
## 107 126
## 108 141
## 109 135
## 110 110
## 111 44
## 112 74
## 113 106
## 114 144
## 115 92
## 116 82
## 117 109
## 118 146
## 119 51
## 120 142
## 121 118
## 122 68
## 123 154
## 124 79
## 125 52
## 126 147
## 127 78
## 128 96
## 129 153
## 130 91
## 131 70
## 132 139
## 133 69
## 134 38
## 135 104
## 136 148
## 137 66
## 138 145
## 139 103
## 140 41
## 141 156
## 142 143
## 143 77
## 144 150
## 145 138
## 146 123
## 147 111
## 148 125
## 149 137
## 150 132
## 151 85
## 152 63
## 153 122
## 154 25
## 155 117
## 156 140
## Social_support Freedom
## 1 2 5
## 2 4 6
## 3 3 3
## 4 1 7
## 5 15 19
## 6 13 11
## 7 25 10
## 8 5 8
## 9 20 9
## 10 31 26
## 11 7 17
## 12 42 16
## 13 38 93
## 14 27 28
## 15 9 63
## 16 6 33
## 17 39 44
## 18 22 53
## 19 37 62
## 20 24 58
## 21 72 4
## 22 16 12
## 23 67 71
## 24 32 69
## 25 48 102
## 26 58 98
## 27 78 25
## 28 62 68
## 29 NA NA
## 30 26 95
## 31 41 32
## 32 43 84
## 33 35 30
## 34 36 20
## 35 83 74
## 36 23 132
## 37 59 24
## 38 21 108
## 39 29 51
## 40 44 52
## 41 11 1
## 42 17 122
## 43 52 56
## 44 14 13
## 45 66 70
## 46 85 50
## 47 46 54
## 48 86 57
## 49 90 81
## 50 71 42
## 51 69 47
## 52 53 18
## 53 34 126
## 54 91 144
## 55 12 45
## 56 28 49
## 57 54 40
## 58 50 64
## 59 84 39
## 60 19 80
## 61 93 35
## 62 51 138
## 63 30 34
## 64 81 77
## 65 77 61
## 66 47 37
## 67 130 114
## 68 40 107
## 69 75 15
## 70 57 124
## 71 65 128
## 72 73 79
## 73 60 139
## 74 113 86
## 75 79 118
## 76 76 66
## 77 55 43
## 78 92 137
## 79 61 140
## 80 97 36
## 81 33 131
## 82 102 150
## 83 10 112
## 84 74 105
## 85 111 75
## 86 45 38
## 87 8 83
## 88 101 149
## 89 139 76
## 90 104 101
## 91 89 136
## 92 94 48
## 93 108 31
## 94 64 23
## 95 68 59
## 96 129 90
## 97 18 115
## 98 132 91
## 99 137 100
## 100 87 67
## 101 88 88
## 102 153 103
## 103 138 92
## 104 95 119
## 105 120 22
## 106 63 85
## 107 133 87
## 108 49 145
## 109 109 2
## 110 82 134
## 111 106 121
## 112 145 14
## 113 70 82
## 114 140 111
## 115 116 127
## 116 117 123
## 117 134 117
## 118 136 109
## 119 147 104
## 120 125 89
## 121 123 72
## 122 99 151
## 123 122 46
## 124 121 143
## 125 126 27
## 126 124 130
## 127 107 125
## 128 112 110
## 129 135 116
## 130 80 55
## 131 96 29
## 132 141 142
## 133 56 141
## 134 119 106
## 135 103 113
## 136 114 99
## 137 118 129
## 138 115 73
## 139 149 120
## 140 142 41
## 141 127 94
## 142 143 148
## 143 128 146
## 144 98 97
## 145 152 135
## 146 110 96
## 147 146 152
## 148 105 60
## 149 154 153
## 150 150 65
## 151 100 147
## 152 144 21
## 153 131 78
## 154 151 155
## 155 155 133
## 156 148 154
## Country_region Ladder SD_Ladder Positive_affect
## 1 Finland 1 4 41
## 2 Denmark 2 13 24
## 3 Norway 3 8 16
## 4 Iceland 4 9 3
## 5 Netherlands 5 1 12
## 6 Switzerland 6 11 44
## 7 Sweden 7 18 34
## 8 New Zealand 8 15 22
## 9 Canada 9 23 18
## 10 Austria 10 10 64
## 11 Australia 11 26 47
## 12 Costa Rica 12 62 4
## 13 Israel 13 14 104
## 14 Luxembourg 14 3 62
## 15 United Kingdom 15 16 52
## 16 Ireland 16 34 33
## 17 Germany 17 17 65
## 18 Belgium 18 7 57
## 19 United States 19 49 35
## 20 Czech Republic 20 20 74
## 21 United Arab Emirates 21 65 43
## 22 Malta 22 42 83
## 23 Mexico 23 76 6
## 24 France 24 19 56
## 25 Taiwan 25 37 17
## 26 Chile 26 61 15
## 27 Guatemala 27 136 8
## 28 Saudi Arabia 28 93 49
## 29 Qatar 29 86 NA
## 30 Spain 30 21 107
## 31 Panama 31 121 7
## 32 Brazil 32 116 69
## 33 Uruguay 33 88 10
## 34 Singapore 34 5 38
## 35 El Salvador 35 112 23
## 36 Italy 36 31 99
## 37 Bahrain 37 83 39
## 38 Slovakia 38 39 53
## 39 Trinidad and Tobago 39 89 14
## 40 Poland 40 28 76
## 41 Uzbekistan 41 99 19
## 42 Lithuania 42 55 138
## 43 Colombia 43 120 30
## 44 Slovenia 44 54 114
## 45 Nicaragua 45 133 31
## 46 Kosovo 46 107 71
## 47 Argentina 47 97 28
## 48 Romania 48 75 80
## 49 Cyprus 49 95 60
## 50 Ecuador 50 113 11
## 51 Kuwait 51 98 89
## 52 Thailand 52 81 20
## 53 Latvia 53 30 119
## 54 South Korea 54 57 101
## 55 Estonia 55 32 50
## 56 Jamaica 56 102 51
## 57 Mauritius 57 94 55
## 58 Japan 58 43 73
## 59 Honduras 59 151 13
## 60 Kazakhstan 60 40 81
## 61 Bolivia 61 71 70
## 62 Hungary 62 36 86
## 63 Paraguay 63 90 1
## 64 Northern Cyprus 64 35 144
## 65 Peru 65 114 36
## 66 Portugal 66 73 97
## 67 Pakistan 67 53 130
## 68 Russia 68 64 96
## 69 Philippines 69 119 42
## 70 Serbia 70 100 148
## 71 Moldova 71 45 133
## 72 Libya 72 115 85
## 73 Montenegro 73 84 143
## 74 Tajikistan 74 50 120
## 75 Croatia 75 29 122
## 76 Hong Kong 76 33 105
## 77 Dominican Republic 77 155 66
## 78 Bosnia and Herzegovina 78 80 116
## 79 Turkey 79 58 154
## 80 Malaysia 80 12 25
## 81 Belarus 81 22 149
## 82 Greece 82 87 102
## 83 Mongolia 83 48 95
## 84 Macedonia 84 67 140
## 85 Nigeria 85 130 61
## 86 Kyrgyzstan 86 46 58
## 87 Turkmenistan 87 2 135
## 88 Algeria 88 56 113
## 89 Morocco 89 101 110
## 90 Azerbaijan 90 24 134
## 91 Lebanon 91 60 150
## 92 Indonesia 92 108 9
## 93 China 93 72 21
## 94 Vietnam 94 27 121
## 95 Bhutan 95 6 37
## 96 Cameroon 96 131 106
## 97 Bulgaria 97 47 117
## 98 Ghana 98 129 92
## 99 Ivory Coast 99 134 88
## 100 Nepal 100 128 137
## 101 Jordan 101 127 112
## 102 Benin 102 149 118
## 103 Congo (Brazzaville) 103 152 124
## 104 Gabon 104 105 111
## 105 Laos 105 59 5
## 106 South Africa 106 124 40
## 107 Albania 107 126 90
## 108 Venezuela 108 141 77
## 109 Cambodia 109 135 27
## 110 Palestinian Territories 110 110 128
## 111 Senegal 111 44 68
## 112 Somalia 112 74 2
## 113 Namibia 113 106 75
## 114 Niger 114 144 79
## 115 Burkina Faso 115 92 115
## 116 Armenia 116 82 126
## 117 Iran 117 109 109
## 118 Guinea 118 146 82
## 119 Georgia 119 51 141
## 120 Gambia 120 142 29
## 121 Kenya 121 118 59
## 122 Mauritania 122 68 94
## 123 Mozambique 123 154 108
## 124 Tunisia 124 79 147
## 125 Bangladesh 125 52 145
## 126 Iraq 126 147 151
## 127 Congo (Kinshasa) 127 78 125
## 128 Mali 128 96 48
## 129 Sierra Leone 129 153 139
## 130 Sri Lanka 130 91 32
## 131 Myanmar 131 70 45
## 132 Chad 132 139 136
## 133 Ukraine 133 69 131
## 134 Ethiopia 134 38 100
## 135 Swaziland 135 104 26
## 136 Uganda 136 148 91
## 137 Egypt 137 66 146
## 138 Zambia 138 145 84
## 139 Togo 139 103 123
## 140 India 140 41 93
## 141 Liberia 141 156 103
## 142 Comoros 142 143 67
## 143 Madagascar 143 77 46
## 144 Lesotho 144 150 72
## 145 Burundi 145 138 98
## 146 Zimbabwe 146 123 63
## 147 Haiti 147 111 142
## 148 Botswana 148 125 87
## 149 Syria 149 137 155
## 150 Malawi 150 132 129
## 151 Yemen 151 85 153
## 152 Rwanda 152 63 54
## 153 Tanzania 153 122 78
## 154 Afghanistan 154 25 152
## 155 Central African Republic 155 117 132
## 156 South Sudan 156 140 127
## Negative_affect Corruption Generosity Log_GDP life_expectancy
## 1 10 4 47 22 27
## 2 26 3 22 14 23
## 3 29 8 11 7 12
## 4 3 45 3 15 13
## 5 25 12 7 12 18
## 6 21 7 16 8 4
## 7 8 6 17 13 17
## 8 12 5 8 26 14
## 9 49 11 14 19 8
## 10 24 19 25 16 15
## 11 37 13 6 18 10
## 12 87 58 75 67 28
## 13 69 74 24 31 11
## 14 19 9 30 2 16
## 15 42 15 4 23 24
## 16 32 10 9 6 20
## 17 30 17 19 17 25
## 18 53 20 44 21 26
## 19 70 42 12 10 39
## 20 22 121 117 32 31
## 21 56 NA 15 4 60
## 22 103 32 5 28 19
## 23 40 87 120 57 46
## 24 66 21 68 25 5
## 25 1 56 56 NA NA
## 26 78 99 45 49 30
## 27 85 82 78 99 85
## 28 82 NA 82 11 74
## 29 NA NA NA 1 43
## 30 107 78 50 30 3
## 31 48 104 88 51 33
## 32 105 71 108 70 72
## 33 76 33 80 52 35
## 34 2 1 21 3 1
## 35 84 85 134 100 75
## 36 123 128 48 29 7
## 37 83 NA 23 20 42
## 38 47 142 70 35 38
## 39 52 141 41 38 93
## 40 33 108 77 41 36
## 41 15 18 29 104 83
## 42 41 113 124 36 62
## 43 88 124 111 74 51
## 44 71 97 54 34 29
## 45 125 43 71 108 53
## 46 7 144 31 88 NA
## 47 93 109 123 55 37
## 48 62 146 102 48 61
## 49 99 115 39 33 6
## 50 113 68 95 86 45
## 51 97 NA 42 5 70
## 52 35 131 10 62 58
## 53 38 92 105 43 68
## 54 45 100 40 27 9
## 55 6 30 83 37 41
## 56 51 130 119 93 55
## 57 16 96 37 53 73
## 58 14 39 92 24 2
## 59 73 79 51 113 57
## 60 5 57 57 47 88
## 61 138 91 104 101 94
## 62 31 140 100 42 56
## 63 39 76 67 90 81
## 64 90 29 43 NA NA
## 65 127 132 126 76 47
## 66 100 135 122 39 22
## 67 111 55 58 110 114
## 68 9 127 101 45 89
## 69 116 49 115 97 99
## 70 92 118 84 71 48
## 71 67 148 86 109 86
## 72 137 31 87 63 96
## 73 118 77 76 61 44
## 74 54 35 72 123 92
## 75 101 139 81 50 32
## 76 28 14 18 9 NA
## 77 77 52 93 69 80
## 78 79 145 32 82 50
## 79 121 50 98 44 69
## 80 23 137 27 40 59
## 81 36 37 103 58 76
## 82 94 123 152 46 21
## 83 17 119 38 80 97
## 84 89 125 55 75 52
## 85 55 114 59 107 145
## 86 4 138 36 120 91
## 87 63 NA 33 60 100
## 88 106 46 128 72 78
## 89 91 84 154 98 79
## 90 20 22 146 65 82
## 91 61 133 63 73 66
## 92 104 129 2 83 98
## 93 11 NA 133 68 34
## 94 27 86 97 105 49
## 95 98 25 13 95 104
## 96 129 120 91 121 141
## 97 13 147 112 56 65
## 98 72 117 52 114 121
## 99 130 62 114 118 147
## 100 134 65 46 127 95
## 101 120 NA 118 92 63
## 102 148 75 116 128 133
## 103 136 60 140 111 116
## 104 144 103 143 59 108
## 105 112 27 34 102 112
## 106 80 102 89 77 123
## 107 108 134 60 81 40
## 108 135 110 139 78 71
## 109 142 94 61 116 102
## 110 140 90 147 112 NA
## 111 60 88 130 126 109
## 112 18 16 96 NA 144
## 113 59 98 142 89 122
## 114 141 51 135 148 138
## 115 117 47 125 137 136
## 116 145 93 129 91 64
## 117 150 44 28 54 77
## 118 143 70 94 130 137
## 119 43 28 153 87 84
## 120 109 26 64 139 130
## 121 46 105 26 122 106
## 122 58 67 148 117 120
## 123 131 40 121 146 134
## 124 132 101 144 84 67
## 125 68 36 107 119 90
## 126 154 66 73 64 107
## 127 95 106 127 149 140
## 128 122 107 138 129 142
## 129 149 112 79 145 146
## 130 81 111 35 79 54
## 131 86 24 1 106 110
## 132 151 80 106 133 148
## 133 44 143 66 94 87
## 134 74 53 99 135 115
## 135 57 41 145 96 NA
## 136 139 95 74 136 127
## 137 124 89 132 85 101
## 138 128 69 53 115 131
## 139 147 72 131 142 132
## 140 115 73 65 103 105
## 141 146 126 110 150 126
## 142 114 81 62 143 117
## 143 96 116 136 144 111
## 144 64 59 151 124 149
## 145 126 23 149 151 135
## 146 34 63 141 131 129
## 147 119 48 20 138 125
## 148 65 54 150 66 113
## 149 155 38 69 NA 128
## 150 110 64 109 147 119
## 151 75 83 155 141 124
## 152 102 2 90 132 103
## 153 50 34 49 125 118
## 154 133 136 137 134 139
## 155 153 122 113 152 150
## 156 152 61 85 140 143
## Freedom
## 1 5
## 2 6
## 3 3
## 4 7
## 5 19
## 6 11
## 7 10
## 8 8
## 9 9
## 10 26
## 11 17
## 12 16
## 13 93
## 14 28
## 15 63
## 16 33
## 17 44
## 18 53
## 19 62
## 20 58
## 21 4
## 22 12
## 23 71
## 24 69
## 25 102
## 26 98
## 27 25
## 28 68
## 29 NA
## 30 95
## 31 32
## 32 84
## 33 30
## 34 20
## 35 74
## 36 132
## 37 24
## 38 108
## 39 51
## 40 52
## 41 1
## 42 122
## 43 56
## 44 13
## 45 70
## 46 50
## 47 54
## 48 57
## 49 81
## 50 42
## 51 47
## 52 18
## 53 126
## 54 144
## 55 45
## 56 49
## 57 40
## 58 64
## 59 39
## 60 80
## 61 35
## 62 138
## 63 34
## 64 77
## 65 61
## 66 37
## 67 114
## 68 107
## 69 15
## 70 124
## 71 128
## 72 79
## 73 139
## 74 86
## 75 118
## 76 66
## 77 43
## 78 137
## 79 140
## 80 36
## 81 131
## 82 150
## 83 112
## 84 105
## 85 75
## 86 38
## 87 83
## 88 149
## 89 76
## 90 101
## 91 136
## 92 48
## 93 31
## 94 23
## 95 59
## 96 90
## 97 115
## 98 91
## 99 100
## 100 67
## 101 88
## 102 103
## 103 92
## 104 119
## 105 22
## 106 85
## 107 87
## 108 145
## 109 2
## 110 134
## 111 121
## 112 14
## 113 82
## 114 111
## 115 127
## 116 123
## 117 117
## 118 109
## 119 104
## 120 89
## 121 72
## 122 151
## 123 46
## 124 143
## 125 27
## 126 130
## 127 125
## 128 110
## 129 116
## 130 55
## 131 29
## 132 142
## 133 141
## 134 106
## 135 113
## 136 99
## 137 129
## 138 73
## 139 120
## 140 41
## 141 94
## 142 148
## 143 146
## 144 97
## 145 135
## 146 96
## 147 152
## 148 60
## 149 153
## 150 65
## 151 147
## 152 21
## 153 78
## 154 155
## 155 133
## 156 154
## Freedom
## 1 5
## 2 6
## 3 3
## 4 7
## 5 19
## 6 11
## 7 10
## 8 8
## 9 9
## 10 26
## 11 17
## 12 16
## 13 93
## 14 28
## 15 63
## 16 33
## 17 44
## 18 53
## 19 62
## 20 58
## 21 4
## 22 12
## 23 71
## 24 69
## 25 102
## 26 98
## 27 25
## 28 68
## 29 NA
## 30 95
## 31 32
## 32 84
## 33 30
## 34 20
## 35 74
## 36 132
## 37 24
## 38 108
## 39 51
## 40 52
## 41 1
## 42 122
## 43 56
## 44 13
## 45 70
## 46 50
## 47 54
## 48 57
## 49 81
## 50 42
## 51 47
## 52 18
## 53 126
## 54 144
## 55 45
## 56 49
## 57 40
## 58 64
## 59 39
## 60 80
## 61 35
## 62 138
## 63 34
## 64 77
## 65 61
## 66 37
## 67 114
## 68 107
## 69 15
## 70 124
## 71 128
## 72 79
## 73 139
## 74 86
## 75 118
## 76 66
## 77 43
## 78 137
## 79 140
## 80 36
## 81 131
## 82 150
## 83 112
## 84 105
## 85 75
## 86 38
## 87 83
## 88 149
## 89 76
## 90 101
## 91 136
## 92 48
## 93 31
## 94 23
## 95 59
## 96 90
## 97 115
## 98 91
## 99 100
## 100 67
## 101 88
## 102 103
## 103 92
## 104 119
## 105 22
## 106 85
## 107 87
## 108 145
## 109 2
## 110 134
## 111 121
## 112 14
## 113 82
## 114 111
## 115 127
## 116 123
## 117 117
## 118 109
## 119 104
## 120 89
## 121 72
## 122 151
## 123 46
## 124 143
## 125 27
## 126 130
## 127 125
## 128 110
## 129 116
## 130 55
## 131 29
## 132 142
## 133 141
## 134 106
## 135 113
## 136 99
## 137 129
## 138 73
## 139 120
## 140 41
## 141 94
## 142 148
## 143 146
## 144 97
## 145 135
## 146 96
## 147 152
## 148 60
## 149 153
## 150 65
## 151 147
## 152 21
## 153 78
## 154 155
## 155 133
## 156 154
## Country_region Ladder SD_Ladder Positive_affect Negative_affect
## 1 Finland 1 4 41 10
## 2 Denmark 2 13 24 26
## 3 Norway 3 8 16 29
## 4 Iceland 4 9 3 3
## 5 Netherlands 5 1 12 25
## 6 Switzerland 6 11 44 21
## Social_support Freedom Corruption Generosity Log_GDP life_expectancy
## 1 2 5 4 47 22 27
## 2 4 6 3 22 14 23
## 3 3 3 8 11 7 12
## 4 1 7 45 3 15 13
## 5 15 19 12 7 12 18
## 6 13 11 7 16 8 4
## Observations: 156
## Variables: 11
## $ Country_region <fct> Finland, Denmark, Norway, Iceland, Netherlands...
## $ Ladder <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ SD_Ladder <int> 4, 13, 8, 9, 1, 11, 18, 15, 23, 10, 26, 62, 14...
## $ Positive_affect <int> 41, 24, 16, 3, 12, 44, 34, 22, 18, 64, 47, 4, ...
## $ Negative_affect <int> 10, 26, 29, 3, 25, 21, 8, 12, 49, 24, 37, 87, ...
## $ Social_support <int> 2, 4, 3, 1, 15, 13, 25, 5, 20, 31, 7, 42, 38, ...
## $ Freedom <int> 5, 6, 3, 7, 19, 11, 10, 8, 9, 26, 17, 16, 93, ...
## $ Corruption <int> 4, 3, 8, 45, 12, 7, 6, 5, 11, 19, 13, 58, 74, ...
## $ Generosity <int> 47, 22, 11, 3, 7, 16, 17, 8, 14, 25, 6, 75, 24...
## $ Log_GDP <int> 22, 14, 7, 15, 12, 8, 13, 26, 19, 16, 18, 67, ...
## $ life_expectancy <int> 27, 23, 12, 13, 18, 4, 17, 14, 8, 15, 10, 28, ...
## SD_Ladder
## 1 4
## 2 13
## 3 8
## 4 9
## 5 1
## 6 11
## 7 18
## 8 15
## 9 23
## 10 10
## 11 26
## 12 62
## 13 14
## 14 3
## 15 16
## 16 34
## 17 17
## 18 7
## 19 49
## 20 20
## 21 65
## 22 42
## 23 76
## 24 19
## 25 37
## 26 61
## 27 136
## 28 93
## 29 86
## 30 21
## 31 121
## 32 116
## 33 88
## 34 5
## 35 112
## 36 31
## 37 83
## 38 39
## 39 89
## 40 28
## 41 99
## 42 55
## 43 120
## 44 54
## 45 133
## 46 107
## 47 97
## 48 75
## 49 95
## 50 113
## 51 98
## 52 81
## 53 30
## 54 57
## 55 32
## 56 102
## 57 94
## 58 43
## 59 151
## 60 40
## 61 71
## 62 36
## 63 90
## 64 35
## 65 114
## 66 73
## 67 53
## 68 64
## 69 119
## 70 100
## 71 45
## 72 115
## 73 84
## 74 50
## 75 29
## 76 33
## 77 155
## 78 80
## 79 58
## 80 12
## 81 22
## 82 87
## 83 48
## 84 67
## 85 130
## 86 46
## 87 2
## 88 56
## 89 101
## 90 24
## 91 60
## 92 108
## 93 72
## 94 27
## 95 6
## 96 131
## 97 47
## 98 129
## 99 134
## 100 128
## 101 127
## 102 149
## 103 152
## 104 105
## 105 59
## 106 124
## 107 126
## 108 141
## 109 135
## 110 110
## 111 44
## 112 74
## 113 106
## 114 144
## 115 92
## 116 82
## 117 109
## 118 146
## 119 51
## 120 142
## 121 118
## 122 68
## 123 154
## 124 79
## 125 52
## 126 147
## 127 78
## 128 96
## 129 153
## 130 91
## 131 70
## 132 139
## 133 69
## 134 38
## 135 104
## 136 148
## 137 66
## 138 145
## 139 103
## 140 41
## 141 156
## 142 143
## 143 77
## 144 150
## 145 138
## 146 123
## 147 111
## 148 125
## 149 137
## 150 132
## 151 85
## 152 63
## 153 122
## 154 25
## 155 117
## 156 140
## Social_support Freedom
## 1 2 5
## 2 4 6
## 3 3 3
## 4 1 7
## 5 15 19
## 6 13 11
## 7 25 10
## 8 5 8
## 9 20 9
## 10 31 26
## 11 7 17
## 12 42 16
## 13 38 93
## 14 27 28
## 15 9 63
## 16 6 33
## 17 39 44
## 18 22 53
## 19 37 62
## 20 24 58
## 21 72 4
## 22 16 12
## 23 67 71
## 24 32 69
## 25 48 102
## 26 58 98
## 27 78 25
## 28 62 68
## 29 NA NA
## 30 26 95
## 31 41 32
## 32 43 84
## 33 35 30
## 34 36 20
## 35 83 74
## 36 23 132
## 37 59 24
## 38 21 108
## 39 29 51
## 40 44 52
## 41 11 1
## 42 17 122
## 43 52 56
## 44 14 13
## 45 66 70
## 46 85 50
## 47 46 54
## 48 86 57
## 49 90 81
## 50 71 42
## 51 69 47
## 52 53 18
## 53 34 126
## 54 91 144
## 55 12 45
## 56 28 49
## 57 54 40
## 58 50 64
## 59 84 39
## 60 19 80
## 61 93 35
## 62 51 138
## 63 30 34
## 64 81 77
## 65 77 61
## 66 47 37
## 67 130 114
## 68 40 107
## 69 75 15
## 70 57 124
## 71 65 128
## 72 73 79
## 73 60 139
## 74 113 86
## 75 79 118
## 76 76 66
## 77 55 43
## 78 92 137
## 79 61 140
## 80 97 36
## 81 33 131
## 82 102 150
## 83 10 112
## 84 74 105
## 85 111 75
## 86 45 38
## 87 8 83
## 88 101 149
## 89 139 76
## 90 104 101
## 91 89 136
## 92 94 48
## 93 108 31
## 94 64 23
## 95 68 59
## 96 129 90
## 97 18 115
## 98 132 91
## 99 137 100
## 100 87 67
## 101 88 88
## 102 153 103
## 103 138 92
## 104 95 119
## 105 120 22
## 106 63 85
## 107 133 87
## 108 49 145
## 109 109 2
## 110 82 134
## 111 106 121
## 112 145 14
## 113 70 82
## 114 140 111
## 115 116 127
## 116 117 123
## 117 134 117
## 118 136 109
## 119 147 104
## 120 125 89
## 121 123 72
## 122 99 151
## 123 122 46
## 124 121 143
## 125 126 27
## 126 124 130
## 127 107 125
## 128 112 110
## 129 135 116
## 130 80 55
## 131 96 29
## 132 141 142
## 133 56 141
## 134 119 106
## 135 103 113
## 136 114 99
## 137 118 129
## 138 115 73
## 139 149 120
## 140 142 41
## 141 127 94
## 142 143 148
## 143 128 146
## 144 98 97
## 145 152 135
## 146 110 96
## 147 146 152
## 148 105 60
## 149 154 153
## 150 150 65
## 151 100 147
## 152 144 21
## 153 131 78
## 154 151 155
## 155 155 133
## 156 148 154
## Country_region Ladder SD_Ladder Positive_affect
## 1 Finland 1 4 41
## 2 Denmark 2 13 24
## 3 Norway 3 8 16
## 4 Iceland 4 9 3
## 5 Netherlands 5 1 12
## 6 Switzerland 6 11 44
## 7 Sweden 7 18 34
## 8 New Zealand 8 15 22
## 9 Canada 9 23 18
## 10 Austria 10 10 64
## 11 Australia 11 26 47
## 12 Costa Rica 12 62 4
## 13 Israel 13 14 104
## 14 Luxembourg 14 3 62
## 15 United Kingdom 15 16 52
## 16 Ireland 16 34 33
## 17 Germany 17 17 65
## 18 Belgium 18 7 57
## 19 United States 19 49 35
## 20 Czech Republic 20 20 74
## 21 United Arab Emirates 21 65 43
## 22 Malta 22 42 83
## 23 Mexico 23 76 6
## 24 France 24 19 56
## 25 Taiwan 25 37 17
## 26 Chile 26 61 15
## 27 Guatemala 27 136 8
## 28 Saudi Arabia 28 93 49
## 29 Qatar 29 86 NA
## 30 Spain 30 21 107
## 31 Panama 31 121 7
## 32 Brazil 32 116 69
## 33 Uruguay 33 88 10
## 34 Singapore 34 5 38
## 35 El Salvador 35 112 23
## 36 Italy 36 31 99
## 37 Bahrain 37 83 39
## 38 Slovakia 38 39 53
## 39 Trinidad and Tobago 39 89 14
## 40 Poland 40 28 76
## 41 Uzbekistan 41 99 19
## 42 Lithuania 42 55 138
## 43 Colombia 43 120 30
## 44 Slovenia 44 54 114
## 45 Nicaragua 45 133 31
## 46 Kosovo 46 107 71
## 47 Argentina 47 97 28
## 48 Romania 48 75 80
## 49 Cyprus 49 95 60
## 50 Ecuador 50 113 11
## 51 Kuwait 51 98 89
## 52 Thailand 52 81 20
## 53 Latvia 53 30 119
## 54 South Korea 54 57 101
## 55 Estonia 55 32 50
## 56 Jamaica 56 102 51
## 57 Mauritius 57 94 55
## 58 Japan 58 43 73
## 59 Honduras 59 151 13
## 60 Kazakhstan 60 40 81
## 61 Bolivia 61 71 70
## 62 Hungary 62 36 86
## 63 Paraguay 63 90 1
## 64 Northern Cyprus 64 35 144
## 65 Peru 65 114 36
## 66 Portugal 66 73 97
## 67 Pakistan 67 53 130
## 68 Russia 68 64 96
## 69 Philippines 69 119 42
## 70 Serbia 70 100 148
## 71 Moldova 71 45 133
## 72 Libya 72 115 85
## 73 Montenegro 73 84 143
## 74 Tajikistan 74 50 120
## 75 Croatia 75 29 122
## 76 Hong Kong 76 33 105
## 77 Dominican Republic 77 155 66
## 78 Bosnia and Herzegovina 78 80 116
## 79 Turkey 79 58 154
## 80 Malaysia 80 12 25
## 81 Belarus 81 22 149
## 82 Greece 82 87 102
## 83 Mongolia 83 48 95
## 84 Macedonia 84 67 140
## 85 Nigeria 85 130 61
## 86 Kyrgyzstan 86 46 58
## 87 Turkmenistan 87 2 135
## 88 Algeria 88 56 113
## 89 Morocco 89 101 110
## 90 Azerbaijan 90 24 134
## 91 Lebanon 91 60 150
## 92 Indonesia 92 108 9
## 93 China 93 72 21
## 94 Vietnam 94 27 121
## 95 Bhutan 95 6 37
## 96 Cameroon 96 131 106
## 97 Bulgaria 97 47 117
## 98 Ghana 98 129 92
## 99 Ivory Coast 99 134 88
## 100 Nepal 100 128 137
## 101 Jordan 101 127 112
## 102 Benin 102 149 118
## 103 Congo (Brazzaville) 103 152 124
## 104 Gabon 104 105 111
## 105 Laos 105 59 5
## 106 South Africa 106 124 40
## 107 Albania 107 126 90
## 108 Venezuela 108 141 77
## 109 Cambodia 109 135 27
## 110 Palestinian Territories 110 110 128
## 111 Senegal 111 44 68
## 112 Somalia 112 74 2
## 113 Namibia 113 106 75
## 114 Niger 114 144 79
## 115 Burkina Faso 115 92 115
## 116 Armenia 116 82 126
## 117 Iran 117 109 109
## 118 Guinea 118 146 82
## 119 Georgia 119 51 141
## 120 Gambia 120 142 29
## 121 Kenya 121 118 59
## 122 Mauritania 122 68 94
## 123 Mozambique 123 154 108
## 124 Tunisia 124 79 147
## 125 Bangladesh 125 52 145
## 126 Iraq 126 147 151
## 127 Congo (Kinshasa) 127 78 125
## 128 Mali 128 96 48
## 129 Sierra Leone 129 153 139
## 130 Sri Lanka 130 91 32
## 131 Myanmar 131 70 45
## 132 Chad 132 139 136
## 133 Ukraine 133 69 131
## 134 Ethiopia 134 38 100
## 135 Swaziland 135 104 26
## 136 Uganda 136 148 91
## 137 Egypt 137 66 146
## 138 Zambia 138 145 84
## 139 Togo 139 103 123
## 140 India 140 41 93
## 141 Liberia 141 156 103
## 142 Comoros 142 143 67
## 143 Madagascar 143 77 46
## 144 Lesotho 144 150 72
## 145 Burundi 145 138 98
## 146 Zimbabwe 146 123 63
## 147 Haiti 147 111 142
## 148 Botswana 148 125 87
## 149 Syria 149 137 155
## 150 Malawi 150 132 129
## 151 Yemen 151 85 153
## 152 Rwanda 152 63 54
## 153 Tanzania 153 122 78
## 154 Afghanistan 154 25 152
## 155 Central African Republic 155 117 132
## 156 South Sudan 156 140 127
## Negative_affect Corruption Generosity Log_GDP life_expectancy
## 1 10 4 47 22 27
## 2 26 3 22 14 23
## 3 29 8 11 7 12
## 4 3 45 3 15 13
## 5 25 12 7 12 18
## 6 21 7 16 8 4
## 7 8 6 17 13 17
## 8 12 5 8 26 14
## 9 49 11 14 19 8
## 10 24 19 25 16 15
## 11 37 13 6 18 10
## 12 87 58 75 67 28
## 13 69 74 24 31 11
## 14 19 9 30 2 16
## 15 42 15 4 23 24
## 16 32 10 9 6 20
## 17 30 17 19 17 25
## 18 53 20 44 21 26
## 19 70 42 12 10 39
## 20 22 121 117 32 31
## 21 56 NA 15 4 60
## 22 103 32 5 28 19
## 23 40 87 120 57 46
## 24 66 21 68 25 5
## 25 1 56 56 NA NA
## 26 78 99 45 49 30
## 27 85 82 78 99 85
## 28 82 NA 82 11 74
## 29 NA NA NA 1 43
## 30 107 78 50 30 3
## 31 48 104 88 51 33
## 32 105 71 108 70 72
## 33 76 33 80 52 35
## 34 2 1 21 3 1
## 35 84 85 134 100 75
## 36 123 128 48 29 7
## 37 83 NA 23 20 42
## 38 47 142 70 35 38
## 39 52 141 41 38 93
## 40 33 108 77 41 36
## 41 15 18 29 104 83
## 42 41 113 124 36 62
## 43 88 124 111 74 51
## 44 71 97 54 34 29
## 45 125 43 71 108 53
## 46 7 144 31 88 NA
## 47 93 109 123 55 37
## 48 62 146 102 48 61
## 49 99 115 39 33 6
## 50 113 68 95 86 45
## 51 97 NA 42 5 70
## 52 35 131 10 62 58
## 53 38 92 105 43 68
## 54 45 100 40 27 9
## 55 6 30 83 37 41
## 56 51 130 119 93 55
## 57 16 96 37 53 73
## 58 14 39 92 24 2
## 59 73 79 51 113 57
## 60 5 57 57 47 88
## 61 138 91 104 101 94
## 62 31 140 100 42 56
## 63 39 76 67 90 81
## 64 90 29 43 NA NA
## 65 127 132 126 76 47
## 66 100 135 122 39 22
## 67 111 55 58 110 114
## 68 9 127 101 45 89
## 69 116 49 115 97 99
## 70 92 118 84 71 48
## 71 67 148 86 109 86
## 72 137 31 87 63 96
## 73 118 77 76 61 44
## 74 54 35 72 123 92
## 75 101 139 81 50 32
## 76 28 14 18 9 NA
## 77 77 52 93 69 80
## 78 79 145 32 82 50
## 79 121 50 98 44 69
## 80 23 137 27 40 59
## 81 36 37 103 58 76
## 82 94 123 152 46 21
## 83 17 119 38 80 97
## 84 89 125 55 75 52
## 85 55 114 59 107 145
## 86 4 138 36 120 91
## 87 63 NA 33 60 100
## 88 106 46 128 72 78
## 89 91 84 154 98 79
## 90 20 22 146 65 82
## 91 61 133 63 73 66
## 92 104 129 2 83 98
## 93 11 NA 133 68 34
## 94 27 86 97 105 49
## 95 98 25 13 95 104
## 96 129 120 91 121 141
## 97 13 147 112 56 65
## 98 72 117 52 114 121
## 99 130 62 114 118 147
## 100 134 65 46 127 95
## 101 120 NA 118 92 63
## 102 148 75 116 128 133
## 103 136 60 140 111 116
## 104 144 103 143 59 108
## 105 112 27 34 102 112
## 106 80 102 89 77 123
## 107 108 134 60 81 40
## 108 135 110 139 78 71
## 109 142 94 61 116 102
## 110 140 90 147 112 NA
## 111 60 88 130 126 109
## 112 18 16 96 NA 144
## 113 59 98 142 89 122
## 114 141 51 135 148 138
## 115 117 47 125 137 136
## 116 145 93 129 91 64
## 117 150 44 28 54 77
## 118 143 70 94 130 137
## 119 43 28 153 87 84
## 120 109 26 64 139 130
## 121 46 105 26 122 106
## 122 58 67 148 117 120
## 123 131 40 121 146 134
## 124 132 101 144 84 67
## 125 68 36 107 119 90
## 126 154 66 73 64 107
## 127 95 106 127 149 140
## 128 122 107 138 129 142
## 129 149 112 79 145 146
## 130 81 111 35 79 54
## 131 86 24 1 106 110
## 132 151 80 106 133 148
## 133 44 143 66 94 87
## 134 74 53 99 135 115
## 135 57 41 145 96 NA
## 136 139 95 74 136 127
## 137 124 89 132 85 101
## 138 128 69 53 115 131
## 139 147 72 131 142 132
## 140 115 73 65 103 105
## 141 146 126 110 150 126
## 142 114 81 62 143 117
## 143 96 116 136 144 111
## 144 64 59 151 124 149
## 145 126 23 149 151 135
## 146 34 63 141 131 129
## 147 119 48 20 138 125
## 148 65 54 150 66 113
## 149 155 38 69 NA 128
## 150 110 64 109 147 119
## 151 75 83 155 141 124
## 152 102 2 90 132 103
## 153 50 34 49 125 118
## 154 133 136 137 134 139
## 155 153 122 113 152 150
## 156 152 61 85 140 143
## Freedom
## 1 5
## 2 6
## 3 3
## 4 7
## 5 19
## 6 11
## 7 10
## 8 8
## 9 9
## 10 26
## 11 17
## 12 16
## 13 93
## 14 28
## 15 63
## 16 33
## 17 44
## 18 53
## 19 62
## 20 58
## 21 4
## 22 12
## 23 71
## 24 69
## 25 102
## 26 98
## 27 25
## 28 68
## 29 NA
## 30 95
## 31 32
## 32 84
## 33 30
## 34 20
## 35 74
## 36 132
## 37 24
## 38 108
## 39 51
## 40 52
## 41 1
## 42 122
## 43 56
## 44 13
## 45 70
## 46 50
## 47 54
## 48 57
## 49 81
## 50 42
## 51 47
## 52 18
## 53 126
## 54 144
## 55 45
## 56 49
## 57 40
## 58 64
## 59 39
## 60 80
## 61 35
## 62 138
## 63 34
## 64 77
## 65 61
## 66 37
## 67 114
## 68 107
## 69 15
## 70 124
## 71 128
## 72 79
## 73 139
## 74 86
## 75 118
## 76 66
## 77 43
## 78 137
## 79 140
## 80 36
## 81 131
## 82 150
## 83 112
## 84 105
## 85 75
## 86 38
## 87 83
## 88 149
## 89 76
## 90 101
## 91 136
## 92 48
## 93 31
## 94 23
## 95 59
## 96 90
## 97 115
## 98 91
## 99 100
## 100 67
## 101 88
## 102 103
## 103 92
## 104 119
## 105 22
## 106 85
## 107 87
## 108 145
## 109 2
## 110 134
## 111 121
## 112 14
## 113 82
## 114 111
## 115 127
## 116 123
## 117 117
## 118 109
## 119 104
## 120 89
## 121 72
## 122 151
## 123 46
## 124 143
## 125 27
## 126 130
## 127 125
## 128 110
## 129 116
## 130 55
## 131 29
## 132 142
## 133 141
## 134 106
## 135 113
## 136 99
## 137 129
## 138 73
## 139 120
## 140 41
## 141 94
## 142 148
## 143 146
## 144 97
## 145 135
## 146 96
## 147 152
## 148 60
## 149 153
## 150 65
## 151 147
## 152 21
## 153 78
## 154 155
## 155 133
## 156 154
## Freedom
## 1 5
## 2 6
## 3 3
## 4 7
## 5 19
## 6 11
## 7 10
## 8 8
## 9 9
## 10 26
## 11 17
## 12 16
## 13 93
## 14 28
## 15 63
## 16 33
## 17 44
## 18 53
## 19 62
## 20 58
## 21 4
## 22 12
## 23 71
## 24 69
## 25 102
## 26 98
## 27 25
## 28 68
## 29 NA
## 30 95
## 31 32
## 32 84
## 33 30
## 34 20
## 35 74
## 36 132
## 37 24
## 38 108
## 39 51
## 40 52
## 41 1
## 42 122
## 43 56
## 44 13
## 45 70
## 46 50
## 47 54
## 48 57
## 49 81
## 50 42
## 51 47
## 52 18
## 53 126
## 54 144
## 55 45
## 56 49
## 57 40
## 58 64
## 59 39
## 60 80
## 61 35
## 62 138
## 63 34
## 64 77
## 65 61
## 66 37
## 67 114
## 68 107
## 69 15
## 70 124
## 71 128
## 72 79
## 73 139
## 74 86
## 75 118
## 76 66
## 77 43
## 78 137
## 79 140
## 80 36
## 81 131
## 82 150
## 83 112
## 84 105
## 85 75
## 86 38
## 87 83
## 88 149
## 89 76
## 90 101
## 91 136
## 92 48
## 93 31
## 94 23
## 95 59
## 96 90
## 97 115
## 98 91
## 99 100
## 100 67
## 101 88
## 102 103
## 103 92
## 104 119
## 105 22
## 106 85
## 107 87
## 108 145
## 109 2
## 110 134
## 111 121
## 112 14
## 113 82
## 114 111
## 115 127
## 116 123
## 117 117
## 118 109
## 119 104
## 120 89
## 121 72
## 122 151
## 123 46
## 124 143
## 125 27
## 126 130
## 127 125
## 128 110
## 129 116
## 130 55
## 131 29
## 132 142
## 133 141
## 134 106
## 135 113
## 136 99
## 137 129
## 138 73
## 139 120
## 140 41
## 141 94
## 142 148
## 143 146
## 144 97
## 145 135
## 146 96
## 147 152
## 148 60
## 149 153
## 150 65
## 151 147
## 152 21
## 153 78
## 154 155
## 155 133
## 156 154
## Ladder SD_Ladder
## 1 1 4
## 2 2 13
## 3 3 8
## 4 4 9
## 5 5 1
## 6 6 11
## 7 7 18
## 8 8 15
## 9 9 23
## 10 10 10
## 11 11 26
## 12 12 62
## 13 13 14
## 14 14 3
## 15 15 16
## 16 16 34
## 17 17 17
## 18 18 7
## 19 19 49
## 20 20 20
## 21 21 65
## 22 22 42
## 23 23 76
## 24 24 19
## 25 25 37
## 26 26 61
## 27 27 136
## 28 28 93
## 29 29 86
## 30 30 21
## 31 31 121
## 32 32 116
## 33 33 88
## 34 34 5
## 35 35 112
## 36 36 31
## 37 37 83
## 38 38 39
## 39 39 89
## 40 40 28
## 41 41 99
## 42 42 55
## 43 43 120
## 44 44 54
## 45 45 133
## 46 46 107
## 47 47 97
## 48 48 75
## 49 49 95
## 50 50 113
## 51 51 98
## 52 52 81
## 53 53 30
## 54 54 57
## 55 55 32
## 56 56 102
## 57 57 94
## 58 58 43
## 59 59 151
## 60 60 40
## 61 61 71
## 62 62 36
## 63 63 90
## 64 64 35
## 65 65 114
## 66 66 73
## 67 67 53
## 68 68 64
## 69 69 119
## 70 70 100
## 71 71 45
## 72 72 115
## 73 73 84
## 74 74 50
## 75 75 29
## 76 76 33
## 77 77 155
## 78 78 80
## 79 79 58
## 80 80 12
## 81 81 22
## 82 82 87
## 83 83 48
## 84 84 67
## 85 85 130
## 86 86 46
## 87 87 2
## 88 88 56
## 89 89 101
## 90 90 24
## 91 91 60
## 92 92 108
## 93 93 72
## 94 94 27
## 95 95 6
## 96 96 131
## 97 97 47
## 98 98 129
## 99 99 134
## 100 100 128
## 101 101 127
## 102 102 149
## 103 103 152
## 104 104 105
## 105 105 59
## 106 106 124
## 107 107 126
## 108 108 141
## 109 109 135
## 110 110 110
## 111 111 44
## 112 112 74
## 113 113 106
## 114 114 144
## 115 115 92
## 116 116 82
## 117 117 109
## 118 118 146
## 119 119 51
## 120 120 142
## 121 121 118
## 122 122 68
## 123 123 154
## 124 124 79
## 125 125 52
## 126 126 147
## 127 127 78
## 128 128 96
## 129 129 153
## 130 130 91
## 131 131 70
## 132 132 139
## 133 133 69
## 134 134 38
## 135 135 104
## 136 136 148
## 137 137 66
## 138 138 145
## 139 139 103
## 140 140 41
## 141 141 156
## 142 142 143
## 143 143 77
## 144 144 150
## 145 145 138
## 146 146 123
## 147 147 111
## 148 148 125
## 149 149 137
## 150 150 132
## 151 151 85
## 152 152 63
## 153 153 122
## 154 154 25
## 155 155 117
## 156 156 140
## Ladder Freedom
## 1 1 5
## 2 2 6
## 3 3 3
## 4 4 7
## 5 5 19
## 6 6 11
## 7 7 10
## 8 8 8
## 9 9 9
## 10 10 26
## 11 11 17
## 12 12 16
## 13 13 93
## 14 14 28
## 15 15 63
## 16 16 33
## 17 17 44
## 18 18 53
## 19 19 62
## 20 20 58
## 21 21 4
## 22 22 12
## 23 23 71
## 24 24 69
## 25 25 102
## 26 26 98
## 27 27 25
## 28 28 68
## 29 29 NA
## 30 30 95
## 31 31 32
## 32 32 84
## 33 33 30
## 34 34 20
## 35 35 74
## 36 36 132
## 37 37 24
## 38 38 108
## 39 39 51
## 40 40 52
## 41 41 1
## 42 42 122
## 43 43 56
## 44 44 13
## 45 45 70
## 46 46 50
## 47 47 54
## 48 48 57
## 49 49 81
## 50 50 42
## 51 51 47
## 52 52 18
## 53 53 126
## 54 54 144
## 55 55 45
## 56 56 49
## 57 57 40
## 58 58 64
## 59 59 39
## 60 60 80
## 61 61 35
## 62 62 138
## 63 63 34
## 64 64 77
## 65 65 61
## 66 66 37
## 67 67 114
## 68 68 107
## 69 69 15
## 70 70 124
## 71 71 128
## 72 72 79
## 73 73 139
## 74 74 86
## 75 75 118
## 76 76 66
## 77 77 43
## 78 78 137
## 79 79 140
## 80 80 36
## 81 81 131
## 82 82 150
## 83 83 112
## 84 84 105
## 85 85 75
## 86 86 38
## 87 87 83
## 88 88 149
## 89 89 76
## 90 90 101
## 91 91 136
## 92 92 48
## 93 93 31
## 94 94 23
## 95 95 59
## 96 96 90
## 97 97 115
## 98 98 91
## 99 99 100
## 100 100 67
## 101 101 88
## 102 102 103
## 103 103 92
## 104 104 119
## 105 105 22
## 106 106 85
## 107 107 87
## 108 108 145
## 109 109 2
## 110 110 134
## 111 111 121
## 112 112 14
## 113 113 82
## 114 114 111
## 115 115 127
## 116 116 123
## 117 117 117
## 118 118 109
## 119 119 104
## 120 120 89
## 121 121 72
## 122 122 151
## 123 123 46
## 124 124 143
## 125 125 27
## 126 126 130
## 127 127 125
## 128 128 110
## 129 129 116
## 130 130 55
## 131 131 29
## 132 132 142
## 133 133 141
## 134 134 106
## 135 135 113
## 136 136 99
## 137 137 129
## 138 138 73
## 139 139 120
## 140 140 41
## 141 141 94
## 142 142 148
## 143 143 146
## 144 144 97
## 145 145 135
## 146 146 96
## 147 147 152
## 148 148 60
## 149 149 153
## 150 150 65
## 151 151 147
## 152 152 21
## 153 153 78
## 154 154 155
## 155 155 133
## 156 156 154
## Observations: 156
## Variables: 10
## $ Ladder <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,...
## $ SD_Ladder <int> 4, 13, 8, 9, 1, 11, 18, 15, 23, 10, 26, 62, 14...
## $ Positive_affect <int> 41, 24, 16, 3, 12, 44, 34, 22, 18, 64, 47, 4, ...
## $ Negative_affect <int> 10, 26, 29, 3, 25, 21, 8, 12, 49, 24, 37, 87, ...
## $ Social_support <int> 2, 4, 3, 1, 15, 13, 25, 5, 20, 31, 7, 42, 38, ...
## $ Freedom <int> 5, 6, 3, 7, 19, 11, 10, 8, 9, 26, 17, 16, 93, ...
## $ Corruption <int> 4, 3, 8, 45, 12, 7, 6, 5, 11, 19, 13, 58, 74, ...
## $ Generosity <int> 47, 22, 11, 3, 7, 16, 17, 8, 14, 25, 6, 75, 24...
## $ Log_GDP <int> 22, 14, 7, 15, 12, 8, 13, 26, 19, 16, 18, 67, ...
## $ life_expectancy <int> 27, 23, 12, 13, 18, 4, 17, 14, 8, 15, 10, 28, ...
## Observations: 156
## Variables: 1
## $ Country_region <fct> Finland, Denmark, Norway, Iceland, Netherlands,...
## Observations: 156
## Variables: 0
## Observations: 156
## Variables: 1
## $ Country <fct> Finland, Denmark, Norway, Iceland, Netherlands, Switze...
Dplyr filter
## # A tibble: 6 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chee~ Acin~ carni Carn~ lc 12.1 NA NA
## 2 Owl ~ Aotus omni Prim~ <NA> 17 1.8 NA
## 3 Moun~ Aplo~ herbi Rode~ nt 14.4 2.4 NA
## 4 Grea~ Blar~ omni Sori~ lc 14.9 2.3 0.133
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667
## 6 Thre~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767
## # ... with 3 more variables: awake <dbl>, brainwt <dbl>, bodywt <dbl>
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## Observations: 28
## Variables: 11
## $ name <chr> "Cheetah", "Cow", "Northern fur seal", "Dog", "Ro...
## $ genus <chr> "Acinonyx", "Bos", "Callorhinus", "Canis", "Capre...
## $ vore <chr> "carni", "herbi", "carni", "carni", "herbi", "her...
## $ order <chr> "Carnivora", "Artiodactyla", "Carnivora", "Carniv...
## $ conservation <chr> "lc", "domesticated", "vu", "domesticated", "lc",...
## $ sleep_total <dbl> 12.1, 4.0, 8.7, 10.1, 3.0, 5.3, 3.9, 2.9, 3.1, 10...
## $ sleep_rem <dbl> NA, 0.7, 1.4, 2.9, NA, 0.6, NA, 0.6, 0.4, 1.1, 0....
## $ sleep_cycle <dbl> NA, 0.67, 0.38, 0.33, NA, NA, NA, 1.00, NA, NA, N...
## $ awake <dbl> 11.9, 20.0, 15.3, 13.9, 21.0, 18.7, 20.1, 21.1, 2...
## $ brainwt <dbl> NA, 0.423, NA, 0.070, 0.098, 0.115, 4.603, 0.655,...
## $ bodywt <dbl> 50.0, 600.0, 20.5, 14.0, 14.8, 33.5, 2547.0, 521....
## Observations: 4
## Variables: 11
## $ name <chr> "Asian elephant", "Golden hamster", "Tiger", "Gia...
## $ genus <chr> "Elephas", "Mesocricetus", "Panthera", "Priodontes"
## $ vore <chr> "herbi", "herbi", "carni", "insecti"
## $ order <chr> "Proboscidea", "Rodentia", "Carnivora", "Cingulata"
## $ conservation <chr> "en", "en", "en", "en"
## $ sleep_total <dbl> 3.9, 14.3, 15.8, 18.1
## $ sleep_rem <dbl> NA, 3.1, NA, 6.1
## $ sleep_cycle <dbl> NA, 0.2, NA, NA
## $ awake <dbl> 20.1, 9.7, 8.2, 5.9
## $ brainwt <dbl> 4.603, 0.001, NA, 0.081
## $ bodywt <dbl> 2547.00, 0.12, 162.56, 60.00
## filter on the basis of two conditions
### and: keep only the rows where both conditions are true
m %>% filter(bodywt>=2,conservation =="en") %>% glimpse() ##and
## Observations: 3
## Variables: 11
## $ name <chr> "Asian elephant", "Tiger", "Giant armadillo"
## $ genus <chr> "Elephas", "Panthera", "Priodontes"
## $ vore <chr> "herbi", "carni", "insecti"
## $ order <chr> "Proboscidea", "Carnivora", "Cingulata"
## $ conservation <chr> "en", "en", "en"
## $ sleep_total <dbl> 3.9, 15.8, 18.1
## $ sleep_rem <dbl> NA, NA, 6.1
## $ sleep_cycle <dbl> NA, NA, NA
## $ awake <dbl> 20.1, 8.2, 5.9
## $ brainwt <dbl> 4.603, NA, 0.081
## $ bodywt <dbl> 2547, 163, 60
## Observations: 15
## Variables: 11
## $ name <chr> "Cheetah", "Northern fur seal", "Dog", "Asian ele...
## $ genus <chr> "Acinonyx", "Callorhinus", "Canis", "Elephas", "F...
## $ vore <chr> "carni", "carni", "carni", "herbi", "carni", "car...
## $ order <chr> "Carnivora", "Carnivora", "Carnivora", "Proboscid...
## $ conservation <chr> "lc", "vu", "domesticated", "en", "domesticated",...
## $ sleep_total <dbl> 12.1, 8.7, 10.1, 3.9, 12.5, 6.2, 14.3, 15.8, 10.4...
## $ sleep_rem <dbl> NA, 1.4, 2.9, NA, 3.2, 1.5, 3.1, NA, NA, NA, 0.4,...
## $ sleep_cycle <dbl> NA, 0.38, 0.33, NA, 0.42, NA, 0.20, NA, NA, NA, N...
## $ awake <dbl> 11.9, 15.3, 13.9, 20.1, 11.5, 17.8, 9.7, 8.2, 13....
## $ brainwt <dbl> NA, NA, 0.070, 4.603, 0.026, 0.325, 0.001, NA, 0....
## $ bodywt <dbl> 50.00, 20.49, 14.00, 2547.00, 3.30, 85.00, 0.12, ...
######################################################################
######### row filtering: subset rows |
data(msleep) ## mammal sleep data
head(msleep)
## # A tibble: 6 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chee~ Acin~ carni Carn~ lc 12.1 NA NA
## 2 Owl ~ Aotus omni Prim~ <NA> 17 1.8 NA
## 3 Moun~ Aplo~ herbi Rode~ nt 14.4 2.4 NA
## 4 Grea~ Blar~ omni Sori~ lc 14.9 2.3 0.133
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667
## 6 Thre~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767
## # ... with 3 more variables: awake <dbl>, brainwt <dbl>, bodywt <dbl>
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## Observations: 71
## Variables: 11
## $ name <chr> "Owl monkey", "Mountain beaver", "Greater short-t...
## $ genus <chr> "Aotus", "Aplodontia", "Blarina", "Bos", "Bradypu...
## $ vore <chr> "omni", "herbi", "omni", "herbi", "herbi", NA, "h...
## $ order <chr> "Primates", "Rodentia", "Soricomorpha", "Artiodac...
## $ conservation <chr> NA, "nt", "lc", "domesticated", NA, NA, "lc", "lc...
## $ sleep_total <dbl> 17.0, 14.4, 14.9, 4.0, 14.4, 7.0, 3.0, 5.3, 9.4, ...
## $ sleep_rem <dbl> 1.8, 2.4, 2.3, 0.7, 2.2, NA, NA, 0.6, 0.8, 0.7, 1...
## $ sleep_cycle <dbl> NA, NA, 0.13, 0.67, 0.77, NA, NA, NA, 0.22, NA, 0...
## $ awake <dbl> 7.0, 9.6, 9.1, 20.0, 9.6, 17.0, 21.0, 18.7, 14.6,...
## $ brainwt <dbl> 0.01550, NA, 0.00029, 0.42300, NA, NA, 0.09820, 0...
## $ bodywt <dbl> 0.480, 1.350, 0.019, 600.000, 3.850, 0.045, 14.80...
## Observations: 31
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Br...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", NA, "domesticated", "lc", "...
## $ sleep_total <dbl> 12, 17, 14, 15, 14, 12, 17, 18, 20, 15, 12, 19, 1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 2.2, 1.5, 3.1, 4.9, 3.9, NA, 3...
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.77, 0.12, 0.38, 0.33, 0.12, N...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 9.6, 11.5, 6.6, 6.0, 4.3, 9....
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, NA, 0.00640, 0.01080, 0...
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 3.850, 0.420, 3.500,...
## # A tibble: 11 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Nort~ Call~ carni Carn~ vu 8.7 1.4 0.383
## 2 Asia~ Elep~ herbi Prob~ en 3.9 NA NA
## 3 Mong~ Lemur herbi Prim~ vu 9.5 0.9 NA
## 4 Afri~ Loxo~ herbi Prob~ vu 3.3 NA NA
## 5 Gold~ Meso~ herbi Rode~ en 14.3 3.1 0.2
## 6 Tiger Pant~ carni Carn~ en 15.8 NA NA
## 7 Lion Pant~ carni Carn~ vu 13.5 NA NA
## 8 Casp~ Phoca carni Carn~ vu 3.5 0.4 NA
## 9 Comm~ Phoc~ carni Ceta~ vu 5.6 NA NA
## 10 Gian~ Prio~ inse~ Cing~ en 18.1 6.1 NA
## 11 Braz~ Tapi~ herbi Peri~ vu 4.4 1 0.9
## # ... with 3 more variables: awake <dbl>, brainwt <dbl>, bodywt <dbl>
## Observations: 32
## Variables: 1
## $ vore <chr> "herbi", "herbi", "herbi", "herbi", "herbi", "herbi", "he...
Create new column using mutate
## # A tibble: 6 x 11
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Chee~ Acin~ carni Carn~ lc 12.1 NA NA
## 2 Owl ~ Aotus omni Prim~ <NA> 17 1.8 NA
## 3 Moun~ Aplo~ herbi Rode~ nt 14.4 2.4 NA
## 4 Grea~ Blar~ omni Sori~ lc 14.9 2.3 0.133
## 5 Cow Bos herbi Arti~ domesticated 4 0.7 0.667
## 6 Thre~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767
## # ... with 3 more variables: awake <dbl>, brainwt <dbl>, bodywt <dbl>
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
### create a new column/variable named S
## mutate() keeps all existing columns and appends the new variable
m %>% mutate(S=sleep_total+sleep_rem+sleep_cycle) %>% glimpse()
## Observations: 83
## Variables: 12
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## $ S <dbl> NA, NA, NA, 17.3, 5.4, 17.4, 10.5, NA, 13.3, NA, ...
## # A tibble: 61 x 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Owl ~ Aotus omni Prim~ <NA> 17 1.8 NA
## 2 Moun~ Aplo~ herbi Rode~ nt 14.4 2.4 NA
## 3 Grea~ Blar~ omni Sori~ lc 14.9 2.3 0.133
## 4 Cow Bos herbi Arti~ domesticated 4 0.7 0.667
## 5 Thre~ Brad~ herbi Pilo~ <NA> 14.4 2.2 0.767
## 6 Nort~ Call~ carni Carn~ vu 8.7 1.4 0.383
## 7 Dog Canis carni Carn~ domesticated 10.1 2.9 0.333
## 8 Goat Capri herbi Arti~ lc 5.3 0.6 NA
## 9 Guin~ Cavis herbi Rode~ domesticated 9.4 0.8 0.217
## 10 Griv~ Cerc~ omni Prim~ lc 10 0.7 NA
## # ... with 51 more rows, and 4 more variables: awake <dbl>, brainwt <dbl>,
## # bodywt <dbl>, S <dbl>
## Observations: 32
## Variables: 12
## $ name <chr> "Greater short-tailed shrew", "Cow", "Three-toed ...
## $ genus <chr> "Blarina", "Bos", "Bradypus", "Callorhinus", "Can...
## $ vore <chr> "omni", "herbi", "herbi", "carni", "carni", "herb...
## $ order <chr> "Soricomorpha", "Artiodactyla", "Pilosa", "Carniv...
## $ conservation <chr> "lc", "domesticated", NA, "vu", "domesticated", "...
## $ sleep_total <dbl> 14.9, 4.0, 14.4, 8.7, 10.1, 9.4, 12.5, 9.1, 17.4,...
## $ sleep_rem <dbl> 2.3, 0.7, 2.2, 1.4, 2.9, 0.8, 1.5, 1.4, 3.1, 4.9,...
## $ sleep_cycle <dbl> 0.13, 0.67, 0.77, 0.38, 0.33, 0.22, 0.12, 0.15, 0...
## $ awake <dbl> 9.1, 20.0, 9.6, 15.3, 13.9, 14.6, 11.5, 14.9, 6.6...
## $ brainwt <dbl> 0.00029, 0.42300, NA, NA, 0.07000, 0.00550, 0.006...
## $ bodywt <dbl> 0.019, 600.000, 3.850, 20.490, 14.000, 0.728, 0.4...
## $ S <dbl> 17.3, 5.4, 17.4, 10.5, 13.3, 10.4, 14.1, 10.6, 20...
### create a new variable and drop all the existing columns (only S is kept)
m %>% transmute(S=sleep_total+sleep_rem+sleep_cycle) %>% glimpse()
## Observations: 83
## Variables: 1
## $ S <dbl> NA, NA, NA, 17.3, 5.4, 17.4, 10.5, NA, 13.3, NA, NA, 10.4, N...
Dplyr group_by
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## # A tibble: 7 x 2
## conservation `n()`
## <chr> <int>
## 1 cd 2
## 2 domesticated 10
## 3 en 4
## 4 lc 27
## 5 nt 4
## 6 vu 7
## 7 <NA> 29
## # A tibble: 7 x 2
## conservation mean_a
## <chr> <dbl>
## 1 cd 21.7
## 2 domesticated 16.4
## 3 en 11.0
## 4 lc 12.6
## 5 nt 11.0
## 6 vu 17.1
## 7 <NA> 12.8
## # A tibble: 22 x 3
## # Groups: conservation [7]
## conservation vore mean_a
## <chr> <chr> <dbl>
## 1 cd carni 21.4
## 2 cd herbi 22.1
## 3 domesticated carni 12.7
## 4 domesticated herbi 17.7
## 5 domesticated omni 14.9
## 6 en carni 8.2
## 7 en herbi 14.9
## 8 en insecti 5.9
## 9 lc carni 10.1
## 10 lc herbi 13.9
## # ... with 12 more rows
## ID Name Sex Age Height Weight Team NOC
## 1 1 A Dijiang M 24 180 80 China CHN
## 2 2 A Lamusi M 23 170 60 China CHN
## 3 3 Gunnar Nielsen Aaby M 24 NA NA Denmark DEN
## 4 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 5 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## 6 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## Games Year Season City Sport
## 1 1992 Summer 1992 Summer Barcelona Basketball
## 2 2012 Summer 2012 Summer London Judo
## 3 1920 Summer 1920 Summer Antwerpen Football
## 4 1900 Summer 1900 Summer Paris Tug-Of-War
## 5 1988 Winter 1988 Winter Calgary Speed Skating
## 6 1988 Winter 1988 Winter Calgary Speed Skating
## Event Medal
## 1 Basketball Men's Basketball <NA>
## 2 Judo Men's Extra-Lightweight <NA>
## 3 Football Men's Football <NA>
## 4 Tug-Of-War Men's Tug-Of-War Gold
## 5 Speed Skating Women's 500 metres <NA>
## 6 Speed Skating Women's 1,000 metres <NA>
## Observations: 271,116
## Variables: 15
## $ ID <int> 1, 2, 3, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7...
## $ Name <fct> A Dijiang, A Lamusi, Gunnar Nielsen Aaby, Edgar Lindena...
## $ Sex <fct> M, M, M, M, F, F, F, F, F, F, M, M, M, M, M, M, M, M, M...
## $ Age <int> 24, 23, 24, 34, 21, 21, 25, 25, 27, 27, 31, 31, 31, 31,...
## $ Height <int> 180, 170, NA, NA, 185, 185, 185, 185, 185, 185, 188, 18...
## $ Weight <dbl> 80, 60, NA, NA, 82, 82, 82, 82, 82, 82, 75, 75, 75, 75,...
## $ Team <fct> China, China, Denmark, Denmark/Sweden, Netherlands, Net...
## $ NOC <fct> CHN, CHN, DEN, DEN, NED, NED, NED, NED, NED, NED, USA, ...
## $ Games <fct> 1992 Summer, 2012 Summer, 1920 Summer, 1900 Summer, 198...
## $ Year <int> 1992, 2012, 1920, 1900, 1988, 1988, 1992, 1992, 1994, 1...
## $ Season <fct> Summer, Summer, Summer, Summer, Winter, Winter, Winter,...
## $ City <fct> Barcelona, London, Antwerpen, Paris, Calgary, Calgary, ...
## $ Sport <fct> Basketball, Judo, Football, Tug-Of-War, Speed Skating, ...
## $ Event <fct> "Basketball Men's Basketball", "Judo Men's Extra-Lightw...
## $ Medal <fct> NA, NA, NA, Gold, NA, NA, NA, NA, NA, NA, NA, NA, NA, N...
## Observations: 13,688
## Variables: 15
## $ ID <int> 22, 51, 51, 51, 51, 51, 51, 55, 62, 65, 73, 105, 110, 1...
## $ Name <fct> Andreea Aanei, Nstor Abad Sanjun, Nstor Abad Sanjun, Ns...
## $ Sex <fct> F, M, M, M, M, M, M, M, M, F, M, M, M, F, M, F, F, F, M...
## $ Age <int> 22, 23, 23, 23, 23, 23, 23, 26, 21, 21, 31, 23, 20, 23,...
## $ Height <int> 170, 167, 167, 167, 167, 167, 167, 170, 198, 165, 182, ...
## $ Weight <dbl> 125, 64, 64, 64, 64, 64, 64, 65, 90, 49, 86, 75, 66, 72...
## $ Team <fct> Romania, Spain, Spain, Spain, Spain, Spain, Spain, Spai...
## $ NOC <fct> ROU, ESP, ESP, ESP, ESP, ESP, ESP, ESP, ITA, AZE, FRA, ...
## $ Games <fct> 2016 Summer, 2016 Summer, 2016 Summer, 2016 Summer, 201...
## $ Year <int> 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2016, 2...
## $ Season <fct> Summer, Summer, Summer, Summer, Summer, Summer, Summer,...
## $ City <fct> Rio de Janeiro, Rio de Janeiro, Rio de Janeiro, Rio de ...
## $ Sport <fct> Weightlifting, Gymnastics, Gymnastics, Gymnastics, Gymn...
## $ Event <fct> "Weightlifting Women's Super-Heavyweight", "Gymnastics ...
## $ Medal <fct> NA, NA, NA, NA, NA, NA, NA, NA, Bronze, Bronze, Silver,...
## # A tibble: 6 x 2
## Sport mean_age
## <fct> <dbl>
## 1 Archery 25.2
## 2 Athletics 26.4
## 3 Badminton 26.8
## 4 Basketball 27.7
## 5 Beach Volleyball 28.7
## 6 Boxing 25.1
## # A tibble: 6 x 3
## Sport mean_age mean_ht
## <fct> <dbl> <dbl>
## 1 Trampolining 25.1 168.
## 2 Triathlon 27.7 173.
## 3 Volleyball 27.1 190.
## 4 Water Polo 26.6 185.
## 5 Weightlifting 24.8 167.
## 6 Wrestling 26.6 173.
###### per-sport correlation between two quantitative variables
# cor() is called with its default NA handling, so a sport whose Height
# or Weight contains NA reports NA; tail() keeps the last six sports.
s2016 %>%
  group_by(Sport) %>%
  summarise(corrl = cor(Height, Weight)) %>%
  tail()## # A tibble: 6 x 2
## Sport corrl
## <fct> <dbl>
## 1 Trampolining 0.819
## 2 Triathlon NA
## 3 Volleyball 0.810
## 4 Water Polo 0.865
## 5 Weightlifting 0.855
## 6 Wrestling 0.855
## # A tibble: 6 x 15
## # Groups: Sport, Sex [2]
## ID Name Sex Age Height Weight Team NOC Games Year Season
## <int> <fct> <fct> <int> <int> <dbl> <fct> <fct> <fct> <int> <fct>
## 1 15436 "Jul~ F 62 157 48 New ~ NZL 2016~ 2016 Summer
## 2 45610 "Mar~ F 61 173 63 Aust~ AUS 2016~ 2016 Summer
## 3 45610 "Mar~ F 61 173 63 Aust~ AUS 2016~ 2016 Summer
## 4 46990 "Suz~ F 60 165 68 Aust~ AUS 2016~ 2016 Summer
## 5 46990 "Suz~ F 60 165 68 Aust~ AUS 2016~ 2016 Summer
## 6 120661 "Mar~ M 60 190 78 New ~ NZL 2016~ 2016 Summer
## # ... with 4 more variables: City <fct>, Sport <fct>, Event <fct>,
## # Medal <fct>
# do() evaluates its expression once per Sport group. Because
# head(., 2) returns an unnamed data frame, the per-group results are
# row-bound back together with the grouping variable retained.
s2016 %>%
  group_by(Sport) %>%
  do(head(., 2)) %>%
  head()## # A tibble: 6 x 15
## # Groups: Sport [3]
## ID Name Sex Age Height Weight Team NOC Games Year Season
## <int> <fct> <fct> <int> <int> <dbl> <fct> <fct> <fct> <int> <fct>
## 1 1277 "Car~ F 20 160 59 Colo~ COL 2016~ 2016 Summer
## 2 1277 "Car~ F 20 160 59 Colo~ COL 2016~ 2016 Summer
## 3 55 "Ant~ M 26 170 65 Spain ESP 2016~ 2016 Summer
## 4 110 "Abu~ M 20 175 66 Bahr~ BRN 2016~ 2016 Summer
## 5 433 "Pab~ M 31 177 68 Spain ESP 2016~ 2016 Summer
## 6 867 "Chr~ M 27 183 80 Grea~ GBR 2016~ 2016 Summer
## # ... with 4 more variables: City <fct>, Sport <fct>, Event <fct>,
## # Medal <fct>
Summarizing the Dplyr verbs
##################################################################
####### Intro to dplyr: package for data summarizing
head(airquality)## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
library(datasets)
library(dplyr)
############# Column selection
# keep only the Ozone and Month columns, then preview the first rows
selecn <- airquality %>% select(Ozone, Month)
head(selecn)## Ozone Month
## 1 41 5
## 2 36 5
## 3 12 5
## 4 18 5
## 5 NA 5
## 6 28 5
## Ozone Solar.R Wind Temp Month
## 1 41 190 7.4 67 5
## 2 36 118 8.0 72 5
## 3 12 149 12.6 74 5
## 4 18 313 11.5 62 5
## 5 NA NA 14.3 56 5
## 6 28 NA 14.9 66 5
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 28 NA 14.9 66 5 6
## 4 34 307 12.0 66 5 17
## 5 30 322 11.5 68 5 19
## 6 32 92 12.0 61 5 24
## 7 45 252 14.9 81 5 29
## 8 115 223 5.7 79 5 30
## 9 37 279 7.4 76 5 31
## 10 29 127 9.7 82 6 7
## 11 71 291 13.8 90 6 9
## 12 39 323 11.5 87 6 10
## 13 37 284 20.7 72 6 17
## 14 135 269 4.1 84 7 1
## 15 49 248 9.2 85 7 2
## 16 32 236 9.2 81 7 3
## 17 64 175 4.6 83 7 5
## 18 40 314 10.9 83 7 6
## 19 77 276 5.1 88 7 7
## 20 97 267 6.3 92 7 8
## 21 97 272 5.7 92 7 9
## 22 85 175 7.4 89 7 10
## 23 27 175 14.9 81 7 13
## 24 48 260 6.9 81 7 16
## 25 35 274 10.3 82 7 17
## 26 61 285 6.3 84 7 18
## 27 79 187 5.1 87 7 19
## 28 63 220 11.5 85 7 20
## 29 80 294 8.6 86 7 24
## 30 108 223 8.0 85 7 25
## 31 52 82 12.0 86 7 27
## 32 82 213 7.4 88 7 28
## 33 50 275 7.4 86 7 29
## 34 64 253 7.4 83 7 30
## 35 59 254 9.2 81 7 31
## 36 39 83 6.9 81 8 1
## 37 78 NA 6.9 86 8 4
## 38 35 NA 7.4 85 8 5
## 39 66 NA 4.6 87 8 6
## 40 122 255 4.0 89 8 7
## 41 89 229 10.3 90 8 8
## 42 110 207 8.0 90 8 9
## 43 44 192 11.5 86 8 12
## 44 28 273 11.5 82 8 13
## 45 65 157 9.7 80 8 14
## 46 59 51 6.3 79 8 17
## 47 31 244 10.9 78 8 19
## 48 44 190 10.3 78 8 20
## 49 45 212 9.7 79 8 24
## 50 168 238 3.4 81 8 25
## 51 73 215 8.0 86 8 26
## 52 76 203 9.7 97 8 28
## 53 118 225 2.3 94 8 29
## 54 84 237 6.3 96 8 30
## 55 85 188 6.3 94 8 31
## 56 96 167 6.9 91 9 1
## 57 78 197 5.1 92 9 2
## 58 73 183 2.8 93 9 3
## 59 91 189 4.6 93 9 4
## 60 47 95 7.4 87 9 5
## 61 32 92 15.5 84 9 6
## 62 44 236 14.9 81 9 11
## 63 28 238 6.3 77 9 13
## 64 46 237 6.9 78 9 16
## 65 36 139 10.3 81 9 23
## 66 30 193 6.9 70 9 26
## Ozone Solar.R Wind Temp Month Day
## 1 45 252 14.9 81 5 29
## 2 115 223 5.7 79 5 30
## 3 37 279 7.4 76 5 31
## 4 29 127 9.7 82 6 7
## 5 71 291 13.8 90 6 9
## 6 39 323 11.5 87 6 10
## Ozone Solar.R Wind Temp Month Day TempInC
## 1 41 190 7.4 67 5 1 19
## 2 36 118 8.0 72 5 2 22
## 3 12 149 12.6 74 5 3 23
## 4 18 313 11.5 62 5 4 17
## 5 NA NA 14.3 56 5 5 13
## 6 28 NA 14.9 66 5 6 19
####### summarize and group by data
summarise(airquality, min(Ozone, na.rm = TRUE)) #minimum ozone value (NAs ignored)## min(Ozone, na.rm = TRUE)
## 1 1
## group: average wind value per month
## Month is the grouping variable
airquality %>%
  group_by(Month) %>%
  summarise(mean(Wind, na.rm = TRUE))## # A tibble: 5 x 2
## Month `mean(Wind, na.rm = TRUE)`
## <int> <dbl>
## 1 5 11.6
## 2 6 10.3
## 3 7 8.94
## 4 8 8.79
## 5 9 10.2
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## Ozone Month
## 1 41 5
## 2 36 5
## 3 12 5
## 4 18 5
## 5 NA 5
## 6 28 5
## Ozone Temp
## 1 41 67
## 2 36 72
## 3 12 74
## 4 18 62
## 5 NA 56
## 6 28 66
## one-row summary of Ozone: mean, minimum, maximum and the row count
# total = n() counts every row, including rows where Ozone is NA
summarise(
  airquality,
  avg = mean(Ozone, na.rm = TRUE),
  min = min(Ozone, na.rm = TRUE),
  max = max(Ozone, na.rm = TRUE),
  total = n()
)## avg min max total
## 1 42 1 168 153
## # A tibble: 5 x 2
## Month `mean(Temp, na.rm = TRUE)`
## <int> <dbl>
## 1 5 65.5
## 2 6 79.1
## 3 7 83.9
## 4 8 84.0
## 5 9 76.9
## remove the 5th month, then compute the mean temperature per month
filter(airquality, Month != 5) %>%
  group_by(Month) %>%
  summarise(mean(Temp, na.rm = TRUE))## # A tibble: 4 x 2
## Month `mean(Temp, na.rm = TRUE)`
## <int> <dbl>
## 1 6 79.1
## 2 7 83.9
## 3 8 84.0
## 4 9 76.9
## Month Ozone Wind
## 1 5 12 13
## 2 5 NA 14
## 3 5 28 15
## 4 5 19 14
## 5 5 8 20
## 6 5 18 13
Data Processing the Tidy Way: The “tidyr” Package
Importing data as tibble
library(tidyverse)
##tidyverse is a coherent system of packages for
##data manipulation, exploration and visualization that share a common design philosophy.
### underpinned by tibbles
#vignette("tibble")
class(iris)## [1] "data.frame"
## # A tibble: 6 x 5
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## <dbl> <dbl> <dbl> <dbl> <fct>
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## # A tibble: 6 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
rename columns in data frame
## Sepal.Length Sepal.Width Petal.Length petal_w Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
### select three columns and rename them in the same step
sep <- select(
  iris,
  sepal_l = Sepal.Length,
  sepal_w = Sepal.Width,
  iris_s = Species
)
head(sep)## sepal_l sepal_w iris_s
## 1 5.1 3.5 setosa
## 2 4.9 3.0 setosa
## 3 4.7 3.2 setosa
## 4 4.6 3.1 setosa
## 5 5.0 3.6 setosa
## 6 5.4 3.9 setosa
## sepal.length sepal.width petal.length petal.width species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## SEPAL.LENGTH SEPAL.WIDTH PETAL.LENGTH PETAL.WIDTH Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## Sepal.Length Sepal.Width petal.length petal.width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21 6 160 110 3.9 2.6 16 0 1 4 4
## Mazda RX4 Wag 21 6 160 110 3.9 2.9 17 0 1 4 4
## Datsun 710 23 4 108 93 3.8 2.3 19 1 1 4 1
## Hornet 4 Drive 21 6 258 110 3.1 3.2 19 1 0 3 1
## Hornet Sportabout 19 8 360 175 3.1 3.4 17 0 0 3 2
## Valiant 18 6 225 105 2.8 3.5 20 1 0 3 1
## model mpg cyl disp hp drat wt qsec vs am gear carb
## 1 Mazda RX4 21 6 160 110 3.9 2.6 16 0 1 4 4
## 2 Mazda RX4 Wag 21 6 160 110 3.9 2.9 17 0 1 4 4
## 3 Datsun 710 23 4 108 93 3.8 2.3 19 1 1 4 1
## 4 Hornet 4 Drive 21 6 258 110 3.1 3.2 19 1 0 3 1
## 5 Hornet Sportabout 19 8 360 175 3.1 3.4 17 0 0 3 2
## 6 Valiant 18 6 225 105 2.8 3.5 20 1 0 3 1
Summarize columns in data frame using gather and summarize
Reshape data frame from wide to long and long to wide
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
### wide to long
# lower-case the column names first, so the gathered keys are lower-case
names(airquality) <- tolower(names(airquality))
## wide --> long: gather() stacks every column into attribute/value pairs
qty <- gather(airquality, attribute, value)
head(qty)## attribute value
## 1 ozone 41
## 2 ozone 36
## 3 ozone 12
## 4 ozone 18
## 5 ozone NA
## 6 ozone 28
## attribute value
## 913 day 25
## 914 day 26
## 915 day 27
## 916 day 28
## 917 day 29
## 918 day 30
## temp month day attribute value
## 1 67 5 1 ozone 41
## 2 72 5 2 ozone 36
## 3 74 5 3 ozone 12
## 4 62 5 4 ozone 18
## 5 56 5 5 ozone NA
## 6 66 5 6 ozone 28
## temp month day attribute value
## 454 63 9 25 wind 16.6
## 455 70 9 26 wind 6.9
## 456 77 9 27 wind 13.2
## 457 75 9 28 wind 14.3
## 458 76 9 29 wind 8.0
## 459 68 9 30 wind 11.5
## month day variable value
## 1 5 1 ozone 41
## 2 5 2 ozone 36
## 3 5 3 ozone 12
## 4 5 4 ozone 18
## 6 5 6 ozone 28
## 7 5 7 ozone 23
## summarize: mean of each variable per month, then back to wide format
# NOTE(review): mean() is called without na.rm, so aqg is presumably
# already free of NA values - confirm where aqg is built.
s <- aqg %>%
  group_by(month, variable) %>%
  summarise(value = mean(value)) %>%
  spread(variable, value)
head(s)## # A tibble: 5 x 5
## # Groups: month [5]
## month ozone solar.r temp wind
## <int> <dbl> <dbl> <dbl> <dbl>
## 1 5 23.6 181. 65.5 11.6
## 2 6 29.4 190. 79.1 10.3
## 3 7 59.1 216. 83.9 8.94
## 4 8 60.0 172. 84.0 8.79
## 5 9 31.4 167. 76.9 10.2
Combining dataframes based on column matches
## # A tibble: 6 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## # ... with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
## # A tibble: 6 x 15
## origin year month day hour temp dewp humid wind_dir wind_speed
## <chr> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EWR 2013 1 1 1 39.0 26.1 59.4 270 10.4
## 2 EWR 2013 1 1 2 39.0 27.0 61.6 250 8.06
## 3 EWR 2013 1 1 3 39.0 28.0 64.4 240 11.5
## 4 EWR 2013 1 1 4 39.9 28.0 62.2 250 12.7
## 5 EWR 2013 1 1 5 39.0 28.0 64.4 260 12.7
## 6 EWR 2013 1 1 6 37.9 28.0 67.2 240 11.5
## # ... with 5 more variables: wind_gust <dbl>, precip <dbl>,
## # pressure <dbl>, visib <dbl>, time_hour <dttm>
## # A tibble: 6 x 9
## tailnum year type manufacturer model engines seats speed engine
## <chr> <int> <chr> <chr> <chr> <int> <int> <int> <chr>
## 1 N10156 2004 Fixed win~ EMBRAER EMB-1~ 2 55 NA Turbo~
## 2 N102UW 1998 Fixed win~ AIRBUS INDUST~ A320-~ 2 182 NA Turbo~
## 3 N103US 1999 Fixed win~ AIRBUS INDUST~ A320-~ 2 182 NA Turbo~
## 4 N104UW 1999 Fixed win~ AIRBUS INDUST~ A320-~ 2 182 NA Turbo~
## 5 N10575 2002 Fixed win~ EMBRAER EMB-1~ 2 55 NA Turbo~
## 6 N105UW 1999 Fixed win~ AIRBUS INDUST~ A320-~ 2 182 NA Turbo~
## # A tibble: 6 x 8
## year month day hour origin dest tailnum carrier
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA
## 2 2013 1 1 5 LGA IAH N24211 UA
## 3 2013 1 1 5 JFK MIA N619AA AA
## 4 2013 1 1 5 JFK BQN N804JB B6
## 5 2013 1 1 6 LGA ATL N668DN DL
## 6 2013 1 1 5 EWR ORD N39463 UA
## # A tibble: 6 x 2
## carrier name
## <chr> <chr>
## 1 9E Endeavor Air Inc.
## 2 AA American Airlines Inc.
## 3 AS Alaska Airlines Inc.
## 4 B6 JetBlue Airways
## 5 DL Delta Air Lines Inc.
## 6 EV ExpressJet Airlines Inc.
## # A tibble: 6 x 9
## year month day hour origin dest tailnum carrier name
## <int> <int> <int> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 2013 1 1 5 EWR IAH N14228 UA United Air Lines In~
## 2 2013 1 1 5 LGA IAH N24211 UA United Air Lines In~
## 3 2013 1 1 5 JFK MIA N619AA AA American Airlines I~
## 4 2013 1 1 5 JFK BQN N804JB B6 JetBlue Airways
## 5 2013 1 1 6 LGA ATL N668DN DL Delta Air Lines Inc.
## 6 2013 1 1 5 EWR ORD N39463 UA United Air Lines In~
## # A tibble: 6 x 15
## origin year month day hour temp dewp humid wind_dir wind_speed
## <chr> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EWR 2013 1 1 1 39.0 26.1 59.4 270 10.4
## 2 EWR 2013 1 1 2 39.0 27.0 61.6 250 8.06
## 3 EWR 2013 1 1 3 39.0 28.0 64.4 240 11.5
## 4 EWR 2013 1 1 4 39.9 28.0 62.2 250 12.7
## 5 EWR 2013 1 1 5 39.0 28.0 64.4 260 12.7
## 6 EWR 2013 1 1 6 37.9 28.0 67.2 240 11.5
## # ... with 5 more variables: wind_gust <dbl>, precip <dbl>,
## # pressure <dbl>, visib <dbl>, time_hour <dttm>
## # A tibble: 6 x 18
## year month day hour origin dest tailnum carrier temp dewp humid
## <dbl> <dbl> <int> <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 2013 1 1 5 EWR IAH N14228 UA 39.0 28.0 64.4
## 2 2013 1 1 5 LGA IAH N24211 UA 39.9 25.0 54.8
## 3 2013 1 1 5 JFK MIA N619AA AA 39.0 27.0 61.6
## 4 2013 1 1 5 JFK BQN N804JB B6 39.0 27.0 61.6
## 5 2013 1 1 6 LGA ATL N668DN DL 39.9 25.0 54.8
## 6 2013 1 1 5 EWR ORD N39463 UA 39.0 28.0 64.4
## # ... with 7 more variables: wind_dir <dbl>, wind_speed <dbl>,
## # wind_gust <dbl>, precip <dbl>, pressure <dbl>, visib <dbl>,
## # time_hour <dttm>
## semi-join: keep all observations in x that have a match in y
# topd holds the ten destinations with the most flights, busiest first
topd <- flights %>%
  count(dest, sort = TRUE) %>%
  head(10)
head(topd)## # A tibble: 6 x 2
## dest n
## <chr> <int>
## 1 ORD 17283
## 2 ATL 17215
## 3 LAX 16174
## 4 BOS 15508
## 5 MCO 14082
## 6 CLT 14064
## # A tibble: 141,145 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 554 600 -6 812
## 3 2013 1 1 554 558 -4 740
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 838
## 6 2013 1 1 558 600 -2 753
## 7 2013 1 1 558 600 -2 924
## 8 2013 1 1 558 600 -2 923
## 9 2013 1 1 559 559 0 702
## 10 2013 1 1 600 600 0 851
## # ... with 141,135 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## anti-join: drop all observations in x that have a match in y
## useful for discovering key mismatches
## here: count the flights per tailnum that has no match in planes
anti_join(flights, planes, by = "tailnum") %>%
  count(tailnum, sort = TRUE)## # A tibble: 722 x 2
## tailnum n
## <chr> <int>
## 1 <NA> 2512
## 2 N725MQ 575
## 3 N722MQ 513
## 4 N723MQ 507
## 5 N713MQ 483
## 6 N735MQ 396
## 7 N0EGMQ 371
## 8 N534MQ 364
## 9 N542MQ 363
## 10 N531MQ 349
## # ... with 712 more rows
Nesting
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
## # A tibble: 142 x 2
## country data
## <fct> <list>
## 1 Afghanistan <tibble [12 x 5]>
## 2 Albania <tibble [12 x 5]>
## 3 Algeria <tibble [12 x 5]>
## 4 Angola <tibble [12 x 5]>
## 5 Argentina <tibble [12 x 5]>
## 6 Australia <tibble [12 x 5]>
## 7 Austria <tibble [12 x 5]>
## 8 Bahrain <tibble [12 x 5]>
## 9 Bangladesh <tibble [12 x 5]>
## 10 Belgium <tibble [12 x 5]>
## # ... with 132 more rows
## [[1]]
## # A tibble: 12 x 5
## continent year lifeExp pop gdpPercap
## <fct> <int> <dbl> <int> <dbl>
## 1 Europe 1952 68 8730405 8343.
## 2 Europe 1957 69.2 8989111 9715.
## 3 Europe 1962 70.2 9218400 10991.
## 4 Europe 1967 70.9 9556500 13149.
## 5 Europe 1972 71.4 9709100 16672.
## 6 Europe 1977 72.8 9821800 19118.
## 7 Europe 1982 73.9 9856303 20980.
## 8 Europe 1987 75.4 9870200 22526.
## 9 Europe 1992 76.5 10045622 25576.
## 10 Europe 1997 77.5 10199787 27561.
## 11 Europe 2002 78.3 10311970 30486.
## 12 Europe 2007 79.4 10392226 33693.
# nest every column except country and continent into a list-column
d2 <- nest(gapminder, -country, -continent)
###########
###### t-test
#library(tidyverse)
library(broom)
head(mtcars)## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21 6 160 110 3.9 2.6 16 0 1 4 4
## Mazda RX4 Wag 21 6 160 110 3.9 2.9 17 0 1 4 4
## Datsun 710 23 4 108 93 3.8 2.3 19 1 1 4 1
## Hornet 4 Drive 21 6 258 110 3.1 3.2 19 1 0 3 1
## Hornet Sportabout 19 8 360 175 3.1 3.4 17 0 0 3 2
## Valiant 18 6 225 105 2.8 3.5 20 1 0 3 1
## [[1]]
##
## One Sample t-test
##
## data: .$mpg
## t = 20, df = 10, p-value = 0.000000003
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 24 30
## sample estimates:
## mean of x
## 27
## # A tibble: 3 x 4
## cyl data fit p
## <dbl> <list> <list> <dbl>
## 1 6 <tibble [7 x 10]> <htest> 3.10e- 8
## 2 4 <tibble [11 x 10]> <htest> 2.60e- 9
## 3 8 <tibble [14 x 10]> <htest> 1.09e-11
## data formatting -- unnest
# Nest mtcars by cyl, run a one-sample t-test on mpg within each group,
# summarise each test with glance(), then unnest those summaries into
# ordinary columns.
mtcars %>%
  nest(-cyl) %>%
  mutate(
    fit = map(data, ~ t.test(.$mpg)),
    results = map(fit, glance)
  ) %>%
  unnest(results)## # A tibble: 3 x 11
## cyl data fit estimate statistic p.value parameter conf.low
## <dbl> <lis> <lis> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 6 <tib~ <hte~ 19.7 35.9 3.10e- 8 6 18.4
## 2 4 <tib~ <hte~ 26.7 19.6 2.60e- 9 10 23.6
## 3 8 <tib~ <hte~ 15.1 22.1 1.09e-11 13 13.6
## # ... with 3 more variables: conf.high <dbl>, method <chr>,
## # alternative <chr>
Dealing With Missing Values
Removing NAs- the ordinary way
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
## Ozone Solar.R Wind Temp Month
## Min. : 1 Min. : 7 Min. : 1.7 Min. :56 Min. :5
## 1st Qu.: 18 1st Qu.:116 1st Qu.: 7.4 1st Qu.:72 1st Qu.:6
## Median : 32 Median :205 Median : 9.7 Median :79 Median :7
## Mean : 42 Mean :186 Mean :10.0 Mean :78 Mean :7
## 3rd Qu.: 63 3rd Qu.:259 3rd Qu.:11.5 3rd Qu.:85 3rd Qu.:8
## Max. :168 Max. :334 Max. :20.7 Max. :97 Max. :9
## NA's :37 NA's :7
## Day
## Min. : 1.0
## 1st Qu.: 8.0
## Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
##
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## Ozone Solar.R Wind Temp Month
## Min. : 1 Min. : 7 Min. : 2.3 Min. :57 Min. :5.0
## 1st Qu.: 18 1st Qu.:114 1st Qu.: 7.4 1st Qu.:71 1st Qu.:6.0
## Median : 31 Median :207 Median : 9.7 Median :79 Median :7.0
## Mean : 42 Mean :185 Mean : 9.9 Mean :78 Mean :7.2
## 3rd Qu.: 62 3rd Qu.:256 3rd Qu.:11.5 3rd Qu.:84 3rd Qu.:9.0
## Max. :168 Max. :334 Max. :20.7 Max. :97 Max. :9.0
## Day
## Min. : 1.0
## 1st Qu.: 9.0
## Median :16.0
## Mean :15.9
## 3rd Qu.:22.5
## Max. :31.0
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 7 23 299 8.6 65 5 7
## 8 19 99 13.8 59 5 8
## Ozone Solar.R Wind Temp Month
## Min. : 1 Min. : 7 Min. : 2.3 Min. :57 Min. :5.0
## 1st Qu.: 18 1st Qu.:114 1st Qu.: 7.4 1st Qu.:71 1st Qu.:6.0
## Median : 31 Median :207 Median : 9.7 Median :79 Median :7.0
## Mean : 42 Mean :185 Mean : 9.9 Mean :78 Mean :7.2
## 3rd Qu.: 62 3rd Qu.:256 3rd Qu.:11.5 3rd Qu.:84 3rd Qu.:9.0
## Max. :168 Max. :334 Max. :20.7 Max. :97 Max. :9.0
## Day
## Min. : 1.0
## 1st Qu.: 9.0
## Median :16.0
## Mean :15.9
## 3rd Qu.:22.5
## Max. :31.0
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 0 0 14.3 56 5 5
## 6 28 0 14.9 66 5 6
## Ozone Solar.R Wind Temp Month
## Min. : 0 Min. : 0 Min. : 1.7 Min. :56 Min. :5
## 1st Qu.: 4 1st Qu.: 95 1st Qu.: 7.4 1st Qu.:72 1st Qu.:6
## Median : 21 Median :194 Median : 9.7 Median :79 Median :7
## Mean : 32 Mean :177 Mean :10.0 Mean :78 Mean :7
## 3rd Qu.: 46 3rd Qu.:256 3rd Qu.:11.5 3rd Qu.:85 3rd Qu.:8
## Max. :168 Max. :334 Max. :20.7 Max. :97 Max. :9
## Day
## Min. : 1.0
## 1st Qu.: 8.0
## Median :16.0
## Mean :15.8
## 3rd Qu.:23.0
## Max. :31.0
## replace missing values with the average value
# na.rm = TRUE drops the NAs while computing the mean of Ozone;
# without it the mean itself would be NA
meanOzone <- mean(airquality$Ozone, na.rm = TRUE)
# keep observed values and substitute the mean wherever Ozone is NA
aqty.fix <- ifelse(is.na(airquality$Ozone), meanOzone, airquality$Ozone)
summary(aqty.fix)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 21 42 42 46 168
## Wind Temp Month Day Solar.R Ozone
## 111 1 1 1 1 1 1 0
## 35 1 1 1 1 1 0 1
## 5 1 1 1 1 0 1 1
## 2 1 1 1 1 0 0 2
## 0 0 0 0 7 37 44
#111 observations with no missing values (complete rows)
library(VIM) # visualize the pattern of NAs
# aggr() summarises the missing values of aqty2; with sortVars = TRUE
# the variables are reported sorted by their number of missings
mp <- aggr(
  aqty2,
  col = c('navyblue', 'yellow'),
  numbers = TRUE,
  sortVars = TRUE,
  labels = names(aqty2),
  cex.axis = .7,
  gap = 3,
  ylab = c("Missing data", "Pattern")
)##
## Variables sorted by number of missings:
## Variable Count
## Ozone 0.242
## Solar.R 0.046
## Wind 0.000
## Temp 0.000
## Month 0.000
## Day 0.000
#72.5% of observations (111 of 153) have no missing values
#24.2% of Ozone values are missing; 22.9% of rows are missing only Ozone
#impute
#up to 50 iterations (maxit = 50) of predictive mean matching (pmm)
#m = 5 imputed datasets
im_aqty<- mice(aqty2, m=5, maxit = 50, method = 'pmm', seed = 500)##
## iter imp variable
## 1 1 Ozone Solar.R
## 1 2 Ozone Solar.R
## 1 3 Ozone Solar.R
## 1 4 Ozone Solar.R
## 1 5 Ozone Solar.R
## 2 1 Ozone Solar.R
## 2 2 Ozone Solar.R
## 2 3 Ozone Solar.R
## 2 4 Ozone Solar.R
## 2 5 Ozone Solar.R
## 3 1 Ozone Solar.R
## 3 2 Ozone Solar.R
## 3 3 Ozone Solar.R
## 3 4 Ozone Solar.R
## 3 5 Ozone Solar.R
## 4 1 Ozone Solar.R
## 4 2 Ozone Solar.R
## 4 3 Ozone Solar.R
## 4 4 Ozone Solar.R
## 4 5 Ozone Solar.R
## 5 1 Ozone Solar.R
## 5 2 Ozone Solar.R
## 5 3 Ozone Solar.R
## 5 4 Ozone Solar.R
## 5 5 Ozone Solar.R
## 6 1 Ozone Solar.R
## 6 2 Ozone Solar.R
## 6 3 Ozone Solar.R
## 6 4 Ozone Solar.R
## 6 5 Ozone Solar.R
## 7 1 Ozone Solar.R
## 7 2 Ozone Solar.R
## 7 3 Ozone Solar.R
## 7 4 Ozone Solar.R
## 7 5 Ozone Solar.R
## 8 1 Ozone Solar.R
## 8 2 Ozone Solar.R
## 8 3 Ozone Solar.R
## 8 4 Ozone Solar.R
## 8 5 Ozone Solar.R
## 9 1 Ozone Solar.R
## 9 2 Ozone Solar.R
## 9 3 Ozone Solar.R
## 9 4 Ozone Solar.R
## 9 5 Ozone Solar.R
## 10 1 Ozone Solar.R
## 10 2 Ozone Solar.R
## 10 3 Ozone Solar.R
## 10 4 Ozone Solar.R
## 10 5 Ozone Solar.R
## 11 1 Ozone Solar.R
## 11 2 Ozone Solar.R
## 11 3 Ozone Solar.R
## 11 4 Ozone Solar.R
## 11 5 Ozone Solar.R
## 12 1 Ozone Solar.R
## 12 2 Ozone Solar.R
## 12 3 Ozone Solar.R
## 12 4 Ozone Solar.R
## 12 5 Ozone Solar.R
## 13 1 Ozone Solar.R
## 13 2 Ozone Solar.R
## 13 3 Ozone Solar.R
## 13 4 Ozone Solar.R
## 13 5 Ozone Solar.R
## 14 1 Ozone Solar.R
## 14 2 Ozone Solar.R
## 14 3 Ozone Solar.R
## 14 4 Ozone Solar.R
## 14 5 Ozone Solar.R
## 15 1 Ozone Solar.R
## 15 2 Ozone Solar.R
## 15 3 Ozone Solar.R
## 15 4 Ozone Solar.R
## 15 5 Ozone Solar.R
## 16 1 Ozone Solar.R
## 16 2 Ozone Solar.R
## 16 3 Ozone Solar.R
## 16 4 Ozone Solar.R
## 16 5 Ozone Solar.R
## 17 1 Ozone Solar.R
## 17 2 Ozone Solar.R
## 17 3 Ozone Solar.R
## 17 4 Ozone Solar.R
## 17 5 Ozone Solar.R
## 18 1 Ozone Solar.R
## 18 2 Ozone Solar.R
## 18 3 Ozone Solar.R
## 18 4 Ozone Solar.R
## 18 5 Ozone Solar.R
## 19 1 Ozone Solar.R
## 19 2 Ozone Solar.R
## 19 3 Ozone Solar.R
## 19 4 Ozone Solar.R
## 19 5 Ozone Solar.R
## 20 1 Ozone Solar.R
## 20 2 Ozone Solar.R
## 20 3 Ozone Solar.R
## 20 4 Ozone Solar.R
## 20 5 Ozone Solar.R
## 21 1 Ozone Solar.R
## 21 2 Ozone Solar.R
## 21 3 Ozone Solar.R
## 21 4 Ozone Solar.R
## 21 5 Ozone Solar.R
## 22 1 Ozone Solar.R
## 22 2 Ozone Solar.R
## 22 3 Ozone Solar.R
## 22 4 Ozone Solar.R
## 22 5 Ozone Solar.R
## 23 1 Ozone Solar.R
## 23 2 Ozone Solar.R
## 23 3 Ozone Solar.R
## 23 4 Ozone Solar.R
## 23 5 Ozone Solar.R
## 24 1 Ozone Solar.R
## 24 2 Ozone Solar.R
## 24 3 Ozone Solar.R
## 24 4 Ozone Solar.R
## 24 5 Ozone Solar.R
## 25 1 Ozone Solar.R
## 25 2 Ozone Solar.R
## 25 3 Ozone Solar.R
## 25 4 Ozone Solar.R
## 25 5 Ozone Solar.R
## 26 1 Ozone Solar.R
## 26 2 Ozone Solar.R
## 26 3 Ozone Solar.R
## 26 4 Ozone Solar.R
## 26 5 Ozone Solar.R
## 27 1 Ozone Solar.R
## 27 2 Ozone Solar.R
## 27 3 Ozone Solar.R
## 27 4 Ozone Solar.R
## 27 5 Ozone Solar.R
## 28 1 Ozone Solar.R
## 28 2 Ozone Solar.R
## 28 3 Ozone Solar.R
## 28 4 Ozone Solar.R
## 28 5 Ozone Solar.R
## 29 1 Ozone Solar.R
## 29 2 Ozone Solar.R
## 29 3 Ozone Solar.R
## 29 4 Ozone Solar.R
## 29 5 Ozone Solar.R
## 30 1 Ozone Solar.R
## 30 2 Ozone Solar.R
## 30 3 Ozone Solar.R
## 30 4 Ozone Solar.R
## 30 5 Ozone Solar.R
## 31 1 Ozone Solar.R
## 31 2 Ozone Solar.R
## 31 3 Ozone Solar.R
## 31 4 Ozone Solar.R
## 31 5 Ozone Solar.R
## 32 1 Ozone Solar.R
## 32 2 Ozone Solar.R
## 32 3 Ozone Solar.R
## 32 4 Ozone Solar.R
## 32 5 Ozone Solar.R
## 33 1 Ozone Solar.R
## 33 2 Ozone Solar.R
## 33 3 Ozone Solar.R
## 33 4 Ozone Solar.R
## 33 5 Ozone Solar.R
## 34 1 Ozone Solar.R
## 34 2 Ozone Solar.R
## 34 3 Ozone Solar.R
## 34 4 Ozone Solar.R
## 34 5 Ozone Solar.R
## 35 1 Ozone Solar.R
## 35 2 Ozone Solar.R
## 35 3 Ozone Solar.R
## 35 4 Ozone Solar.R
## 35 5 Ozone Solar.R
## 36 1 Ozone Solar.R
## 36 2 Ozone Solar.R
## 36 3 Ozone Solar.R
## 36 4 Ozone Solar.R
## 36 5 Ozone Solar.R
## 37 1 Ozone Solar.R
## 37 2 Ozone Solar.R
## 37 3 Ozone Solar.R
## 37 4 Ozone Solar.R
## 37 5 Ozone Solar.R
## 38 1 Ozone Solar.R
## 38 2 Ozone Solar.R
## 38 3 Ozone Solar.R
## 38 4 Ozone Solar.R
## 38 5 Ozone Solar.R
## 39 1 Ozone Solar.R
## 39 2 Ozone Solar.R
## 39 3 Ozone Solar.R
## 39 4 Ozone Solar.R
## 39 5 Ozone Solar.R
## 40 1 Ozone Solar.R
## 40 2 Ozone Solar.R
## 40 3 Ozone Solar.R
## 40 4 Ozone Solar.R
## 40 5 Ozone Solar.R
## 41 1 Ozone Solar.R
## 41 2 Ozone Solar.R
## 41 3 Ozone Solar.R
## 41 4 Ozone Solar.R
## 41 5 Ozone Solar.R
## 42 1 Ozone Solar.R
## 42 2 Ozone Solar.R
## 42 3 Ozone Solar.R
## 42 4 Ozone Solar.R
## 42 5 Ozone Solar.R
## 43 1 Ozone Solar.R
## 43 2 Ozone Solar.R
## 43 3 Ozone Solar.R
## 43 4 Ozone Solar.R
## 43 5 Ozone Solar.R
## 44 1 Ozone Solar.R
## 44 2 Ozone Solar.R
## 44 3 Ozone Solar.R
## 44 4 Ozone Solar.R
## 44 5 Ozone Solar.R
## 45 1 Ozone Solar.R
## 45 2 Ozone Solar.R
## 45 3 Ozone Solar.R
## 45 4 Ozone Solar.R
## 45 5 Ozone Solar.R
## 46 1 Ozone Solar.R
## 46 2 Ozone Solar.R
## 46 3 Ozone Solar.R
## 46 4 Ozone Solar.R
## 46 5 Ozone Solar.R
## 47 1 Ozone Solar.R
## 47 2 Ozone Solar.R
## 47 3 Ozone Solar.R
## 47 4 Ozone Solar.R
## 47 5 Ozone Solar.R
## 48 1 Ozone Solar.R
## 48 2 Ozone Solar.R
## 48 3 Ozone Solar.R
## 48 4 Ozone Solar.R
## 48 5 Ozone Solar.R
## 49 1 Ozone Solar.R
## 49 2 Ozone Solar.R
## 49 3 Ozone Solar.R
## 49 4 Ozone Solar.R
## 49 5 Ozone Solar.R
## 50 1 Ozone Solar.R
## 50 2 Ozone Solar.R
## 50 3 Ozone Solar.R
## 50 4 Ozone Solar.R
## 50 5 Ozone Solar.R
## Class: mids
## Number of multiple imputations: 5
## Imputation methods:
## Ozone Solar.R Wind Temp Month Day
## "pmm" "pmm" "" "" "" ""
## PredictorMatrix:
## Ozone Solar.R Wind Temp Month Day
## Ozone 0 1 1 1 1 1
## Solar.R 1 0 1 1 1 1
## Wind 1 1 0 1 1 1
## Temp 1 1 1 0 1 1
## Month 1 1 1 1 0 1
## Day 1 1 1 1 1 0
## 1 2 3 4 5
## 5 6 32 14 18 6
## 10 12 23 27 21 41
## 25 8 19 6 14 19
## 26 32 9 28 19 28
## 27 18 22 37 18 9
## 32 59 47 44 45 52
## 33 16 16 20 11 18
## 34 1 13 13 37 13
## 35 44 71 40 40 71
## 36 35 64 89 35 39
## 37 14 13 30 30 44
## 39 115 91 135 168 82
## 42 64 77 168 66 76
## 43 61 91 135 82 91
## 45 23 29 45 44 59
## 46 45 63 29 45 32
## 52 16 71 47 52 52
## 53 20 64 35 23 49
## 54 45 37 40 52 35
## 55 20 39 23 20 7
## 56 13 40 45 36 45
## 57 36 35 52 46 44
## 58 32 16 21 23 23
## 59 16 52 31 39 28
## 60 23 14 13 44 24
## 61 40 85 48 71 48
## 65 23 16 23 28 59
## 72 59 47 29 52 35
## 75 35 89 59 59 108
## 83 32 23 23 44 35
## 84 28 47 35 7 35
## 102 115 85 80 168 91
## 103 16 39 16 32 47
## 107 12 22 41 22 23
## 115 24 22 16 21 36
## 119 64 78 82 61 78
## 150 12 32 12 21 16
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 6 115 14.3 56 5 5
## 6 28 274 14.9 66 5 6
Remove NAs using “dplyr”
################################################################
library(tidyverse)
# Load the mammal sleep data set and keep a working copy named `m`.
data(msleep)
m <- msleep
# Compact, column-by-column overview of the working copy.
glimpse(m)
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## # A tibble: 7 x 1
## conservation
## <chr>
## 1 lc
## 2 <NA>
## 3 nt
## 4 domesticated
## 5 vu
## 6 en
## 7 cd
## Observations: 20
## Variables: 11
## $ name <chr> "Greater short-tailed shrew", "Cow", "Dog", "Guin...
## $ genus <chr> "Blarina", "Bos", "Canis", "Cavis", "Chinchilla",...
## $ vore <chr> "omni", "herbi", "carni", "herbi", "herbi", "omni...
## $ order <chr> "Soricomorpha", "Artiodactyla", "Carnivora", "Rod...
## $ conservation <chr> "lc", "domesticated", "domesticated", "domesticat...
## $ sleep_total <dbl> 14.9, 4.0, 10.1, 9.4, 12.5, 9.1, 17.4, 18.0, 19.7...
## $ sleep_rem <dbl> 2.3, 0.7, 2.9, 0.8, 1.5, 1.4, 3.1, 4.9, 3.9, 0.6,...
## $ sleep_cycle <dbl> 0.13, 0.67, 0.33, 0.22, 0.12, 0.15, 0.38, 0.33, 0...
## $ awake <dbl> 9.1, 20.0, 13.9, 14.6, 11.5, 14.9, 6.6, 6.0, 4.3,...
## $ brainwt <dbl> 0.00029, 0.42300, 0.07000, 0.00550, 0.00640, 0.00...
## $ bodywt <dbl> 0.019, 600.000, 14.000, 0.728, 0.420, 0.005, 3.50...
## Observations: 20
## Variables: 11
## $ name <chr> "Greater short-tailed shrew", "Cow", "Dog", "Guin...
## $ genus <chr> "Blarina", "Bos", "Canis", "Cavis", "Chinchilla",...
## $ vore <chr> "omni", "herbi", "carni", "herbi", "herbi", "omni...
## $ order <chr> "Soricomorpha", "Artiodactyla", "Carnivora", "Rod...
## $ conservation <chr> "lc", "domesticated", "domesticated", "domesticat...
## $ sleep_total <dbl> 14.9, 4.0, 10.1, 9.4, 12.5, 9.1, 17.4, 18.0, 19.7...
## $ sleep_rem <dbl> 2.3, 0.7, 2.9, 0.8, 1.5, 1.4, 3.1, 4.9, 3.9, 0.6,...
## $ sleep_cycle <dbl> 0.13, 0.67, 0.33, 0.22, 0.12, 0.15, 0.38, 0.33, 0...
## $ awake <dbl> 9.1, 20.0, 13.9, 14.6, 11.5, 14.9, 6.6, 6.0, 4.3,...
## $ brainwt <dbl> 0.00029, 0.42300, 0.07000, 0.00550, 0.00640, 0.00...
## $ bodywt <dbl> 0.019, 600.000, 14.000, 0.728, 0.420, 0.005, 3.50...
## # A tibble: 7 x 1
## conservation
## <chr>
## 1 lc
## 2 <NA>
## 3 nt
## 4 domesticated
## 5 vu
## 6 en
## 7 cd
## # A tibble: 1 x 1
## count
## <int>
## 1 29
## Observations: 54
## Variables: 11
## $ name <chr> "Cheetah", "Mountain beaver", "Greater short-tail...
## $ genus <chr> "Acinonyx", "Aplodontia", "Blarina", "Bos", "Call...
## $ vore <chr> "carni", "herbi", "omni", "herbi", "carni", "carn...
## $ order <chr> "Carnivora", "Rodentia", "Soricomorpha", "Artioda...
## $ conservation <chr> "lc", "nt", "lc", "domesticated", "vu", "domestic...
## $ sleep_total <dbl> 12.1, 14.4, 14.9, 4.0, 8.7, 10.1, 3.0, 5.3, 9.4, ...
## $ sleep_rem <dbl> NA, 2.4, 2.3, 0.7, 1.4, 2.9, NA, 0.6, 0.8, 0.7, 1...
## $ sleep_cycle <dbl> NA, NA, 0.13, 0.67, 0.38, 0.33, NA, NA, 0.22, NA,...
## $ awake <dbl> 11.9, 9.6, 9.1, 20.0, 15.3, 13.9, 21.0, 18.7, 14....
## $ brainwt <dbl> NA, NA, 0.00029, 0.42300, NA, 0.07000, 0.09820, 0...
## $ bodywt <dbl> 50.000, 1.350, 0.019, 600.000, 20.490, 14.000, 14...
## Observations: 54
## Variables: 11
## $ name <chr> "Cheetah", "Mountain beaver", "Greater short-tail...
## $ genus <chr> "Acinonyx", "Aplodontia", "Blarina", "Bos", "Call...
## $ vore <chr> "carni", "herbi", "omni", "herbi", "carni", "carn...
## $ order <chr> "Carnivora", "Rodentia", "Soricomorpha", "Artioda...
## $ conservation <chr> "lc", "nt", "lc", "domesticated", "vu", "domestic...
## $ sleep_total <dbl> 12.1, 14.4, 14.9, 4.0, 8.7, 10.1, 3.0, 5.3, 9.4, ...
## $ sleep_rem <dbl> NA, 2.4, 2.3, 0.7, 1.4, 2.9, NA, 0.6, 0.8, 0.7, 1...
## $ sleep_cycle <dbl> NA, NA, 0.13, 0.67, 0.38, 0.33, NA, NA, 0.22, NA,...
## $ awake <dbl> 11.9, 9.6, 9.1, 20.0, 15.3, 13.9, 21.0, 18.7, 14....
## $ brainwt <dbl> NA, NA, 0.00029, 0.42300, NA, 0.07000, 0.09820, 0...
## $ bodywt <dbl> 50.000, 1.350, 0.019, 600.000, 20.490, 14.000, 14...
Data imputation with dplyr
## Observations: 83
## Variables: 11
## $ name <chr> "Cheetah", "Owl monkey", "Mountain beaver", "Grea...
## $ genus <chr> "Acinonyx", "Aotus", "Aplodontia", "Blarina", "Bo...
## $ vore <chr> "carni", "omni", "herbi", "omni", "herbi", "herbi...
## $ order <chr> "Carnivora", "Primates", "Rodentia", "Soricomorph...
## $ conservation <chr> "lc", NA, "nt", "lc", "domesticated", NA, "vu", N...
## $ sleep_total <dbl> 12.1, 17.0, 14.4, 14.9, 4.0, 14.4, 8.7, 7.0, 10.1...
## $ sleep_rem <dbl> NA, 1.8, 2.4, 2.3, 0.7, 2.2, 1.4, NA, 2.9, NA, 0....
## $ sleep_cycle <dbl> NA, NA, NA, 0.13, 0.67, 0.77, 0.38, NA, 0.33, NA,...
## $ awake <dbl> 11.9, 7.0, 9.6, 9.1, 20.0, 9.6, 15.3, 17.0, 13.9,...
## $ brainwt <dbl> NA, 0.01550, NA, 0.00029, 0.42300, NA, NA, NA, 0....
## $ bodywt <dbl> 50.000, 0.480, 1.350, 0.019, 600.000, 3.850, 20.4...
## # A tibble: 1 x 1
## count
## <int>
## 1 22
## [1] 1.9
Data Visualisation and Explorations
Data Visualisation With dplyr and ggplot2
## CPI.2016.Rank Country Country.Code Region
## 1 1 New Zealand NZL Asia Pacific
## 2 1 Denmark DNK Europe and Central Asia
## 3 3 Finland FIN Europe and Central Asia
## 4 4 Sweden SWE Europe and Central Asia
## 5 5 Switzerland CHE Europe and Central Asia
## 6 6 Norway NOR Europe and Central Asia
## CPI.2016.Score CPI.2015.Score CPI.2014.Score CPI.2013.Score
## 1 90 88 91 91
## 2 90 91 92 91
## 3 89 90 89 89
## 4 88 89 87 89
## 5 86 86 86 85
## 6 85 87 86 86
## CPI.2012.Score
## 1 90
## 2 90
## 3 90
## 4 88
## 5 86
## 6 85
library(ggplot2)
library(tidyr)
library(dplyr)
# Reshape wide -> long: stack the yearly CPI score columns into a
# `year` (former column name) / `cpi` (score) pair, dropping missing scores.
cpi_history <- cpi %>%
  gather(key = "year", value = "cpi",
         CPI.2012.Score:CPI.2016.Score, na.rm = TRUE)
head(cpi_history)
## CPI.2016.Rank Country Country.Code Region
## 1 1 New Zealand NZL Asia Pacific
## 2 1 Denmark DNK Europe and Central Asia
## 3 3 Finland FIN Europe and Central Asia
## 4 4 Sweden SWE Europe and Central Asia
## 5 5 Switzerland CHE Europe and Central Asia
## 6 6 Norway NOR Europe and Central Asia
## year cpi
## 1 CPI.2012.Score 90
## 2 CPI.2012.Score 90
## 3 CPI.2012.Score 90
## 4 CPI.2012.Score 88
## 5 CPI.2012.Score 86
## 6 CPI.2012.Score 85
# Top 15 countries by 2016 CPI score, tagged "top".
top2016 <- cpi_history %>%
  filter(year == "CPI.2016.Score") %>%
  top_n(15, cpi) %>%
  mutate(rnk = "top")
# Bottom 15 countries by 2016 CPI score (negative n selects from the low end),
# tagged "bot".
bot2016 <- cpi_history %>%
  filter(year == "CPI.2016.Score") %>%
  top_n(-15, cpi) %>%
  mutate(rnk = "bot")
# Stack both groups into one data frame for plotting.
dt2016 <- rbind(top2016, bot2016)
head(dt2016)
## CPI.2016.Rank Country Country.Code Region
## 1 1 New Zealand NZL Asia Pacific
## 2 1 Denmark DNK Europe and Central Asia
## 3 3 Finland FIN Europe and Central Asia
## 4 4 Sweden SWE Europe and Central Asia
## 5 5 Switzerland CHE Europe and Central Asia
## 6 6 Norway NOR Europe and Central Asia
## year cpi rnk
## 1 CPI.2016.Score 90 top
## 2 CPI.2016.Score 90 top
## 3 CPI.2016.Score 89 top
## 4 CPI.2016.Score 88 top
## 5 CPI.2016.Score 86 top
## 6 CPI.2016.Score 85 top
## CPI.2016.Rank Country Country.Code Region
## 29 170 Yemen YEM Middle East and North Africa
## 30 170 Sudan SDN Sub-Saharan Africa
## 31 173 Syria SYR Middle East and North Africa
## 32 174 Korea (North) PRK Asia Pacific
## 33 175 South Sudan SSD Sub-Saharan Africa
## 34 176 Somalia SOM Sub-Saharan Africa
## year cpi rnk
## 29 CPI.2016.Score 14 bot
## 30 CPI.2016.Score 14 bot
## 31 CPI.2016.Score 13 bot
## 32 CPI.2016.Score 12 bot
## 33 CPI.2016.Score 11 bot
## 34 CPI.2016.Score 10 bot
# Horizontal bar chart of the 2016 top and bottom CPI scores: countries are
# ordered by score and bars are coloured by group (top = blue, bot = red).
library(ggplot2)
ggplot(dt2016, aes(x = reorder(Country, cpi), y = cpi, fill = rnk)) +
  geom_col() +
  coord_flip() +
  labs(x = "", title = "Top and Bottom CPI's in 2016") +
  scale_fill_manual(values = c("top" = "blue", "bot" = "red"), name="CPI") +
  guides(fill = guide_legend(reverse = TRUE))
Mining and Visualising Information About the Olympic Games
## [1] "C:/Users/HP/Desktop/TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy/rfiles"
library(tidyverse)
library(ggthemes)
# Read the Olympic athlete/event records, keeping text columns as character.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
atheletes <- read.csv("athlete_events.csv", stringsAsFactors = FALSE)
head(atheletes)
## ID Name Sex Age Height Weight Team NOC
## 1 1 A Dijiang M 24 180 80 China CHN
## 2 2 A Lamusi M 23 170 60 China CHN
## 3 3 Gunnar Nielsen Aaby M 24 NA NA Denmark DEN
## 4 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 5 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## 6 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## Games Year Season City Sport
## 1 1992 Summer 1992 Summer Barcelona Basketball
## 2 2012 Summer 2012 Summer London Judo
## 3 1920 Summer 1920 Summer Antwerpen Football
## 4 1900 Summer 1900 Summer Paris Tug-Of-War
## 5 1988 Winter 1988 Winter Calgary Speed Skating
## 6 1988 Winter 1988 Winter Calgary Speed Skating
## Event Medal
## 1 Basketball Men's Basketball <NA>
## 2 Judo Men's Extra-Lightweight <NA>
## 3 Football Men's Football <NA>
## 4 Tug-Of-War Men's Tug-Of-War Gold
## 5 Speed Skating Women's 500 metres <NA>
## 6 Speed Skating Women's 1,000 metres <NA>
### remove NAs
# Keep only the rows where a medal was won. Missing medals are real NA values
# (merely printed as <NA>), not the text "<NA>", so they are tested with
# is.na(). The earlier string comparison only worked because filter() drops
# rows whose condition evaluates to NA.
rem <- atheletes %>%
  filter(!is.na(Medal))
head(rem)
## ID Name Sex Age Height Weight Team NOC
## 1 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 2 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 3 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 4 16 Juhamatti Tapio Aaltonen M 28 184 85 Finland FIN
## 5 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## 6 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## Games Year Season City Sport
## 1 1900 Summer 1900 Summer Paris Tug-Of-War
## 2 1920 Summer 1920 Summer Antwerpen Swimming
## 3 1920 Summer 1920 Summer Antwerpen Swimming
## 4 2014 Winter 2014 Winter Sochi Ice Hockey
## 5 1948 Summer 1948 Summer London Gymnastics
## 6 1948 Summer 1948 Summer London Gymnastics
## Event Medal
## 1 Tug-Of-War Men's Tug-Of-War Gold
## 2 Swimming Men's 200 metres Breaststroke Bronze
## 3 Swimming Men's 400 metres Breaststroke Bronze
## 4 Ice Hockey Men's Ice Hockey Bronze
## 5 Gymnastics Men's Individual All-Around Bronze
## 6 Gymnastics Men's Team All-Around Gold
## [1] "integer"
## ID Name Sex Age Height Weight Team NOC Games Year
## 702 3610 An Yulong M 19 173 70 China CHN 1998 Winter 1998
## 703 3610 An Yulong M 19 173 70 China CHN 1998 Winter 1998
## 704 3610 An Yulong M 23 173 70 China CHN 2002 Winter 2002
## 705 3611 An Zhongxin F 23 170 65 China CHN 1996 Summer 1996
## 1453 6381 Ba Yan F 21 183 78 China CHN 1984 Summer 1984
## 1760 7597 Bao Yingying F 24 172 67 China CHN 2008 Summer 2008
## Season City Sport
## 702 Winter Nagano Short Track Speed Skating
## 703 Winter Nagano Short Track Speed Skating
## 704 Winter Salt Lake City Short Track Speed Skating
## 705 Summer Atlanta Softball
## 1453 Summer Los Angeles Basketball
## 1760 Summer Beijing Fencing
## Event Medal
## 702 Short Track Speed Skating Men's 500 metres Silver
## 703 Short Track Speed Skating Men's 5,000 metres Relay Bronze
## 704 Short Track Speed Skating Men's 5,000 metres Relay Bronze
## 705 Softball Women's Softball Silver
## 1453 Basketball Women's Basketball Bronze
## 1760 Fencing Women's Sabre, Team Silver
### medal tally/country (from 1896 onwards)
# One row per team with its number of medal records, largest tally first.
t <- rem %>%
  group_by(Team) %>%
  summarise(Medal_Tally = n()) %>%
  arrange(desc(Medal_Tally))
head(t)
## # A tibble: 6 x 2
## Team Medal_Tally
## <chr> <int>
## 1 United States 5219
## 2 Soviet Union 2451
## 3 Germany 1984
## 4 Great Britain 1673
## 5 France 1550
## 6 Italy 1527
# Bar chart of the 12 largest medal tallies; top_n() without a weight ranks by
# the last column of `t`, which is Medal_Tally. X labels are turned vertical.
t %>%
  top_n(12) %>%
  ggplot(aes(x = Team, y = Medal_Tally)) +
  geom_col(colour = "white", fill = "red") +
  theme_wsj() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
### Medal tally of countries, per year
# One row per team and year with the number of medal records.
mt <- rem %>%
  group_by(Team, Year) %>%
  summarise(Total = n())
head(mt)
## # A tibble: 6 x 3
## # Groups: Team [3]
## Team Year Total
## <chr> <fct> <int>
## 1 A North American Team 1900 4
## 2 Afghanistan 2008 1
## 3 Afghanistan 2012 1
## 4 Algeria 1984 2
## 5 Algeria 1992 2
## 6 Algeria 1996 3
## medal tally of a few countries across the years
# Team stores full country names ("United States", not "USA"). A label that
# matches no row is dropped by %in% without any warning, so the exact
# spelling used in the data is required here.
topm <- mt %>%
  filter(Team %in% c("United States", "Russia", "Germany", "France", "China"))
head(topm)
## # A tibble: 6 x 3
## # Groups: Team [1]
## Team Year Total
## <chr> <fct> <int>
## 1 China 1984 74
## 2 China 1988 50
## 3 China 1992 73
## 4 China 1994 3
## 5 China 1996 94
## 6 China 1998 14
# One line (with point markers) per team showing its medal total by year.
ggplot(topm, aes(x = Year, y = Total, group = Team, colour = Team)) +
  geom_line() +
  geom_point() +
  theme_economist() +
  # rotate the x labels so the years do not overlap
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
##################
#setwd("~/Desktop/forecast/Data viz_r/Data and Code/section6")
# Show the current working directory.
getwd()
## [1] "C:/Users/HP/Desktop/TidyDataUpdatedData Processing_tidyrdplyrinR_MinervaSingh_Udemy/rfiles"
library(tidyverse)
library(ggthemes)
# Read the Olympic athlete/event records, keeping text columns as character.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
atheletes <- read.csv("athlete_events.csv", stringsAsFactors = FALSE)
head(atheletes)
## ID Name Sex Age Height Weight Team NOC
## 1 1 A Dijiang M 24 180 80 China CHN
## 2 2 A Lamusi M 23 170 60 China CHN
## 3 3 Gunnar Nielsen Aaby M 24 NA NA Denmark DEN
## 4 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 5 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## 6 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## Games Year Season City Sport
## 1 1992 Summer 1992 Summer Barcelona Basketball
## 2 2012 Summer 2012 Summer London Judo
## 3 1920 Summer 1920 Summer Antwerpen Football
## 4 1900 Summer 1900 Summer Paris Tug-Of-War
## 5 1988 Winter 1988 Winter Calgary Speed Skating
## 6 1988 Winter 1988 Winter Calgary Speed Skating
## Event Medal
## 1 Basketball Men's Basketball <NA>
## 2 Judo Men's Extra-Lightweight <NA>
## 3 Football Men's Football <NA>
## 4 Tug-Of-War Men's Tug-Of-War Gold
## 5 Speed Skating Women's 500 metres <NA>
## 6 Speed Skating Women's 1,000 metres <NA>
### remove NAs
# Keep only the rows where a medal was won. Missing medals are real NA values
# (merely printed as <NA>), not the text "<NA>", so they are tested with
# is.na(). The earlier string comparison only worked because filter() drops
# rows whose condition evaluates to NA.
rem <- atheletes %>%
  filter(!is.na(Medal))
head(rem)
## ID Name Sex Age Height Weight Team NOC
## 1 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 2 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 3 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 4 16 Juhamatti Tapio Aaltonen M 28 184 85 Finland FIN
## 5 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## 6 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## Games Year Season City Sport
## 1 1900 Summer 1900 Summer Paris Tug-Of-War
## 2 1920 Summer 1920 Summer Antwerpen Swimming
## 3 1920 Summer 1920 Summer Antwerpen Swimming
## 4 2014 Winter 2014 Winter Sochi Ice Hockey
## 5 1948 Summer 1948 Summer London Gymnastics
## 6 1948 Summer 1948 Summer London Gymnastics
## Event Medal
## 1 Tug-Of-War Men's Tug-Of-War Gold
## 2 Swimming Men's 200 metres Breaststroke Bronze
## 3 Swimming Men's 400 metres Breaststroke Bronze
## 4 Ice Hockey Men's Ice Hockey Bronze
## 5 Gymnastics Men's Individual All-Around Bronze
## 6 Gymnastics Men's Team All-Around Gold
## [1] "integer"
## ID Name Sex Age Height Weight Team NOC Games Year
## 702 3610 An Yulong M 19 173 70 China CHN 1998 Winter 1998
## 703 3610 An Yulong M 19 173 70 China CHN 1998 Winter 1998
## 704 3610 An Yulong M 23 173 70 China CHN 2002 Winter 2002
## 705 3611 An Zhongxin F 23 170 65 China CHN 1996 Summer 1996
## 1453 6381 Ba Yan F 21 183 78 China CHN 1984 Summer 1984
## 1760 7597 Bao Yingying F 24 172 67 China CHN 2008 Summer 2008
## Season City Sport
## 702 Winter Nagano Short Track Speed Skating
## 703 Winter Nagano Short Track Speed Skating
## 704 Winter Salt Lake City Short Track Speed Skating
## 705 Summer Atlanta Softball
## 1453 Summer Los Angeles Basketball
## 1760 Summer Beijing Fencing
## Event Medal
## 702 Short Track Speed Skating Men's 500 metres Silver
## 703 Short Track Speed Skating Men's 5,000 metres Relay Bronze
## 704 Short Track Speed Skating Men's 5,000 metres Relay Bronze
## 705 Softball Women's Softball Silver
## 1453 Basketball Women's Basketball Bronze
## 1760 Fencing Women's Sabre, Team Silver
### medal tally/country (from 1896 onwards)
# One row per team with its number of medal records, largest tally first.
t <- rem %>%
  group_by(Team) %>%
  summarise(Medal_Tally = n()) %>%
  arrange(desc(Medal_Tally))
head(t)
## # A tibble: 6 x 2
## Team Medal_Tally
## <chr> <int>
## 1 United States 5219
## 2 Soviet Union 2451
## 3 Germany 1984
## 4 Great Britain 1673
## 5 France 1550
## 6 Italy 1527
#### Medal Tally for China, Ger, Rus, Fr
#### for Summer & Winter Games
# Number of medal records per team and season, largest tally first.
t1 <- rem %>%
  group_by(Team, Season) %>%
  summarise(Medal_Tally = n()) %>%
  arrange(desc(Medal_Tally))
head(t1)
## # A tibble: 6 x 3
## # Groups: Team [6]
## Team Season Medal_Tally
## <chr> <chr> <int>
## 1 United States Summer 4686
## 2 Soviet Union Summer 2061
## 3 Germany Summer 1687
## 4 Great Britain Summer 1598
## 5 France Summer 1408
## 6 Italy Summer 1384
## # A tibble: 6 x 3
## # Groups: Team [4]
## Team Season Medal_Tally
## <chr> <chr> <int>
## 1 Germany Summer 1687
## 2 France Summer 1408
## 3 Russia Summer 894
## 4 China Summer 831
## 5 Germany Winter 297
## 6 Russia Winter 216
## x= factor, y= numerical, fill= factor
# Side-by-side (dodged) bars: one bar per season for each team.
ggplot(topm, aes(x = Team, y = Medal_Tally, fill = Season)) +
  geom_col(position = position_dodge()) +
  theme_economist()
Mining and Visualising Information About the Olympic Games…contd.
## ID Name Sex Age Height Weight Team NOC
## 1 1 A Dijiang M 24 180 80 China CHN
## 2 2 A Lamusi M 23 170 60 China CHN
## 3 3 Gunnar Nielsen Aaby M 24 NA NA Denmark DEN
## 4 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 5 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## 6 5 Christine Jacoba Aaftink F 21 185 82 Netherlands NED
## Games Year Season City Sport
## 1 1992 Summer 1992 Summer Barcelona Basketball
## 2 2012 Summer 2012 Summer London Judo
## 3 1920 Summer 1920 Summer Antwerpen Football
## 4 1900 Summer 1900 Summer Paris Tug-Of-War
## 5 1988 Winter 1988 Winter Calgary Speed Skating
## 6 1988 Winter 1988 Winter Calgary Speed Skating
## Event Medal
## 1 Basketball Men's Basketball <NA>
## 2 Judo Men's Extra-Lightweight <NA>
## 3 Football Men's Football <NA>
## 4 Tug-Of-War Men's Tug-Of-War Gold
## 5 Speed Skating Women's 500 metres <NA>
## 6 Speed Skating Women's 1,000 metres <NA>
### remove NAs
# Keep only the rows where a medal was won. Missing medals are real NA values
# (merely printed as <NA>), not the text "<NA>", so they are tested with
# is.na(). The earlier string comparison only worked because filter() drops
# rows whose condition evaluates to NA.
rem <- atheletes %>%
  filter(!is.na(Medal))
head(rem)
## ID Name Sex Age Height Weight Team NOC
## 1 4 Edgar Lindenau Aabye M 34 NA NA Denmark/Sweden DEN
## 2 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 3 15 Arvo Ossian Aaltonen M 30 NA NA Finland FIN
## 4 16 Juhamatti Tapio Aaltonen M 28 184 85 Finland FIN
## 5 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## 6 17 Paavo Johannes Aaltonen M 28 175 64 Finland FIN
## Games Year Season City Sport
## 1 1900 Summer 1900 Summer Paris Tug-Of-War
## 2 1920 Summer 1920 Summer Antwerpen Swimming
## 3 1920 Summer 1920 Summer Antwerpen Swimming
## 4 2014 Winter 2014 Winter Sochi Ice Hockey
## 5 1948 Summer 1948 Summer London Gymnastics
## 6 1948 Summer 1948 Summer London Gymnastics
## Event Medal
## 1 Tug-Of-War Men's Tug-Of-War Gold
## 2 Swimming Men's 200 metres Breaststroke Bronze
## 3 Swimming Men's 400 metres Breaststroke Bronze
## 4 Ice Hockey Men's Ice Hockey Bronze
## 5 Gymnastics Men's Individual All-Around Bronze
## 6 Gymnastics Men's Team All-Around Gold
## [1] "integer"
# Treat Year as a categorical variable from here on.
rem$Year <- as.factor(rem$Year)
#### Men vs Women performance in the Summer Olympics
# Medal records per year and sex, Summer Games only, in year order.
s <- rem %>%
  filter(Season == "Summer") %>%
  group_by(Year, Sex) %>%
  summarise(Count = n()) %>%
  arrange(Year)
head(s)
## # A tibble: 6 x 3
## # Groups: Year [4]
## Year Sex Count
## <fct> <chr> <int>
## 1 1896 M 143
## 2 1900 F 13
## 3 1900 M 591
## 4 1904 F 10
## 5 1904 M 476
## 6 1906 F 6
# One line (with point markers) per sex showing Summer medal counts by year.
ggplot(s, aes(x = Year, y = Count, group = Sex, colour = Sex)) +
  geom_line() +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
#### Most popular sports by sex
# For each sex, keep the 10 sports with the most Summer-Games medal records.
# The ranking variable must be Count: ranking by Sport would select sports by
# their position in the alphabet rather than by popularity.
p <- rem %>%
  group_by(Sport, Sex) %>%
  filter(Season == "Summer") %>%
  summarize(Count = n()) %>%
  group_by(Sex) %>%
  top_n(10, Count)
# Bars of medal counts per sport, filled by sex; x labels turned vertical.
ggplot(p, aes(x = Sport, y = Count, group = Sex, fill = Sex)) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
#### Medals won by women in 2016
# Medal records won by women in 2016, counted per NOC and medal type.
m_2016 <- rem %>%
  filter(Year == 2016, Sex == "F") %>%
  group_by(NOC, Medal) %>%
  summarise(Count = n())
head(m_2016)
## # A tibble: 6 x 3
## # Groups: NOC [4]
## NOC Medal Count
## <chr> <chr> <int>
## 1 ARG Gold 2
## 2 AUS Bronze 5
## 3 AUS Gold 20
## 4 AUS Silver 14
## 5 AUT Bronze 1
## 6 AZE Bronze 3
# Stacked horizontal bars of women's 2016 medal counts per NOC.
# Colours are tied to medals by name. An unnamed vector is matched to the
# values in order (alphabetical for this character column: Bronze, Gold,
# Silver), which would draw Gold in grey and Bronze in gold.
ggplot(m_2016, aes(x = NOC, y = Count, fill = Medal)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("Gold" = "gold1",
                               "Silver" = "gray70",
                               "Bronze" = "gold4")) +
  ggtitle("Medal counts for women at the 2016 Rio Olympics") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.y = element_text(size = 6))
## order by medal counts
# NOC codes ordered by ascending total medal count (all medal types summed).
l_2016 <- m_2016 %>%
  group_by(NOC) %>%
  summarise(Total = sum(Count)) %>%
  arrange(Total) %>%
  select(NOC)
head(l_2016)
## # A tibble: 6 x 1
## NOC
## <chr>
## 1 AUT
## 2 BAH
## 3 BDI
## 4 CIV
## 5 FIN
## 6 IRI
# Turn NOC into a factor whose level order follows l_2016 (ascending total
# medal count) so the bars are drawn in that order.
m_2016$NOC <- factor(m_2016$NOC, levels = l_2016$NOC)
# Colours are tied to medals by name. An unnamed vector is matched to the
# values in order (alphabetical for this character column: Bronze, Gold,
# Silver), which would draw Gold in grey and Bronze in gold.
ggplot(m_2016, aes(x = NOC, y = Count, fill = Medal)) +
  geom_col() +
  coord_flip() +
  scale_fill_manual(values = c("Gold" = "gold1",
                               "Silver" = "gray70",
                               "Bronze" = "gold4")) +
  ggtitle("Medal counts for women at the 2016 Rio Olympics") +
  theme(plot.title = element_text(hjust = 0.5),
        axis.text.y = element_text(size = 6))
Implement OLS on Different Categories
#############################################################
library(tidyverse)
library(modelr) ## helps with modelling
library(gapminder) ##data about countries
# Print the gapminder tibble to inspect its columns before modelling.
gapminder## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
# Rows for Singapore only.
sing <- filter(gapminder, country == "Singapore")
######### Identify the relationship between lifeExp and gdp
# Ordinary least squares: life expectancy explained by GDP per capita.
sing_mod <- lm(lifeExp ~ gdpPercap, data = sing)
summary(sing_mod)
##
## Call:
## lm(formula = lifeExp ~ gdpPercap, data = sing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.994 -0.808 0.892 1.713 1.973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.4972134 1.0743344 60.03 0.00000000000004 ***
## gdpPercap 0.0003858 0.0000477 8.09 0.00001063766493 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.4 on 10 degrees of freedom
## Multiple R-squared: 0.868, Adjusted R-squared: 0.854
## F-statistic: 65.5 on 1 and 10 DF, p-value: 0.0000106
# Plot the model's predictions against GDP per capita. This statement and the
# next one had been run together on a single line, which does not parse; they
# are two separate plots.
sing %>%
  add_predictions(sing_mod) %>%
  ggplot(aes(gdpPercap, pred)) +
  geom_line() +
  ggtitle("Linear trend + ")

# Plot the residuals against GDP per capita, with a reference line at zero.
sing %>%
  add_residuals(sing_mod) %>%
  ggplot(aes(gdpPercap, resid)) +
  geom_hline(yintercept = 0, colour = "white", size = 3) +
  geom_line() +
  ggtitle("Remaining pattern")
###################### Implement on other countries
# Nest the data per continent, fit lifeExp ~ gdpPercap within each continent,
# and spread each model's one-row glance() summary into ordinary columns.
# NOTE(review): glance() is presumably broom's; no library(broom) call is
# visible in this part of the file - confirm it is attached earlier.
by_cont <- gapminder %>%
  nest(-continent) %>%
  mutate(
    fit = map(data, ~ lm(lifeExp ~ gdpPercap, data = .)),
    results = map(fit, glance)
  ) %>%
  unnest(results)
by_cont
## # A tibble: 5 x 14
## continent data fit r.squared adj.r.squared sigma statistic p.value
## <fct> <lis> <lis> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Asia <tib~ <lm> 0.146 0.144 11.0 67.3 3.29e-15
## 2 Europe <tib~ <lm> 0.610 0.609 3.40 559. 4.05e-75
## 3 Africa <tib~ <lm> 0.181 0.180 8.29 138. 7.60e-29
## 4 Americas <tib~ <lm> 0.312 0.309 7.77 135. 5.45e-26
## 5 Oceania <tib~ <lm> 0.915 0.911 1.13 236. 2.99e-13
## # ... with 6 more variables: df <int>, logLik <dbl>, AIC <dbl>, BIC <dbl>,
## # deviance <dbl>, df.residual <int>
# Bar per continent showing the R-squared of its linear model.
ggplot(by_cont, aes(x = factor(continent), y = r.squared)) +
  geom_col() +
  labs(x = "Continent", y = expression(R^{2}))
####### more detailed results
# Refit the per-continent models and expand augment()'s per-observation
# output (used below for the .fitted column) into rows.
by_cont2 <- by_cont %>%
  mutate(fit = map(data, ~ lm(lifeExp ~ gdpPercap, data = .))) %>%
  mutate(results = map(fit, augment)) %>%
  unnest(results)
# Fitted vs. observed life expectancy, one panel row per continent; points on
# the faint diagonal are predicted exactly.
ggplot(by_cont2, aes(x = lifeExp, y = .fitted)) +
  geom_abline(slope = 1, intercept = 0, alpha = .2) + # Line of perfect fit
  geom_point() +
  facet_grid(continent ~ .) +
  theme_bw()