# 9 de febrero de 2019
library(tidyverse)
# 1. Select the first three columns of the iris dataset by column name.
# attach(iris) was dropped: the dplyr verbs resolve column names inside the
# data frame they are given, so the columns need not be on the search path.
iris %>%
  select("Sepal.Length", "Sepal.Width", "Petal.Length") %>%
  head()
## Sepal.Length Sepal.Width Petal.Length
## 1 5.1 3.5 1.4
## 2 4.9 3.0 1.4
## 3 4.7 3.2 1.3
## 4 4.6 3.1 1.5
## 5 5.0 3.6 1.4
## 6 5.4 3.9 1.7
# 2. Select every column of the iris dataset except Petal.Width.
iris %>%
  select(-Petal.Width) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Species
## 1 5.1 3.5 1.4 setosa
## 2 4.9 3.0 1.4 setosa
## 3 4.7 3.2 1.3 setosa
## 4 4.6 3.1 1.5 setosa
## 5 5.0 3.6 1.4 setosa
## 6 5.4 3.9 1.7 setosa
# 3. Select all iris columns whose names start with the string "P".
iris %>%
  select(starts_with("P")) %>%
  head()
## Petal.Length Petal.Width
## 1 1.4 0.2
## 2 1.4 0.2
## 3 1.3 0.2
## 4 1.5 0.2
## 5 1.4 0.2
## 6 1.7 0.4
# 4. Keep the iris rows where Sepal.Length >= 4.6 and Petal.Width >= 0.5.
iris %>%
  filter(Sepal.Length >= 4.6 & Petal.Width >= 0.5) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.3 1.7 0.5 setosa
## 2 5.0 3.5 1.6 0.6 setosa
## 3 7.0 3.2 4.7 1.4 versicolor
## 4 6.4 3.2 4.5 1.5 versicolor
## 5 6.9 3.1 4.9 1.5 versicolor
## 6 5.5 2.3 4.0 1.3 versicolor
# 5. Pipe iris into select(), keeping Sepal.Width then Sepal.Length.
iris %>%
  select(Sepal.Width, Sepal.Length) %>%
  head()
## Sepal.Width Sepal.Length
## 1 3.5 5.1
## 2 3.0 4.9
## 3 3.2 4.7
## 4 3.1 4.6
## 5 3.6 5.0
## 6 3.9 5.4
# 6. Sort the rows by a single column, here Sepal.Width.
iris %>%
  arrange(Sepal.Width) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.0 2.0 3.5 1.0 versicolor
## 2 6.0 2.2 4.0 1.0 versicolor
## 3 6.2 2.2 4.5 1.5 versicolor
## 4 6.0 2.2 5.0 1.5 virginica
## 5 4.5 2.3 1.3 0.3 setosa
## 6 5.5 2.3 4.0 1.3 versicolor
# 7. Keep three iris columns, then sort by Sepal.Length with ties broken by
# Sepal.Width.
iris %>%
  select(Sepal.Length, Sepal.Width, Species) %>%
  arrange(Sepal.Length, Sepal.Width) %>%
  head()
## Sepal.Length Sepal.Width Species
## 1 4.3 3.0 setosa
## 2 4.4 2.9 setosa
## 3 4.4 3.0 setosa
## 4 4.4 3.2 setosa
## 5 4.5 2.3 setosa
## 6 4.6 3.1 setosa
# 8. Add a column named proportion holding Sepal.Length / Sepal.Width.
iris %>%
  mutate(proportion = Sepal.Length / Sepal.Width) %>%
  head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species proportion
## 1 5.1 3.5 1.4 0.2 setosa 1.457143
## 2 4.9 3.0 1.4 0.2 setosa 1.633333
## 3 4.7 3.2 1.3 0.2 setosa 1.468750
## 4 4.6 3.1 1.5 0.2 setosa 1.483871
## 5 5.0 3.6 1.4 0.2 setosa 1.388889
## 6 5.4 3.9 1.7 0.4 setosa 1.384615
# 9. Summarise iris with the mean of Sepal.Length, reported as avg_slength.
iris %>%
  summarise(avg_slength = mean(Sepal.Length))
## avg_slength
## 1 5.843333
# 10. Group iris by Sepal.Length and compute the same summary per group.
# The grouping column is also the averaged column, so each group's
# avg_slength equals its own Sepal.Length value.
iris %>%
  group_by(Sepal.Length) %>%
  summarise(avg_slength = mean(Sepal.Length))
## # A tibble: 35 x 2
## Sepal.Length avg_slength
## <dbl> <dbl>
## 1 4.3 4.3
## 2 4.4 4.4
## 3 4.5 4.5
## 4 4.6 4.6
## 5 4.7 4.7
## 6 4.8 4.8
## 7 4.9 4.9
## 8 5 5
## 9 5.1 5.1
## 10 5.2 5.2
## # ... with 25 more rows
# Ejercicio 1
library(gapminder)
# Keep only year, lifeExp and country (in that order) and show the first six
# rows as a plain data frame. attach(gapminder) was dropped: the dplyr verbs
# resolve column names inside the data frame they receive.
gapminder %>%
  select(year, lifeExp, country) %>%
  head() %>%
  data.frame()
## year lifeExp country
## 1 1952 28.801 Afghanistan
## 2 1957 30.332 Afghanistan
## 3 1962 31.997 Afghanistan
## 4 1967 34.020 Afghanistan
## 5 1972 36.088 Afghanistan
## 6 1977 38.438 Afghanistan
# Ejercicio 2
# Keep the columns from country through lifeExp and show the first six rows
# as a plain data frame.
gapminder %>%
  select(country:lifeExp) %>%
  head() %>%
  data.frame()
## country continent year lifeExp
## 1 Afghanistan Asia 1952 28.801
## 2 Afghanistan Asia 1957 30.332
## 3 Afghanistan Asia 1962 31.997
## 4 Afghanistan Asia 1967 34.020
## 5 Afghanistan Asia 1972 36.088
## 6 Afghanistan Asia 1977 38.438
# Ejercicio 3
# Drop the lifeExp column and show the first six rows as a plain data frame.
gapminder %>%
  select(-lifeExp) %>%
  head() %>%
  data.frame()
## country continent year pop gdpPercap
## 1 Afghanistan Asia 1952 8425333 779.4453
## 2 Afghanistan Asia 1957 9240934 820.8530
## 3 Afghanistan Asia 1962 10267083 853.1007
## 4 Afghanistan Asia 1967 11537966 836.1971
## 5 Afghanistan Asia 1972 13079460 739.9811
## 6 Afghanistan Asia 1977 14880372 786.1134
# Ejercicio 4
# Returns the first six rows of gapminder as a plain data frame, with the
# continent column moved to the front and renamed "cont", followed by country
# and the columns year through gdpPercap. Takes no arguments.
#
# The rename happens inside select() (new_name = old_name), which replaces the
# earlier regex match on "continent" plus a manual colnames() assignment.
#
# NOTE(review): the name is kept so existing calls keep working, but a global
# function called `everything` can shadow the tidyselect helper `everything()`
# for later code in the same session; consider renaming once callers allow.
everything <- function() {
  gapminder %>%
    select(cont = continent, country, year:gdpPercap) %>%
    head() %>%
    data.frame()
}
everything()
## cont country year lifeExp pop gdpPercap
## 1 Asia Afghanistan 1952 28.801 8425333 779.4453
## 2 Asia Afghanistan 1957 30.332 9240934 820.8530
## 3 Asia Afghanistan 1962 31.997 10267083 853.1007
## 4 Asia Afghanistan 1967 34.020 11537966 836.1971
## 5 Asia Afghanistan 1972 36.088 13079460 739.9811
## 6 Asia Afghanistan 1977 38.438 14880372 786.1134
# Ejercicio 5
# Sort gapminder by year and show the first six rows as a plain data frame.
gapminder %>%
  arrange(year) %>%
  head() %>%
  data.frame()
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Albania Europe 1952 55.230 1282697 1601.0561
## 3 Algeria Africa 1952 43.077 9279525 2449.0082
## 4 Angola Africa 1952 30.015 4232095 3520.6103
## 5 Argentina Americas 1952 62.485 17876956 5911.3151
## 6 Australia Oceania 1952 69.120 8691212 10039.5956
# Ejercicio 6
# Sort gapminder by year, latest first, and show the first six rows as a
# plain data frame.
gapminder %>%
  arrange(desc(year)) %>%
  head() %>%
  data.frame()
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 2007 43.828 31889923 974.5803
## 2 Albania Europe 2007 76.423 3600523 5937.0295
## 3 Algeria Africa 2007 72.301 33333216 6223.3675
## 4 Angola Africa 2007 42.731 12420476 4797.2313
## 5 Argentina Americas 2007 75.320 40301927 12779.3796
## 6 Australia Oceania 2007 81.235 20434176 34435.3674
# Ejercicio 7
# Sort gapminder by year, then by lifeExp within each year, and show the
# first six rows as a plain data frame.
gapminder %>%
  arrange(year, lifeExp) %>%
  head() %>%
  data.frame()
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Gambia Africa 1952 30.000 284320 485.2307
## 3 Angola Africa 1952 30.015 4232095 3520.6103
## 4 Sierra Leone Africa 1952 30.331 2143249 879.7877
## 5 Mozambique Africa 1952 31.286 6446316 468.5260
## 6 Burkina Faso Africa 1952 31.975 4469979 543.2552
# Ejercicio 8
# Keep the rows whose pop exceeds 100 million, convert to a plain data frame,
# and show the first six rows.
gapminder %>%
  filter(pop > 1e8) %>%
  data.frame() %>%
  head()
## country continent year lifeExp pop gdpPercap
## 1 Bangladesh Asia 1987 52.819 103764241 751.9794
## 2 Bangladesh Asia 1992 56.018 113704579 837.8102
## 3 Bangladesh Asia 1997 59.412 123315288 972.7700
## 4 Bangladesh Asia 2002 62.013 135656790 1136.3904
## 5 Bangladesh Asia 2007 64.062 150448339 1391.2538
## 6 Brazil Americas 1972 59.504 100840058 4985.7115
# Ejercicio 9
# Keep the rows whose continent is "Asia" and show the first six as a plain
# data frame.
gapminder %>%
  filter(continent == "Asia") %>%
  head() %>%
  data.frame()
## country continent year lifeExp pop gdpPercap
## 1 Afghanistan Asia 1952 28.801 8425333 779.4453
## 2 Afghanistan Asia 1957 30.332 9240934 820.8530
## 3 Afghanistan Asia 1962 31.997 10267083 853.1007
## 4 Afghanistan Asia 1967 34.020 11537966 836.1971
## 5 Afghanistan Asia 1972 36.088 13079460 739.9811
## 6 Afghanistan Asia 1977 38.438 14880372 786.1134
# Ejercicio 10
# Rows with gdpPercap above 10000, stored in `i` as a plain data frame.
i <- gapminder %>%
  filter(gdpPercap > 10000) %>%
  data.frame()
# Show the first six of those rows without the gdpPercap column.
i %>%
  select(-gdpPercap) %>%
  head()
## country continent year lifeExp pop
## 1 Argentina Americas 1977 68.481 26983828
## 2 Argentina Americas 1997 73.275 36203463
## 3 Argentina Americas 2007 75.320 40301927
## 4 Australia Oceania 1952 69.120 8691212
## 5 Australia Oceania 1957 70.330 9712569
## 6 Australia Oceania 1962 70.930 10794968
# Ejercicio 11
# Select the iris columns whose names start with "Petal"; show the first six
# rows.
iris %>%
  select(starts_with("Petal")) %>%
  head()
## Petal.Length Petal.Width
## 1 1.4 0.2
## 2 1.4 0.2
## 3 1.3 0.2
## 4 1.5 0.2
## 5 1.4 0.2
## 6 1.7 0.4