Import data
# Read the life-expectancy data from a CSV file into a tibble.
data <- read_csv("../00_data/myData.csv")
# Print the tibble to preview the imported rows and column types.
data
## # A tibble: 20,755 × 4
## Entity Code Year LifeExpectancy
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7
## 2 Afghanistan AFG 1951 28.0
## 3 Afghanistan AFG 1952 28.4
## 4 Afghanistan AFG 1953 28.9
## 5 Afghanistan AFG 1954 29.2
## 6 Afghanistan AFG 1955 29.9
## 7 Afghanistan AFG 1956 30.4
## 8 Afghanistan AFG 1957 30.9
## 9 Afghanistan AFG 1958 31.5
## 10 Afghanistan AFG 1959 32.0
## # ℹ 20,745 more rows
Apply the following dplyr verbs to your data
Filter rows
# Keep rows for the year 2021 whose LifeExpectancy equals 21.
# LifeExpectancy is a double column, so it is compared with a tolerance via
# near() instead of exact `==`, which is unreliable for floating-point values.
# NOTE(review): the rendered output for this filter is empty; if the goal is
# to show matching rows, a range condition (e.g. LifeExpectancy < 60) is
# probably what was intended — confirm.
filter(data, Year == 2021, near(LifeExpectancy, 21))
## # A tibble: 0 × 4
## # ℹ 4 variables: Entity <chr>, Code <chr>, Year <dbl>, LifeExpectancy <dbl>
Arrange rows
# Sort rows by Entity, then Year, then LifeExpectancy (all ascending).
data %>%
  arrange(Entity, Year, LifeExpectancy)
## # A tibble: 20,755 × 4
## Entity Code Year LifeExpectancy
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7
## 2 Afghanistan AFG 1951 28.0
## 3 Afghanistan AFG 1952 28.4
## 4 Afghanistan AFG 1953 28.9
## 5 Afghanistan AFG 1954 29.2
## 6 Afghanistan AFG 1955 29.9
## 7 Afghanistan AFG 1956 30.4
## 8 Afghanistan AFG 1957 30.9
## 9 Afghanistan AFG 1958 31.5
## 10 Afghanistan AFG 1959 32.0
## # ℹ 20,745 more rows
Select columns
# Keep only the Year and LifeExpectancy columns.
data %>%
  select(Year, LifeExpectancy)
## # A tibble: 20,755 × 2
## Year LifeExpectancy
## <dbl> <dbl>
## 1 1950 27.7
## 2 1951 28.0
## 3 1952 28.4
## 4 1953 28.9
## 5 1954 29.2
## 6 1955 29.9
## 7 1956 30.4
## 8 1957 30.9
## 9 1958 31.5
## 10 1959 32.0
## # ℹ 20,745 more rows
Add columns
# Add a Decade column: integer-divide Year by 10, then scale back up
# (e.g. 1957 -> 1950).
data %>%
  mutate(Decade = (Year %/% 10) * 10)
## # A tibble: 20,755 × 5
## Entity Code Year LifeExpectancy Decade
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7 1950
## 2 Afghanistan AFG 1951 28.0 1950
## 3 Afghanistan AFG 1952 28.4 1950
## 4 Afghanistan AFG 1953 28.9 1950
## 5 Afghanistan AFG 1954 29.2 1950
## 6 Afghanistan AFG 1955 29.9 1950
## 7 Afghanistan AFG 1956 30.4 1950
## 8 Afghanistan AFG 1957 30.9 1950
## 9 Afghanistan AFG 1958 31.5 1950
## 10 Afghanistan AFG 1959 32.0 1950
## # ℹ 20,745 more rows
Summarize by groups
# Average LifeExpectancy over every row, ignoring missing values. The result
# column is named for what it holds (a mean life expectancy, not a year).
# NOTE(review): the section heading asks for a summary by groups, but no
# grouping is applied here; piping through group_by(Entity) (or
# group_by(Year)) first would give per-group means — confirm the intended
# grouping.
summarise(data, MeanLifeExpectancy = mean(LifeExpectancy, na.rm = TRUE))
## # A tibble: 1 × 1
## MeanLifeExpectancy
## <dbl>
## 1 61.6