# Load the life-expectancy workbook from the project data folder, then
# preview the resulting tibble.
data <- read_excel(path = "../00_data/newmyData.xlsx")
data
## # A tibble: 20,755 × 4
## Entity Code Year LifeExpectancy
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7
## 2 Afghanistan AFG 1951 28.0
## 3 Afghanistan AFG 1952 28.4
## 4 Afghanistan AFG 1953 28.9
## 5 Afghanistan AFG 1954 29.2
## 6 Afghanistan AFG 1955 29.9
## 7 Afghanistan AFG 1956 30.4
## 8 Afghanistan AFG 1957 30.9
## 9 Afghanistan AFG 1958 31.5
## 10 Afghanistan AFG 1959 32.0
## # ℹ 20,745 more rows
# Number of rows recorded for each country code.
count(data, Code)
## # A tibble: 239 × 2
## Code n
## <chr> <int>
## 1 ABW 72
## 2 AFG 72
## 3 AGO 73
## 4 AIA 72
## 5 ALB 72
## 6 AND 72
## 7 ARE 72
## 8 ARG 87
## 9 ARM 72
## 10 ASM 72
## # ℹ 229 more rows
# Codes of interest, listed in the order the factor levels should take.
code_levels <- c("ABW", "AFG", "ARG", "ALB")

# Convert Code to a factor restricted to those levels; every code that is
# not listed in code_levels becomes NA.
data_rev <- mutate(data, Code = factor(Code, levels = code_levels))
data_rev
## # A tibble: 20,755 × 4
## Entity Code Year LifeExpectancy
## <chr> <fct> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7
## 2 Afghanistan AFG 1951 28.0
## 3 Afghanistan AFG 1952 28.4
## 4 Afghanistan AFG 1953 28.9
## 5 Afghanistan AFG 1954 29.2
## 6 Afghanistan AFG 1955 29.9
## 7 Afghanistan AFG 1956 30.4
## 8 Afghanistan AFG 1957 30.9
## 9 Afghanistan AFG 1958 31.5
## 10 Afghanistan AFG 1959 32.0
## # ℹ 20,745 more rows
# Keep only the rows for the codes of interest. The code_levels vector is
# reused (instead of repeating the literal list) so the filter cannot drift
# away from the factor levels.
data_rev2 <- data_rev %>%
  filter(Code %in% code_levels)
data_rev2
## # A tibble: 303 × 4
## Entity Code Year LifeExpectancy
## <chr> <fct> <dbl> <dbl>
## 1 Afghanistan AFG 1950 27.7
## 2 Afghanistan AFG 1951 28.0
## 3 Afghanistan AFG 1952 28.4
## 4 Afghanistan AFG 1953 28.9
## 5 Afghanistan AFG 1954 29.2
## 6 Afghanistan AFG 1955 29.9
## 7 Afghanistan AFG 1956 30.4
## 8 Afghanistan AFG 1957 30.9
## 9 Afghanistan AFG 1958 31.5
## 10 Afghanistan AFG 1959 32.0
## # ℹ 293 more rows
Make two bar charts here: one before ordering, another after.
# Mean life expectancy per code, ignoring missing values; one row per
# factor level, in level order.
data_summary <- summarise(
  group_by(data_rev2, Code),
  LifeExpectancy = mean(LifeExpectancy, na.rm = TRUE)
)
data_summary
## # A tibble: 4 × 2
## Code LifeExpectancy
## <fct> <dbl>
## 1 ABW 70.3
## 2 AFG 45.4
## 3 ARG 65.4
## 4 ALB 68.3
# Bar chart 1: codes shown in their existing factor-level order.
ggplot(data_summary, aes(LifeExpectancy, Code)) +
  geom_col()

# Bar chart 2: codes reordered by mean life expectancy. The axis label is
# set explicitly so it does not display the fct_reorder() expression.
ggplot(data_summary, aes(LifeExpectancy, fct_reorder(Code, LifeExpectancy))) +
  geom_col() +
  labs(y = "Code")
Show examples of three functions:
# fct_recode(): rename individual levels (new name = old level). Codes that
# are not mentioned keep their original value.
data %>%
  count(
    Code = fct_recode(
      Code,
      Albanian = "ALB",
      Aruban = "ABW",
      Argentinian = "ARG",
      Afghani = "AFG"
    )
  )
## # A tibble: 239 × 2
## Code n
## <fct> <int>
## 1 Aruban 72
## 2 Afghani 72
## 3 AGO 73
## 4 AIA 72
## 5 Albanian 72
## 6 AND 72
## 7 ARE 72
## 8 Argentinian 87
## 9 ARM 72
## 10 ASM 72
## # ℹ 229 more rows
# fct_collapse(): merge one or more codes into a single named group. Codes
# that are not listed are left untouched.
data %>%
  mutate(
    Code = fct_collapse(
      Code,
      European = "ALB",
      Other = c("ABW", "AFG", "ARG")
    )
  ) %>%
  count(Code)
## # A tibble: 237 × 2
## Code n
## <fct> <int>
## 1 Other 231
## 2 AGO 73
## 3 AIA 72
## 4 European 72
## 5 AND 72
## 6 ARE 72
## 7 ARM 72
## 8 ASM 72
## 9 ATG 72
## 10 AUS 104
## # ℹ 227 more rows
# fct_lump_lowfreq(): lump the least frequent codes into "Other" while
# keeping "Other" the smallest level. This is what the superseded generic
# fct_lump() does when called without n or prop, so the result is unchanged.
# NOTE(review): the printed result still lists 239 codes, i.e. nothing is
# lumped for this data; fct_lump_n(Code, n = 5) would force a visible
# "Other" group if that is the intent.
data %>%
  mutate(Code = fct_lump_lowfreq(Code)) %>%
  count(Code)
## # A tibble: 239 × 2
## Code n
## <fct> <int>
## 1 ABW 72
## 2 AFG 72
## 3 AGO 73
## 4 AIA 72
## 5 ALB 72
## 6 AND 72
## 7 ARE 72
## 8 ARG 87
## 9 ARM 72
## 10 ASM 72
## # ℹ 229 more rows
No need to do anything here.