library(tidyverse)
library(tidymodels)
library(ggcorrplot)
library(corrr)
# Make the black-and-white theme the default for every ggplot drawn below.
theme_set(new = theme_bw())

# Read the per-country consumption table from the working directory and
# display it.
drinks <- read_csv(file = "drinks.csv")
drinks
# A tibble: 193 x 5
country beer_servings spirit_servings wine_servings total_litres_of~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 0 0 0 0
2 Albania 89 132 54 4.9
3 Algeria 25 0 14 0.7
4 Andorra 245 138 312 12.4
5 Angola 217 57 45 5.9
6 Antigua & Barbuda 102 128 45 4.9
7 Argentina 193 25 221 8.3
8 Armenia 21 179 11 3.8
9 Australia 261 72 212 10.4
10 Austria 279 75 191 9.7
# ... with 183 more rows
# Rows whose total pure-alcohol figure is exactly zero.
filter(drinks, total_litres_of_pure_alcohol == 0)
# A tibble: 13 x 5
country beer_servings spirit_servings wine_servings total_litres_of~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Afghanistan 0 0 0 0
2 Bangladesh 0 0 0 0
3 North Korea 0 0 0 0
4 Iran 0 0 0 0
5 Kuwait 0 0 0 0
6 Libya 0 0 0 0
7 Maldives 0 0 0 0
8 Marshall Islands 0 0 0 0
9 Mauritania 0 0 0 0
10 Monaco 0 0 0 0
11 Pakistan 0 0 0 0
12 San Marino 0 0 0 0
13 Somalia 0 0 0 0
# Rank countries by beer servings, highest first (sorting on the negated
# column gives descending order).
arrange(drinks, -beer_servings)
# A tibble: 193 x 5
country beer_servings spirit_servings wine_servings total_litres_of_p~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Namibia 376 3 1 6.8
2 Czech Republic 361 170 134 11.8
3 Gabon 347 98 59 8.9
4 Germany 346 117 175 11.3
5 Lithuania 343 244 56 12.9
6 Poland 343 215 56 10.9
7 Venezuela 333 100 3 7.7
8 Ireland 313 118 165 11.4
9 Palau 306 63 23 6.9
10 Romania 297 122 167 10.4
# ... with 183 more rows
# Rank countries by spirit servings, highest first (sorting on the negated
# column gives descending order).
arrange(drinks, -spirit_servings)
# A tibble: 193 x 5
country beer_servings spirit_servings wine_servings total_litres_of~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Grenada 199 438 28 11.9
2 Belarus 142 373 42 14.4
3 Haiti 1 326 1 5.9
4 Russian Federation 247 326 73 11.5
5 St. Lucia 171 315 71 10.1
6 Guyana 93 302 1 7.1
7 Slovakia 196 293 116 11.4
8 Dominica 52 286 26 6.6
9 Thailand 99 258 1 6.4
10 Cook Islands 0 254 74 5.9
# ... with 183 more rows
# Rank countries by wine servings, highest first (sorting on the negated
# column gives descending order).
arrange(drinks, -wine_servings)
# A tibble: 193 x 5
country beer_servings spirit_servings wine_servings total_litres_of~
<chr> <dbl> <dbl> <dbl> <dbl>
1 France 127 151 370 11.8
2 Portugal 194 67 339 11
3 Andorra 245 138 312 12.4
4 Switzerland 185 100 280 10.2
5 Denmark 224 81 278 10.4
6 Slovenia 270 51 276 10.6
7 Luxembourg 236 133 271 11.4
8 Croatia 230 87 254 10.2
9 Italy 85 42 237 6.5
10 Equatorial Guinea 92 0 233 5.8
# ... with 183 more rows
# Rank countries by total litres of pure alcohol, highest first (sorting on
# the negated column gives descending order).
arrange(drinks, -total_litres_of_pure_alcohol)
# A tibble: 193 x 5
country beer_servings spirit_servings wine_servings total_litres_of~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Belarus 142 373 42 14.4
2 Lithuania 343 244 56 12.9
3 Andorra 245 138 312 12.4
4 Grenada 199 438 28 11.9
5 Czech Republic 361 170 134 11.8
6 France 127 151 370 11.8
7 Russian Federation 247 326 73 11.5
8 Ireland 313 118 165 11.4
9 Luxembourg 236 133 271 11.4
10 Slovakia 196 293 116 11.4
# ... with 183 more rows
# Summary statistics of the numeric columns, restricted to countries whose
# total pure-alcohol figure is non-zero.
# The predicate is wrapped in where(): passing a bare predicate function to
# select() is deprecated by tidyselect and triggers a warning.
drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  select(where(is.numeric)) %>%
  summary()
beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol
Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.100
1st Qu.: 25.0 1st Qu.: 11.00 1st Qu.: 2.00 1st Qu.: 1.775
Median : 80.5 Median : 66.00 Median : 9.50 Median : 4.800
Mean :113.8 Mean : 86.84 Mean : 53.02 Mean : 5.058
3rd Qu.:193.2 3rd Qu.:133.00 3rd Qu.: 73.25 3rd Qu.: 7.625
Max. :376.0 Max. :438.00 Max. :370.00 Max. :14.400
# Correlation matrix of every column except the country label, stored for
# the heat map and printed.
corel <- cor(select(drinks, -country))
corel
beer_servings spirit_servings wine_servings
beer_servings 1.0000000 0.4588189 0.5271717
spirit_servings 0.4588189 1.0000000 0.1947970
wine_servings 0.5271717 0.1947970 1.0000000
total_litres_of_pure_alcohol 0.8358386 0.6549682 0.6675983
total_litres_of_pure_alcohol
beer_servings 0.8358386
spirit_servings 0.6549682
wine_servings 0.6675983
total_litres_of_pure_alcohol 1.0000000
# Lower-triangle correlation heat map drawn with squares and with the
# coefficient printed in each cell.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change this argument.
ggcorrplot(
  corel,
  method = "square",
  type = "lower",
  lab = TRUE
)

# Network view of the correlations between every column except the country
# label.
network_plot(
  correlate(select(drinks, -country))
)

# Linear regression of total pure-alcohol litres on all remaining columns
# (the `.` on the right-hand side of the formula), i.e. the three serving
# counts once `country` has been dropped. The pipe placeholder is kept as the
# data argument so the printed call is unchanged.
drinks %>%
  select(-country) %>%
  lm(formula = total_litres_of_pure_alcohol ~ ., data = .) %>%
  summary()
Call:
lm(formula = total_litres_of_pure_alcohol ~ ., data = .)
Residuals:
Min 1Q Median 3Q Max
-0.8670 -0.6865 -0.4010 -0.0392 7.4990
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.722450 0.149223 4.841 2.67e-06 ***
beer_servings 0.018303 0.001253 14.608 < 2e-16 ***
spirit_servings 0.015558 0.001244 12.511 < 2e-16 ***
wine_servings 0.016005 0.001440 11.112 < 2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.349 on 189 degrees of freedom
Multiple R-squared: 0.8742, Adjusted R-squared: 0.8722
F-statistic: 437.7 on 3 and 189 DF, p-value: < 2.2e-16
# Long-format copy of the data: one row per (country, measure) pair, with
# countries whose total consumption is zero removed first.
# The columns to pivot are chosen by name (everything except `country`)
# rather than by position, so a change in column order or count cannot
# silently pivot the wrong columns.
drinks_longer <- drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  pivot_longer(
    cols = -country,
    names_to = "beverage",
    values_to = "value"
  )
drinks_longer %>% head()
# A tibble: 6 x 3
country beverage value
<chr> <chr> <dbl>
1 Albania beer_servings 89
2 Albania spirit_servings 132
3 Albania wine_servings 54
4 Albania total_litres_of_pure_alcohol 4.9
5 Algeria beer_servings 25
6 Algeria spirit_servings 0
# Box plot of servings for each beverage type; the total-litres rows are
# excluded because they are on a different scale.
# The display labels are positional, so they assume the beverage values sort
# as beer, spirit, wine.
p1 <- ggplot(
  data = filter(drinks_longer, beverage != "total_litres_of_pure_alcohol"),
  mapping = aes(x = beverage, y = value, color = beverage)
) +
  geom_boxplot() +
  labs(x = "Beverage", y = "Servings", color = "Beverage") +
  scale_x_discrete(labels = c("Beer", "Spirit", "Wine")) +
  scale_color_discrete(labels = c("Beer", "Spirit", "Wine"))
# Density curves of servings, one per beverage type, with the legend hidden.
p2 <- ggplot(
  data = filter(drinks_longer, beverage != "total_litres_of_pure_alcohol"),
  mapping = aes(x = value, color = beverage)
) +
  geom_density() +
  theme(legend.position = "none")
# Histograms of servings, one facet per beverage type, with the legend
# hidden.
# The bin count is stated explicitly: it equals the value ggplot2 falls back
# to, so the plot is unchanged, but the "pick better value" message that
# geom_histogram() emits when no binning is given goes away.
p3 <- drinks_longer %>%
  filter(beverage != "total_litres_of_pure_alcohol") %>%
  ggplot(aes(value, color = beverage, fill = beverage)) +
  geom_histogram(bins = 30) +
  facet_wrap(~beverage) +
  theme(legend.position = "none") +
  labs(x = "Servings")
# patchwork provides the `+` and `/` operators used below to combine ggplot
# objects into a single figure.
library(patchwork)
# Box plot and density plot next to each other in the top row, the faceted
# histograms underneath.
(p1 + p2) / p3

# Countries with the highest beer consumption.
# slice_max() keeps ties by default, so n = 5 can return more than five rows
# (in this data Lithuania and Poland share the fifth-highest value, giving
# six). The row set is computed once and reused for both plots and for the
# count in the title, instead of repeating slice_max() and hard-coding the
# country names and the number.
most_beer <- drinks %>%
  slice_max(beer_servings, n = 5)

# Bar chart of beer servings for those countries, highest first, with the
# value printed above each bar.
beer1 <- most_beer %>%
  ggplot(aes(reorder(country, -beer_servings), beer_servings)) +
  geom_bar(stat = "identity", fill = "#56B4E9") +
  geom_text(aes(label = beer_servings), vjust = -.3) +
  ylim(0, 400) +
  labs(
    x = "Countries",
    y = "Beer Servings",
    title = paste0("Consumption of Beer (Top ", nrow(most_beer), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
beer2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_beer$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Beer",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

beer1 + beer2

# Countries with the highest spirit consumption.
# The row set is computed once and reused for both plots and for the count
# in the title, instead of repeating slice_max() and hard-coding the country
# names. slice_max() keeps ties by default, so n = 5 may return more than
# five rows; the title reports the actual number shown.
most_spirit <- drinks %>%
  slice_max(spirit_servings, n = 5)

# Bar chart of spirit servings for those countries, highest first, with the
# value printed above each bar.
spirit1 <- most_spirit %>%
  ggplot(aes(reorder(country, -spirit_servings), spirit_servings)) +
  geom_bar(stat = "identity", fill = "#76C8C9") +
  geom_text(aes(label = spirit_servings), vjust = -.3) +
  ylim(0, 480) +
  labs(
    x = "Countries",
    y = "Spirit Servings",
    title = paste0("Consumption of Spirit (Top ", nrow(most_spirit), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
spirit2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_spirit$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Spirit",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

spirit1 + spirit2

# Countries with the highest wine consumption.
# The row set is computed once and reused for both plots and for the count
# in the title. The comparison plot previously matched against a hand-typed
# list of names in which France was misspelled, so the top wine consumer was
# silently missing from it; taking the names from the data removes that
# class of error. slice_max() keeps ties by default, so n = 5 may return
# more than five rows; the title reports the actual number shown.
most_wine <- drinks %>%
  slice_max(wine_servings, n = 5)

# Bar chart of wine servings for those countries, highest first, with the
# value printed above each bar.
wine1 <- most_wine %>%
  ggplot(aes(reorder(country, -wine_servings), wine_servings)) +
  geom_bar(stat = "identity", fill = "#44C8C1") +
  geom_text(aes(label = wine_servings), vjust = -.3) +
  ylim(0, 400) +
  labs(
    x = "Countries",
    y = "Wine Servings",
    title = paste0("Consumption of Wine (Top ", nrow(most_wine), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
wine2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_wine$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Wine",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

wine1 + wine2

# Summary statistics of the numeric columns for countries with non-zero
# total consumption, shown again for reference next to the single-country
# lookup that follows.
# The predicate is wrapped in where(): passing a bare predicate function to
# select() is deprecated by tidyselect and triggers a warning.
drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  select(where(is.numeric)) %>%
  summary()
beer_servings spirit_servings wine_servings total_litres_of_pure_alcohol
Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.100
1st Qu.: 25.0 1st Qu.: 11.00 1st Qu.: 2.00 1st Qu.: 1.775
Median : 80.5 Median : 66.00 Median : 9.50 Median : 4.800
Mean :113.8 Mean : 86.84 Mean : 53.02 Mean : 5.058
3rd Qu.:193.2 3rd Qu.:133.00 3rd Qu.: 73.25 3rd Qu.: 7.625
Max. :376.0 Max. :438.00 Max. :370.00 Max. :14.400
# Show the row for Argentina.
filter(drinks, country == "Argentina")
# A tibble: 1 x 5
country beer_servings spirit_servings wine_servings total_litres_of_pure_al~
<chr> <dbl> <dbl> <dbl> <dbl>
1 Argentina 193 25 221 8.3