library(tidyverse)
library(tidymodels)
library(ggcorrplot)
library(corrr)

# Load the per-country alcohol consumption data (one row per country).
drinks <- read_csv("drinks.csv")

# Use the black-and-white ggplot2 theme for every plot created below.
theme_set(theme_bw())

# Preview the data.
drinks
# A tibble: 193 x 5
   country           beer_servings spirit_servings wine_servings total_litres_of~
   <chr>                     <dbl>           <dbl>         <dbl>            <dbl>
 1 Afghanistan                   0               0             0              0  
 2 Albania                      89             132            54              4.9
 3 Algeria                      25               0            14              0.7
 4 Andorra                     245             138           312             12.4
 5 Angola                      217              57            45              5.9
 6 Antigua & Barbuda           102             128            45              4.9
 7 Argentina                   193              25           221              8.3
 8 Armenia                      21             179            11              3.8
 9 Australia                   261              72           212             10.4
10 Austria                     279              75           191              9.7
# ... with 183 more rows
# Countries whose recorded total pure-alcohol consumption is exactly zero.
filter(drinks, total_litres_of_pure_alcohol == 0)
# A tibble: 13 x 5
   country          beer_servings spirit_servings wine_servings total_litres_of~
   <chr>                    <dbl>           <dbl>         <dbl>            <dbl>
 1 Afghanistan                  0               0             0                0
 2 Bangladesh                   0               0             0                0
 3 North Korea                  0               0             0                0
 4 Iran                         0               0             0                0
 5 Kuwait                       0               0             0                0
 6 Libya                        0               0             0                0
 7 Maldives                     0               0             0                0
 8 Marshall Islands             0               0             0                0
 9 Mauritania                   0               0             0                0
10 Monaco                       0               0             0                0
11 Pakistan                     0               0             0                0
12 San Marino                   0               0             0                0
13 Somalia                      0               0             0                0
# Rank countries from the highest to the lowest beer servings.
arrange(drinks, desc(beer_servings))
# A tibble: 193 x 5
   country        beer_servings spirit_servings wine_servings total_litres_of_p~
   <chr>                  <dbl>           <dbl>         <dbl>              <dbl>
 1 Namibia                  376               3             1                6.8
 2 Czech Republic           361             170           134               11.8
 3 Gabon                    347              98            59                8.9
 4 Germany                  346             117           175               11.3
 5 Lithuania                343             244            56               12.9
 6 Poland                   343             215            56               10.9
 7 Venezuela                333             100             3                7.7
 8 Ireland                  313             118           165               11.4
 9 Palau                    306              63            23                6.9
10 Romania                  297             122           167               10.4
# ... with 183 more rows
# Rank countries from the highest to the lowest spirit servings.
arrange(drinks, desc(spirit_servings))
# A tibble: 193 x 5
   country            beer_servings spirit_servings wine_servings total_litres_of~
   <chr>                      <dbl>           <dbl>         <dbl>            <dbl>
 1 Grenada                      199             438            28             11.9
 2 Belarus                      142             373            42             14.4
 3 Haiti                          1             326             1              5.9
 4 Russian Federation           247             326            73             11.5
 5 St. Lucia                    171             315            71             10.1
 6 Guyana                        93             302             1              7.1
 7 Slovakia                     196             293           116             11.4
 8 Dominica                      52             286            26              6.6
 9 Thailand                      99             258             1              6.4
10 Cook Islands                   0             254            74              5.9
# ... with 183 more rows
# Rank countries from the highest to the lowest wine servings.
arrange(drinks, desc(wine_servings))
# A tibble: 193 x 5
   country           beer_servings spirit_servings wine_servings total_litres_of~
   <chr>                     <dbl>           <dbl>         <dbl>            <dbl>
 1 France                      127             151           370             11.8
 2 Portugal                    194              67           339             11  
 3 Andorra                     245             138           312             12.4
 4 Switzerland                 185             100           280             10.2
 5 Denmark                     224              81           278             10.4
 6 Slovenia                    270              51           276             10.6
 7 Luxembourg                  236             133           271             11.4
 8 Croatia                     230              87           254             10.2
 9 Italy                        85              42           237              6.5
10 Equatorial Guinea            92               0           233              5.8
# ... with 183 more rows
# Rank countries from the highest to the lowest total pure-alcohol intake.
arrange(drinks, desc(total_litres_of_pure_alcohol))
# A tibble: 193 x 5
   country            beer_servings spirit_servings wine_servings total_litres_of~
   <chr>                      <dbl>           <dbl>         <dbl>            <dbl>
 1 Belarus                      142             373            42             14.4
 2 Lithuania                    343             244            56             12.9
 3 Andorra                      245             138           312             12.4
 4 Grenada                      199             438            28             11.9
 5 Czech Republic               361             170           134             11.8
 6 France                       127             151           370             11.8
 7 Russian Federation           247             326            73             11.5
 8 Ireland                      313             118           165             11.4
 9 Luxembourg                   236             133           271             11.4
10 Slovakia                     196             293           116             11.4
# ... with 183 more rows
# Summary statistics of every numeric column, restricted to countries with
# non-zero total alcohol consumption.
# The predicate is wrapped in where(): passing a bare predicate function to
# select() is deprecated in tidyselect.
drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  select(where(is.numeric)) %>%
  summary()
 beer_servings   spirit_servings  wine_servings    total_litres_of_pure_alcohol
 Min.   :  0.0   Min.   :  0.00   Min.   :  0.00   Min.   : 0.100              
 1st Qu.: 25.0   1st Qu.: 11.00   1st Qu.:  2.00   1st Qu.: 1.775              
 Median : 80.5   Median : 66.00   Median :  9.50   Median : 4.800              
 Mean   :113.8   Mean   : 86.84   Mean   : 53.02   Mean   : 5.058              
 3rd Qu.:193.2   3rd Qu.:133.00   3rd Qu.: 73.25   3rd Qu.: 7.625              
 Max.   :376.0   Max.   :438.00   Max.   :370.00   Max.   :14.400              
# Correlation matrix of the numeric columns; `country` is dropped first
# because cor() requires numeric input.
corel <- cor(select(drinks, -country))

corel
                             beer_servings spirit_servings wine_servings
beer_servings                    1.0000000       0.4588189     0.5271717
spirit_servings                  0.4588189       1.0000000     0.1947970
wine_servings                    0.5271717       0.1947970     1.0000000
total_litres_of_pure_alcohol     0.8358386       0.6549682     0.6675983
                             total_litres_of_pure_alcohol
beer_servings                                   0.8358386
spirit_servings                                 0.6549682
wine_servings                                   0.6675983
total_litres_of_pure_alcohol                    1.0000000
# Lower-triangle correlation heat map with the coefficients printed in each
# cell. `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
ggcorrplot(
  corel,
  method = "square",
  type = "lower",
  lab = TRUE
)

# Network view of the correlations between the numeric columns.
network_plot(correlate(select(drinks, -country)))

# Linear regression of total pure alcohol on the three serving counts.
# `data = .` passes the piped tibble as the data argument, so the Call printed
# by summary() still reads `data = .`.
drinks %>%
  select(-country) %>%
  lm(formula = total_litres_of_pure_alcohol ~ ., data = .) %>%
  summary()

Call:
lm(formula = total_litres_of_pure_alcohol ~ ., data = .)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.8670 -0.6865 -0.4010 -0.0392  7.4990 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     0.722450   0.149223   4.841 2.67e-06 ***
beer_servings   0.018303   0.001253  14.608  < 2e-16 ***
spirit_servings 0.015558   0.001244  12.511  < 2e-16 ***
wine_servings   0.016005   0.001440  11.112  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.349 on 189 degrees of freedom
Multiple R-squared:  0.8742,    Adjusted R-squared:  0.8722 
F-statistic: 437.7 on 3 and 189 DF,  p-value: < 2.2e-16
# Long-format copy of the data: one row per (country, measure) pair, excluding
# countries with zero total consumption.
# Columns are selected by name (`-country`) rather than by position so the
# reshape does not depend on the column order of the CSV.
drinks_longer <- drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  pivot_longer(cols = -country, names_to = "beverage", values_to = "value")

head(drinks_longer)
# A tibble: 6 x 3
  country beverage                     value
  <chr>   <chr>                        <dbl>
1 Albania beer_servings                 89  
2 Albania spirit_servings              132  
3 Albania wine_servings                 54  
4 Albania total_litres_of_pure_alcohol   4.9
5 Algeria beer_servings                 25  
6 Algeria spirit_servings                0  
# Box plots of servings per beverage type (the total-litres measure is on a
# different scale and is left out).
p1 <- ggplot(
  data = filter(drinks_longer, beverage != "total_litres_of_pure_alcohol"),
  mapping = aes(x = beverage, y = value, color = beverage)
) +
  geom_boxplot() +
  scale_x_discrete(labels = c("Beer", "Spirit", "Wine")) +
  scale_color_discrete(labels = c("Beer", "Spirit", "Wine")) +
  labs(x = "Beverage", y = "Servings", color = "Beverage")

# Density curves of servings, one curve per beverage type; the legend is
# hidden.
p2 <- ggplot(
  data = filter(drinks_longer, beverage != "total_litres_of_pure_alcohol"),
  mapping = aes(x = value, color = beverage)
) +
  geom_density() +
  theme(legend.position = "none")

# Histograms of servings, one facet per beverage type; the legend is hidden
# because the facet strips already name each beverage.
p3 <- ggplot(
  data = filter(drinks_longer, beverage != "total_litres_of_pure_alcohol"),
  mapping = aes(x = value, color = beverage, fill = beverage)
) +
  geom_histogram() +
  facet_wrap(vars(beverage)) +
  labs(x = "Servings") +
  theme(legend.position = "none")

# patchwork provides the `+` and `/` operators used below to combine ggplot
# objects into one figure.
library(patchwork)

# Compose the three distribution plots: p1 and p2 are grouped together and
# p3 is combined with that group via patchwork's `/` operator.
(p1 + p2) / p3 

# Countries with the highest beer consumption. slice_max() keeps ties by
# default, so asking for 5 can return more rows (Lithuania and Poland tie at
# 343 servings). Both plots and the title are therefore derived from this
# table instead of hard-coded country names and counts.
most_beer <- drinks %>%
  slice_max(beer_servings, n = 5)

# Bar chart of beer servings for the top countries, highest first.
beer1 <- most_beer %>%
  ggplot(aes(reorder(country, -beer_servings), beer_servings)) +
  geom_col(fill = "#56B4E9") +
  geom_text(aes(label = beer_servings), vjust = -0.3) +
  ylim(0, 400) +
  labs(
    x = "Countries",
    y = "Beer Servings",
    title = paste0("Consumption of Beer (Top ", nrow(most_beer), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
beer2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_beer$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_col(position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Beer",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

beer1 + beer2

# Countries with the highest spirit consumption. Both plots are derived from
# this table so the country list cannot drift from the ranking.
# slice_max() keeps ties by default, so the title uses the actual row count.
most_spirit <- drinks %>%
  slice_max(spirit_servings, n = 5)

# Bar chart of spirit servings for the top countries, highest first.
spirit1 <- most_spirit %>%
  ggplot(aes(reorder(country, -spirit_servings), spirit_servings)) +
  geom_col(fill = "#76C8C9") +
  geom_text(aes(label = spirit_servings), vjust = -0.3) +
  ylim(0, 480) +
  labs(
    x = "Countries",
    y = "Spirit Servings",
    title = paste0("Consumption of Spirit (Top ", nrow(most_spirit), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
spirit2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_spirit$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_col(position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Spirit",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

spirit1 + spirit2

# Countries with the highest wine consumption. Both plots are derived from
# this table; the previous hard-coded list misspelled "France" as "Frande",
# which silently dropped France from the comparison plot.
# slice_max() keeps ties by default, so the title uses the actual row count.
most_wine <- drinks %>%
  slice_max(wine_servings, n = 5)

# Bar chart of wine servings for the top countries, highest first.
wine1 <- most_wine %>%
  ggplot(aes(reorder(country, -wine_servings), wine_servings)) +
  geom_col(fill = "#44C8C1") +
  geom_text(aes(label = wine_servings), vjust = -0.3) +
  ylim(0, 400) +
  labs(
    x = "Countries",
    y = "Wine Servings",
    title = paste0("Consumption of Wine (Top ", nrow(most_wine), ")")
  )

# Beer, spirit and wine servings side by side for the same countries.
wine2 <- drinks_longer %>%
  filter(
    beverage != "total_litres_of_pure_alcohol",
    country %in% most_wine$country
  ) %>%
  ggplot(aes(x = country, y = value, fill = beverage)) +
  geom_col(position = "dodge") +
  labs(
    x = "Country",
    y = "Servings",
    fill = "Beverage",
    title = "Countries with major consumption of Wine",
    subtitle = "Relation with other beverages"
  ) +
  scale_fill_discrete(labels = c("Beer", "Spirit", "Wine"))

wine1 + wine2

# Summary of the numeric columns for countries with non-zero consumption,
# shown again as a reference point for the single-country lookup that follows.
# where() is required around the predicate: a bare predicate function in
# select() is deprecated in tidyselect.
drinks %>%
  filter(total_litres_of_pure_alcohol != 0) %>%
  select(where(is.numeric)) %>%
  summary()
 beer_servings   spirit_servings  wine_servings    total_litres_of_pure_alcohol
 Min.   :  0.0   Min.   :  0.00   Min.   :  0.00   Min.   : 0.100              
 1st Qu.: 25.0   1st Qu.: 11.00   1st Qu.:  2.00   1st Qu.: 1.775              
 Median : 80.5   Median : 66.00   Median :  9.50   Median : 4.800              
 Mean   :113.8   Mean   : 86.84   Mean   : 53.02   Mean   : 5.058              
 3rd Qu.:193.2   3rd Qu.:133.00   3rd Qu.: 73.25   3rd Qu.: 7.625              
 Max.   :376.0   Max.   :438.00   Max.   :370.00   Max.   :14.400              
# Look up the row for a single country.
filter(drinks, country == "Argentina")
# A tibble: 1 x 5
  country   beer_servings spirit_servings wine_servings total_litres_of_pure_al~
  <chr>             <dbl>           <dbl>         <dbl>                    <dbl>
1 Argentina           193              25           221                      8.3