Data Visualization Assignment

R Markdown

Importing libraries and reading data

#EDA

# Preview the first six rows of the multi-year data set
head(df_all, n = 6L)

##   ï..country_name year life_ladder log_GDP_per_capita social_support
## 1     Afghanistan 2008       3.724              7.370          0.451
## 2     Afghanistan 2009       4.402              7.540          0.552
## 3     Afghanistan 2010       4.758              7.647          0.539
## 4     Afghanistan 2011       3.832              7.620          0.521
## 5     Afghanistan 2012       3.783              7.705          0.521
## 6     Afghanistan 2013       3.572              7.725          0.484
##   healthy_life_expectancy_at_birth freedom_to_make_life_choices generosity
## 1                            50.80                        0.718      0.168
## 2                            51.20                        0.679      0.190
## 3                            51.60                        0.600      0.121
## 4                            51.92                        0.496      0.162
## 5                            52.24                        0.531      0.236
## 6                            52.56                        0.578      0.061
##   perceptions_of_corruption positive_affect negative_affect
## 1                     0.882           0.518           0.258
## 2                     0.850           0.584           0.237
## 3                     0.707           0.618           0.275
## 4                     0.731           0.611           0.267
## 5                     0.776           0.710           0.268
## 6                     0.823           0.621           0.273

# Preview the first six rows of the 2021 data set
head(df_2021, n = 6L)

##   ï..country_name regional_indicator ladder_score
## 1         Finland     Western Europe        7.842
## 2         Denmark     Western Europe        7.620
## 3     Switzerland     Western Europe        7.571
## 4         Iceland     Western Europe        7.554
## 5     Netherlands     Western Europe        7.464
## 6          Norway     Western Europe        7.392
##   standard_error_of_ladder_score upperwhisker lowerwhisker
## 1                          0.032        7.904        7.780
## 2                          0.035        7.687        7.552
## 3                          0.036        7.643        7.500
## 4                          0.059        7.670        7.438
## 5                          0.027        7.518        7.410
## 6                          0.035        7.462        7.323
##   logged_GDP_per_capita social_support healthy_life_expectancy
## 1                10.775          0.954                    72.0
## 2                10.933          0.954                    72.7
## 3                11.117          0.942                    74.4
## 4                10.878          0.983                    73.0
## 5                10.932          0.942                    72.4
## 6                11.053          0.954                    73.3
##   freedom_to_make_life_choices generosity perceptions_of_corruption
## 1                        0.949     -0.098                     0.186
## 2                        0.946      0.030                     0.179
## 3                        0.919      0.025                     0.292
## 4                        0.955      0.160                     0.673
## 5                        0.913      0.175                     0.338
## 6                        0.960      0.093                     0.270
##   ladder_score_in_dystopia explained_by_Log_GDP_per_capita
## 1                     2.43                           1.446
## 2                     2.43                           1.502
## 3                     2.43                           1.566
## 4                     2.43                           1.482
## 5                     2.43                           1.501
## 6                     2.43                           1.543
##   explained_by_social_support explained_by_healthy_life_expectancy
## 1                       1.106                                0.741
## 2                       1.108                                0.763
## 3                       1.079                                0.816
## 4                       1.172                                0.772
## 5                       1.079                                0.753
## 6                       1.108                                0.782
##   explained_by_freedom_to_make_life_choices explained_by_generosity
## 1                                     0.691                   0.124
## 2                                     0.686                   0.208
## 3                                     0.653                   0.204
## 4                                     0.698                   0.293
## 5                                     0.647                   0.302
## 6                                     0.703                   0.249
##   explained_by_perceptions_of_corruption dystopia_residual
## 1                                  0.481             3.253
## 2                                  0.485             2.868
## 3                                  0.413             2.839
## 4                                  0.170             2.967
## 5                                  0.384             2.798
## 6                                  0.427             2.580

#Univariate analysis

# Overview plot of each data frame, 2021 data first
df_2021 %>% vis_dat()

df_all %>% vis_dat()

#Checking missing values

# Missing-value plot for the 2021 data, with a custom axis label
gg_miss_var(df_2021) +
  labs(y = "Checking for the missing ones")

# Same plot for the multi-year data
gg_miss_var(df_all) +
  labs(y = "Checking for the missing ones")

#checking summary

# Per-column summary statistics of the 2021 data
df_2021 %>% summary()

##  ï..country_name    regional_indicator  ladder_score  
##  Length:149         Length:149         Min.   :2.523  
##  Class :character   Class :character   1st Qu.:4.852  
##  Mode  :character   Mode  :character   Median :5.534  
##                                        Mean   :5.533  
##                                        3rd Qu.:6.255  
##                                        Max.   :7.842  
##  standard_error_of_ladder_score  upperwhisker    lowerwhisker  
##  Min.   :0.02600                Min.   :2.596   Min.   :2.449  
##  1st Qu.:0.04300                1st Qu.:4.991   1st Qu.:4.706  
##  Median :0.05400                Median :5.625   Median :5.413  
##  Mean   :0.05875                Mean   :5.648   Mean   :5.418  
##  3rd Qu.:0.07000                3rd Qu.:6.344   3rd Qu.:6.128  
##  Max.   :0.17300                Max.   :7.904   Max.   :7.780  
##  logged_GDP_per_capita social_support   healthy_life_expectancy
##  Min.   : 6.635        Min.   :0.4630   Min.   :48.48          
##  1st Qu.: 8.541        1st Qu.:0.7500   1st Qu.:59.80          
##  Median : 9.569        Median :0.8320   Median :66.60          
##  Mean   : 9.432        Mean   :0.8147   Mean   :64.99          
##  3rd Qu.:10.421        3rd Qu.:0.9050   3rd Qu.:69.60          
##  Max.   :11.647        Max.   :0.9830   Max.   :76.95          
##  freedom_to_make_life_choices   generosity       perceptions_of_corruption
##  Min.   :0.3820               Min.   :-0.28800   Min.   :0.0820           
##  1st Qu.:0.7180               1st Qu.:-0.12600   1st Qu.:0.6670           
##  Median :0.8040               Median :-0.03600   Median :0.7810           
##  Mean   :0.7916               Mean   :-0.01513   Mean   :0.7274           
##  3rd Qu.:0.8770               3rd Qu.: 0.07900   3rd Qu.:0.8450           
##  Max.   :0.9700               Max.   : 0.54200   Max.   :0.9390           
##  ladder_score_in_dystopia explained_by_Log_GDP_per_capita
##  Min.   :2.43             Min.   :0.0000                 
##  1st Qu.:2.43             1st Qu.:0.6660                 
##  Median :2.43             Median :1.0250                 
##  Mean   :2.43             Mean   :0.9772                 
##  3rd Qu.:2.43             3rd Qu.:1.3230                 
##  Max.   :2.43             Max.   :1.7510                 
##  explained_by_social_support explained_by_healthy_life_expectancy
##  Min.   :0.0000              Min.   :0.0000                      
##  1st Qu.:0.6470              1st Qu.:0.3570                      
##  Median :0.8320              Median :0.5710                      
##  Mean   :0.7933              Mean   :0.5202                      
##  3rd Qu.:0.9960              3rd Qu.:0.6650                      
##  Max.   :1.1720              Max.   :0.8970                      
##  explained_by_freedom_to_make_life_choices explained_by_generosity
##  Min.   :0.0000                            Min.   :0.000          
##  1st Qu.:0.4090                            1st Qu.:0.105          
##  Median :0.5140                            Median :0.164          
##  Mean   :0.4987                            Mean   :0.178          
##  3rd Qu.:0.6030                            3rd Qu.:0.239          
##  Max.   :0.7160                            Max.   :0.541          
##  explained_by_perceptions_of_corruption dystopia_residual
##  Min.   :0.0000                         Min.   :0.648    
##  1st Qu.:0.0600                         1st Qu.:2.138    
##  Median :0.1010                         Median :2.509    
##  Mean   :0.1351                         Mean   :2.430    
##  3rd Qu.:0.1740                         3rd Qu.:2.794    
##  Max.   :0.5470                         Max.   :3.482

# Per-column summary statistics (including NA counts) of the multi-year data
df_all %>% summary()

##  ï..country_name         year       life_ladder    log_GDP_per_capita
##  Length:1949        Min.   :2005   Min.   :2.375   Min.   : 6.635    
##  Class :character   1st Qu.:2010   1st Qu.:4.640   1st Qu.: 8.464    
##  Mode  :character   Median :2013   Median :5.386   Median : 9.460    
##                     Mean   :2013   Mean   :5.467   Mean   : 9.368    
##                     3rd Qu.:2017   3rd Qu.:6.283   3rd Qu.:10.353    
##                     Max.   :2020   Max.   :8.019   Max.   :11.648    
##                                                    NA's   :36        
##  social_support   healthy_life_expectancy_at_birth freedom_to_make_life_choices
##  Min.   :0.2900   Min.   :32.30                    Min.   :0.2580              
##  1st Qu.:0.7498   1st Qu.:58.69                    1st Qu.:0.6470              
##  Median :0.8355   Median :65.20                    Median :0.7630              
##  Mean   :0.8126   Mean   :63.36                    Mean   :0.7426              
##  3rd Qu.:0.9050   3rd Qu.:68.59                    3rd Qu.:0.8560              
##  Max.   :0.9870   Max.   :77.10                    Max.   :0.9850              
##  NA's   :13       NA's   :55                       NA's   :32                  
##    generosity      perceptions_of_corruption positive_affect  negative_affect 
##  Min.   :-0.3350   Min.   :0.0350            Min.   :0.3220   Min.   :0.0830  
##  1st Qu.:-0.1130   1st Qu.:0.6900            1st Qu.:0.6255   1st Qu.:0.2060  
##  Median :-0.0255   Median :0.8020            Median :0.7220   Median :0.2580  
##  Mean   : 0.0001   Mean   :0.7471            Mean   :0.7100   Mean   :0.2685  
##  3rd Qu.: 0.0910   3rd Qu.:0.8720            3rd Qu.:0.7990   3rd Qu.:0.3200  
##  Max.   : 0.6980   Max.   :0.9830            Max.   :0.9440   Max.   :0.7050  
##  NA's   :89        NA's   :110               NA's   :22       NA's   :16

#top 10 happiest countries in 2021

# Columns of df_2021 to compare: the ladder score plus six factor columns
dimensions <- c(
  "ladder_score",
  "logged_GDP_per_capita",
  "social_support",
  "healthy_life_expectancy",
  "freedom_to_make_life_choices",
  "generosity",
  "perceptions_of_corruption"
)

# Lookup table of distinct country / region pairs taken from the 2021 data.
# The country column is selected by position: its name prints as
# "ï..country_name", which looks like a byte-order-mark artefact from reading
# the CSV, and that mangled spelling is not stable across platforms/encodings.
country_region_dict <- df_2021 %>%
  select(country = 1, region = regional_indicator) %>%
  distinct()

# Long format of the 2021 data: one row per country and dimension.
# `perceptions_of_corruption` is replaced by its complement
# `absence_of_corruption` (1 - perception) before reshaping, so the original
# column never has to be pivoted and filtered out again.
# The country column is selected by position because its name is mangled
# (it prints as "ï..country_name") and that spelling is encoding-dependent.
df_2021_long <- df_2021 %>%
  select(country = 1, all_of(dimensions)) %>%
  mutate(absence_of_corruption = 1 - perceptions_of_corruption) %>%
  select(-perceptions_of_corruption) %>%
  pivot_longer(
    cols = -country,
    names_to = "dimension",
    values_to = "score"
  )

# Min-max rescale every dimension: within each dimension, `score_pct` is the
# score's position between that dimension's smallest and largest value.
# (The object name is kept as spelled because later code refers to it.)
df_2021_tranformed <- df_2021_long %>%
  group_by(dimension) %>%
  mutate(
    min_value = min(score),
    max_value = max(score),
    score_pct = (score - min_value) / (max_value - min_value)
  ) %>%
  ungroup()

# Ten highest ladder scores of 2021, labelled "<country> (<rank>)" for plotting.
# min_rank() keeps ranks as whole numbers when scores tie; rank() would average
# tied positions and produce fractional labels such as "(1.5)".
df_2021_top10 <- df_2021_tranformed %>%
  filter(dimension == "ladder_score") %>%
  slice_max(score, n = 10) %>%
  mutate(
    cat = "top_10",
    country_rank = min_rank(desc(score)),
    country_label = paste0(country, " (", country_rank, ")")
  )

#plotting top 10 countries

# Horizontal rounded-bar chart of the ten highest ladder scores.
ggplot(df_2021_top10, aes(x = reorder(country_label, score))) +
  # Background bar spanning the full 0-10 range; fill value 4.9 sits next to
  # the gradient midpoint (5), so it is drawn almost white.
  geom_chicklet(aes(y = 10, fill = 4.9), width = 0.5,
                radius = grid::unit(10, "pt")) +
  # Actual score, coloured by its value
  geom_chicklet(aes(y = score, fill = score), width = 0.5,
                radius = grid::unit(10, "pt")) +
  # Label is mapped inside aes() so it always follows the layer's own rows
  # instead of relying on the row order of the global data frame.
  geom_text(aes(y = score, label = round(score, 2)), nudge_y = 0.4, size = 3) +
  scale_y_continuous(expand = c(0, 0.1), position = "right",
                     limits = c(0, 10)) +
  scale_fill_gradient2(low = "black", high = "#818aeb", mid = "white",
                       midpoint = 5) +
  coord_flip() +
  labs(y = "Best possible life = 10", x = "",
       title = "Top 10 Happiest Countries in 2021",
       subtitle = "Happiest countries in Europe",
       caption = "Source: The World Happiness Report 2021") +
  theme_ipsum(grid = "") +
  theme(plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 12),
        plot.caption = element_text(size = 10),
        axis.title.x = element_text(size = 10, color = "#555955"),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.text.x = element_blank(),
        # "none" (lower case) is the documented value for hiding the legend
        legend.position = "none")

#Getting bottom 10

# Ten lowest ladder scores of 2021, labelled "<country> (<rank>)" for plotting.
# Ranks are computed over all countries before slicing, with 1 = lowest score.
# min_rank() keeps ranks as whole numbers when scores tie; rank() would average
# tied positions and produce fractional labels such as "(1.5)".
df_2021_bottom10 <- df_2021_tranformed %>%
  filter(dimension == "ladder_score") %>%
  mutate(
    country_rank = min_rank(score),
    country_label = paste0(country, " (", country_rank, ")")
  ) %>%
  slice_min(score, n = 10) %>%
  mutate(cat = "bottom_10")

#Plotting bottom 10

# Horizontal rounded-bar chart of the ten lowest ladder scores.
ggplot(df_2021_bottom10, aes(x = reorder(country_label, score))) +
  # Background bar spanning the full 0-10 range; fill value 4.9 sits next to
  # the gradient midpoint (5), so it is drawn almost white.
  geom_chicklet(aes(y = 10, fill = 4.9), width = 0.5,
                radius = grid::unit(10, "pt")) +
  # Actual score, coloured by its value
  geom_chicklet(aes(y = score, fill = score), width = 0.5,
                radius = grid::unit(10, "pt")) +
  # Label is mapped inside aes() so it always follows the layer's own rows
  # instead of relying on the row order of the global data frame.
  geom_text(aes(y = score, label = round(score, 2)), nudge_y = 0.4, size = 3) +
  scale_y_continuous(expand = c(0, 0.1), position = "right",
                     limits = c(0, 10)) +
  scale_fill_gradient2(low = "#074040", high = "#4cc2c2", mid = "white",
                       midpoint = 5) +
  coord_flip() +
  labs(y = "Best possible life = 10", x = "",
       title = "Top 10 Saddest Countries in the World",
       subtitle = "Countries struck by poverty and war",
       caption = "Source: The World Happiness Report 2021") +
  theme_ipsum(grid = "") +
  theme(plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 12),
        plot.caption = element_text(size = 10),
        axis.title.x = element_text(size = 10, color = "#555955"),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.text.x = element_blank(),
        # "none" (lower case) is the documented value for hiding the legend
        legend.position = "none")

# happiness trend from 2019 to 2020 (insights wrt covid19)

# Average ladder score per region in 2019 and 2020, and the change between
# the two years. Only countries observed in BOTH years enter the averages, so
# the two means are computed over the same set of countries.
df_2019_2020 <- df_all %>%
  # The country column is renamed by position: its name prints as
  # "ï..country_name", a mangled spelling that is encoding-dependent.
  rename(country = 1) %>%
  filter(year %in% c(2019, 2020)) %>%
  # inner_join drops countries that have no region in the 2021 lookup;
  # a left join would collect them into a spurious NA "region".
  inner_join(country_region_dict, by = "country") %>%
  select(country, region, year, ladder = life_ladder) %>%
  pivot_wider(names_from = year, names_prefix = "year",
              values_from = ladder) %>%
  filter(!is.na(year2019), !is.na(year2020)) %>%
  group_by(region) %>%
  # No na.rm needed: rows with a missing year were removed just above
  summarize(happiness_2019 = mean(year2019),
            happiness_2020 = mean(year2020)) %>%
  mutate(diff = happiness_2020 - happiness_2019) %>%
  arrange(diff) %>%
  # Freeze the sorted order as factor levels so plots keep regions ordered
  # by their change in happiness
  mutate(region = factor(region, levels = region))

#plotting happiness levels during covid19

# Dumbbell chart of regional average happiness, 2019 vs 2020, with a shaded
# side column listing the difference per region.
ggplot() +
  # Regions whose average rose (diff > 0)
  geom_dumbbell(data = df_2019_2020 %>% filter(diff > 0),
                aes(y = region, x = happiness_2019, xend = happiness_2020),
                size = 1.5, color = "#7FB185",
                colour_xend = "#7FB185", colour_x = "#7FB185",
                size_x = 2.5, size_xend = 5,
                dot_guide = TRUE, dot_guide_size = 0.5) +
  # Regions whose average fell (diff < 0)
  geom_dumbbell(data = df_2019_2020 %>% filter(diff < 0),
                aes(y = region, x = happiness_2019, xend = happiness_2020),
                size = 1.5, color = "#edae52",
                colour_xend = "#edae52", colour_x = "#edae52",
                size_x = 2.5, size_xend = 5,
                dot_guide = TRUE, dot_guide_size = 0.5) +
  scale_y_discrete(limits = levels(df_2019_2020$region),
                   expand = c(0.075, 1)) +
  labs(x = "", y = NULL,
       title = "Happiness in pre to amidst Covid",
       subtitle = "Regions see increases in happiness, despite Covid",
       caption = "Source: World Happiness Report (2021)") +
  # Shaded background for the DIFF column. annotate() draws the rectangle
  # once; a data-bound geom_rect() would draw one identical copy per region.
  annotate("rect", xmin = 7.35, xmax = 7.65, ymin = -Inf, ymax = Inf,
           fill = "#e3e2e1") +
  # Column headers ("2020", "2019", "DIFF") are drawn above the 'South Asia'
  # row only. NOTE(review): presumably that is the top row after sorting by
  # diff - confirm against the data.
  geom_text(data = df_2019_2020 %>% filter(region == "South Asia"),
            aes(x = happiness_2020, y = region, label = "2020"),
            color = "gray15", size = 3, vjust = -1.5) +
  geom_text(data = df_2019_2020 %>% filter(region == "South Asia"),
            aes(x = happiness_2019, y = region, label = "2019"),
            color = "gray15", size = 3, vjust = -1.5) +
  # Value labels; hjust pushes each label to the outer side of its dot,
  # so the direction flips between rising and falling regions.
  geom_text(data = df_2019_2020 %>% filter(diff > 0),
            aes(x = happiness_2020, y = region,
                label = round(happiness_2020, 2)),
            size = 3, hjust = -0.5) +
  geom_text(data = df_2019_2020 %>% filter(diff > 0),
            aes(x = happiness_2019, y = region,
                label = round(happiness_2019, 2)),
            color = "gray15", size = 3, hjust = 1.3) +
  geom_text(data = df_2019_2020 %>% filter(diff < 0),
            aes(x = happiness_2020, y = region,
                label = round(happiness_2020, 2)),
            size = 3, hjust = 1.5) +
  geom_text(data = df_2019_2020 %>% filter(diff < 0),
            aes(x = happiness_2019, y = region,
                label = round(happiness_2019, 2)),
            color = "gray15", size = 3, hjust = -0.3) +
  geom_text(data = df_2019_2020 %>% filter(region == "South Asia"),
            aes(x = 7.5, y = region, label = "DIFF"),
            size = 3, vjust = -1.5, fontface = "bold") +
  # Difference per region, printed inside the shaded column
  geom_text(data = df_2019_2020,
            aes(label = round(diff, 2), y = region, x = 7.5),
            size = 3) +
  theme_ipsum(grid = "") +
  theme(plot.title = element_text(size = 15),
        plot.subtitle = element_text(size = 12),
        plot.caption = element_text(size = 10),
        axis.title.x = element_text(size = 10, color = "#3a403a"),
        axis.text.y = element_text(size = 10, color = "black"),
        axis.text.x = element_blank(),
        legend.position = "left")

# Correlation matrix of the 2021 ladder score and its six factor columns,
# using short display names for the plots.
df_cor <- df_2021 %>%
  select(
    corruption = perceptions_of_corruption,
    generosity,
    freedom = freedom_to_make_life_choices,
    life_expectancy = healthy_life_expectancy,
    social_support,
    GDP_per_capita = logged_GDP_per_capita,
    happiness = ladder_score
  )

# Pairwise correlations, computed once and shared by both plots below
corr_matrix <- cor(df_cor)

# Static plot: lower triangle with the coefficients printed as numbers
corrplot(corr_matrix,
         method = "number",
         type = "lower")

# `corr` is kept as an alias of the same matrix in case later code uses it
corr <- corr_matrix

# Interactive heatmap. Columns of `z` run along x and rows along y, so the
# axis labels are taken from colnames / rownames respectively.
plot_ly(colors = "RdBu") %>%
  add_heatmap(x = colnames(corr), y = rownames(corr), z = corr) %>%
  colorbar(limits = c(-1, 1))

Data Visualization Assignment

Farzana Patel

14/05/2021

R Markdown

Including Plots