Objective
The visualization examined in this assignment shows the average height of the male population in European countries, with each country's average printed on the map. It also labels countries whose average is below 175 cm as “Shorties Gang” and those at or above 184 cm as “Tall Gang”.
The main problems analyzed in the visualization are:
Too confusing: The average heights (in cm) are printed directly on the map, one per country, and some labels overlap neighbouring countries. A viewer can easily confuse the values of two small adjacent countries because their labels overlap.
Unprofessional: The visualization sorts the data into two groups with unprofessional names, a categorization that is not needed.
Poor choice of visualization: Data of this kind is best shown by shading each country on the map according to its value (a choropleth), which makes the differences between countries easy to compare.
Unnecessary use of colors: Too many colors are used in one visualization, including pink for the “Shorties Gang” and black for the “Tall Gang”. The average height of the female population is not shown at all.
The target audience for this visualization is the general public in Europe, who can use it to get an idea of the average male height in their own country.
Reference
“R/MapPorn - Average Male Height in Europe 2022 (in CM).” Reddit, 2022, www.reddit.com/r/MapPorn/comments/tsvch7/average_male_height_in_europe_2022_in_cm/. Accessed 3 May 2022.
https://www.kaggle.com/datasets/majyhain/height-of-male-and-female-by-country-2022
library(readr)
## Warning: package 'readr' was built under R version 4.0.5
library(stringr)
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.0.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
# Importing Dataset ----
# Empty CSV fields are read as NA (na.strings = "").
# NOTE(review): the path below is machine-specific; adjust it when running on
# another computer.
height <- read.csv("C:/Users/nates/Desktop/Data Visualization/Assignment 2/Height of Male and Female by Country 2022.csv", na.strings = "")
# View Dataset
# View() opens a data viewer window, so it is only called in an interactive
# session and skipped when the script is run or knitted non-interactively.
if (interactive()) {
  View(height)
}
# Data exploration ----
# Size of the raw table as (rows, columns).
c(nrow(height), ncol(height))
## [1] 199 6
# Compact overview of every column: type and first values.
str(height)
## 'data.frame': 199 obs. of 6 variables:
## $ Rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Country.Name : chr "Netherlands" "Montenegro" "Estonia" "Bosnia and Herzegovina" ...
## $ Male.Height.in.Cm : num 184 183 183 182 182 ...
## $ Female.Height.in.Cm: num 170 170 169 167 169 ...
## $ Male.Height.in.Ft : num 6.03 6.01 6 5.99 5.97 5.97 5.94 5.94 5.94 5.94 ...
## $ Female.Height.in.Ft: num 5.59 5.58 5.53 5.49 5.54 5.56 5.51 5.54 5.48 5.49 ...
# Drop the rank and the height-in-feet columns; only the country name and the
# two centimetre columns are used from here on.
height <- height[, !(names(height) %in%
  c("Rank", "Male.Height.in.Ft", "Female.Height.in.Ft"))]
colnames(height)
## [1] "Country.Name" "Male.Height.in.Cm" "Female.Height.in.Cm"
# Rename the columns; the country column is called "region" so that it can be
# used as the key when merging with the map data later in the script.
names(height) <- c("region", "Height_Male_cm", "Height_Female_cm")
colnames(height)
## [1] "region" "Height_Male_cm" "Height_Female_cm"
# Number of columns left after the clean-up.
ncol(height)
## [1] 3
# Per-column summary: quartiles, mean and range for the numeric columns.
summary(height)
## region Height_Male_cm Height_Female_cm
## Length:199 Min. :160.1 Min. :150.9
## Class :character 1st Qu.:169.5 1st Qu.:158.2
## Mode :character Median :173.5 Median :160.6
## Mean :173.1 Mean :160.9
## 3rd Qu.:176.5 3rd Qu.:163.9
## Max. :183.8 Max. :170.4
# Distinct male heights. The full column name is spelled out: the shortened
# `Height_Male` only worked through `$` partial matching, which silently
# returns NULL as soon as a second column with the same prefix exists.
unique(height$Height_Male_cm)
## [1] 183.78 183.30 182.79 182.47 182.10 181.89 181.19 181.17 181.02 180.98
## [11] 180.76 180.74 180.72 180.69 180.57 180.48 180.46 180.28 180.15 179.72
## [21] 179.48 179.26 179.09 179.04 178.96 178.84 178.77 178.75 178.73 178.70
## [31] 178.69 178.60 178.52 178.46 178.32 178.21 177.82 177.72 177.49 177.19
## [41] 177.09 177.03 176.97 176.94 176.85 176.65 176.59 176.43 176.39 176.36
## [51] 176.35 176.18 176.11 176.06 176.03 175.98 175.90 175.73 175.66 175.62
## [61] 175.59 175.52 175.50 175.11 175.05 175.04 175.02 174.96 174.84 174.83
## [71] 174.76 174.69 174.65 174.57 174.51 174.42 174.40 174.38 174.37 174.32
## [81] 174.17 174.08 174.07 174.04 174.00 173.98 173.84 173.81 173.79 173.71
## [91] 173.67 173.56 173.53 173.50 173.27 173.16 173.01 172.88 172.76 172.75
## [101] 172.23 172.15 172.13 172.07 172.06 171.89 171.85 171.84 171.70 171.66
## [111] 171.64 171.61 171.55 171.30 171.23 171.22 170.94 170.77 170.70 170.69
## [121] 170.67 170.62 170.60 170.52 170.48 170.46 170.37 170.30 170.29 170.26
## [131] 170.19 170.14 170.09 169.91 169.75 169.63 169.59 169.57 169.41 169.20
## [141] 169.01 168.89 168.84 168.74 168.60 168.50 168.46 168.44 168.43 168.36
## [151] 168.29 168.23 168.18 168.17 168.10 168.07 167.92 167.87 167.72 167.62
## [161] 167.34 167.33 167.32 167.05 166.98 166.75 166.70 166.50 166.43 166.31
## [171] 166.26 166.02 165.68 165.54 165.48 165.35 165.26 165.23 165.16 165.08
## [181] 164.42 164.36 164.30 163.10 163.07 162.78 160.13
# Distinct female heights. The full column name is spelled out: the shortened
# `Height_Female` only worked through `$` partial matching, which silently
# returns NULL as soon as a second column with the same prefix exists.
unique(height$Height_Female_cm)
## [1] 170.36 169.96 168.66 167.47 168.91 169.47 167.96 168.81 167.12 167.20
## [11] 166.62 166.80 168.29 167.63 165.78 166.48 166.45 166.67 166.18 166.89
## [21] 166.11 163.06 165.81 163.40 164.50 163.67 165.53 165.72 164.67 164.73
## [31] 164.33 165.99 166.93 164.49 165.07 167.31 166.52 163.94 164.66 165.30
## [41] 167.03 167.55 165.66 164.32 163.31 161.69 164.52 162.55 165.52 160.88
## [51] 162.56 161.80 161.18 163.92 162.03 166.08 163.38 162.22 163.24 162.47
## [61] 162.41 163.46 162.96 163.23 161.74 161.28 162.35 161.99 160.10 159.46
## [71] 160.62 161.22 161.21 162.26 161.81 163.82 162.95 162.83 161.23 161.56
## [81] 164.58 160.53 162.23 160.36 161.37 164.28 161.40 159.76 158.75 162.78
## [91] 159.85 160.13 160.04 160.70 161.30 160.72 162.06 158.94 159.42 158.29
## [101] 160.55 160.58 159.57 160.41 158.50 161.33 157.96 162.12 158.44 160.20
## [111] 159.38 158.14 163.57 158.78 159.86 160.29 159.82 159.89 156.39 158.84
## [121] 159.52 157.58 158.12 160.05 159.43 159.81 158.86 157.90 158.19 159.13
## [131] 161.04 155.62 160.26 158.58 155.18 159.66 157.82 158.92 157.06 159.65
## [141] 158.43 157.15 158.53 156.30 156.11 158.10 158.40 161.73 160.48 158.65
## [151] 158.05 158.72 155.58 155.88 156.72 154.28 156.54 157.32 155.08 154.77
## [161] 155.25 155.15 156.89 154.39 154.71 157.34 154.90 154.36 156.14 160.06
## [171] 154.75 154.76 154.14 153.06 152.38 152.39 150.91 155.42 156.79 153.10
## [181] 152.71
# Cleaning ----
# TRUE if any cell of the table is missing.
any(is.na(height))
## [1] FALSE
# Checking outliers in columns
# One boxplot per sex. Columns are referenced by their full names instead of
# relying on `$` partial matching (`Height_Male` -> `Height_Male_cm`).
boxplot(height$Height_Male_cm, col = "#3399FF")
boxplot(height$Height_Female_cm, col = "pink")
# Data Visualization ----
# World country outlines as a data frame of polygon vertices.
mapdata <- map_data("world")
# Attach the height columns to every vertex row. merge() keeps only regions
# that are present in both tables.
mapdata <- merge(x = mapdata, y = height, by = "region")
# merge() does not preserve the original row order, but geom_polygon() connects
# vertices in row order; restore the drawing order so outlines are not
# scrambled.
mapdata <- mapdata[order(mapdata$order), ]
# Selecting countries in Europe
# NOTE(review): the list stops alphabetically at "Greece" - confirm whether the
# remaining European countries were left out on purpose.
# NOTE(review): the map data may name "Czechia" differently (e.g. "Czech
# Republic"); verify that it survives the merge above.
europe_regions <- c(
  "Albania", "Andorra", "Armenia", "Austria", "Azerbaijan", "Belarus",
  "Belgium", "Bosnia and Herzegovina", "Bulgaria", "Croatia", "Cyprus",
  "Czechia", "Denmark", "Estonia", "Finland", "France", "Georgia", "Germany",
  "Greece"
)
DF <- mapdata[mapdata$region %in% europe_regions, ]
# Choropleth of male height: each country polygon in DF is filled by its
# Height_Male_cm value.
# coord_quickmap() keeps a map-like aspect ratio; without it longitude and
# latitude are stretched to fill the plotting area and countries look
# distorted.
map1 <- ggplot(DF, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Height_Male_cm)) +
  coord_quickmap()
# Light-to-dark blue gradient; axis text and titles are hidden because the
# longitude/latitude values carry no information for the reader.
map2 <- map1 +
  scale_fill_gradient(
    name = " Avg Height - Male",
    low = "#CCFFFF",
    high = "#003366",
    na.value = "grey50"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
map2 <- map2 + ggtitle(" Average height of Male Population in Europe ")
# Print the finished map.
map2
# Choropleth of female height: each country polygon in DF is filled by its
# Height_Female_cm value.
# coord_quickmap() keeps a map-like aspect ratio; without it longitude and
# latitude are stretched to fill the plotting area and countries look
# distorted.
map3 <- ggplot(DF, aes(x = long, y = lat, group = group)) +
  geom_polygon(aes(fill = Height_Female_cm)) +
  coord_quickmap()
# Light-to-dark green gradient; axis text and titles are hidden because the
# longitude/latitude values carry no information for the reader.
map4 <- map3 +
  scale_fill_gradient(
    name = "Avg Height - Female",
    low = "#E5FFCC",
    high = "#336600",
    na.value = "grey50"
  ) +
  theme(
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank()
  )
map4 <- map4 + ggtitle(" Average height of Female Population in Europe ")
# Print the finished map.
map4
# Bar charts of height per country.
# DF holds one row per polygon vertex, so plotting it directly draws many
# identical, overlapping bars for each country. Reduce it to one row per
# country first; the bars look the same and each is drawn only once.
country_heights <- unique(
  DF[, c("region", "Height_Male_cm", "Height_Female_cm")]
)
# coord_cartesian() zooms the y axis to 155-185 cm without dropping data, so
# the differences between countries stay visible.
bar1 <- ggplot(country_heights, aes(x = region, y = Height_Male_cm)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 75, hjust = 1)) +
  coord_cartesian(ylim = c(155, 185)) +
  ggtitle(" Average height of Male Population in Europe ")
bar2 <- ggplot(country_heights, aes(x = region, y = Height_Female_cm)) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 75, hjust = 1)) +
  coord_cartesian(ylim = c(155, 185)) +
  ggtitle(" Average height of Female Population in Europe ")
# Final output: both maps followed by both bar charts.
map2
map4
bar1
bar2