library(ggtext)
## Warning: package 'ggtext' was built under R version 4.0.5
library(tidyselect)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(gridtext)
## Warning: package 'gridtext' was built under R version 4.0.5
# Read the regional figures from CSV: a `region` column plus one numeric
# column per year.
data <- read_csv(file = "grossperheadUK.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## region = col_character(),
## `2006` = col_double(),
## `2007` = col_double(),
## `2008` = col_double(),
## `2009` = col_double(),
## `2010` = col_double()
## )
# Show the freshly loaded tibble to check its contents.
print(data)
## # A tibble: 13 x 6
## region `2006` `2007` `2008` `2009` `2010`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 North Est 14901 15530 15673 15304 15744
## 2 North West 16382 17165 17344 16884 17381
## 3 Yorkshire and The Humber 16227 16900 17012 16512 16917
## 4 East Midlands 17013 17806 17952 17519 18090
## 5 West Midlands 16365 17098 17143 16602 17060
## 6 East of England 18514 19337 19294 18536 18966
## 7 London 31714 33721 34964 34779 35026
## 8 South East 20472 21593 21859 21257 21924
## 9 South West 17576 18383 18606 18184 18669
## 10 Wales 14407 14042 15122 14664 14145
## 11 Scotland 18484 19492 19991 19755 20220
## 12 Northen Ireland 15359 16013 15928 15249 15651
## 13 United Kingdom 19542 20539 20911 20341 20849
# Reshape from wide (one column per year) to long (one row per region-year),
# then convert the plotting columns: region to a factor, year to a number.
df <- data %>%
  pivot_longer(
    cols = -region,
    names_to = "year",
    values_to = "value"
  ) %>%
  mutate(region = as.factor(region)) %>%
  mutate(year = as.numeric(year))
# Line chart of value against year, one coloured line per region, with the
# y axis pinned to the range 0-40000.
df %>%
  ggplot(mapping = aes(x = year, y = value, colour = region)) +
  geom_line() +
  ylim(0, 40000)
Problem: The relative position of each area appears to change only marginally over time.
We should look at the last year of data and examine each area relative to the overall average for the United Kingdom.
# Keep only the 2010 rows and index each region against the United Kingdom
# (United Kingdom = 100), rounded to a whole number.
# The baseline is taken from the "United Kingdom" row of the filtered data
# instead of being typed in by hand, so the index stays correct if the source
# figures are revised or a different year is selected. If that row is missing,
# mutate() fails loudly rather than producing a wrong index.
df2010 <- df %>%
  filter(year == 2010) %>%
  mutate(UK100 = round(value / value[region == "United Kingdom"] * 100, 0))
df2010
## # A tibble: 13 x 4
## region year value UK100
## <fct> <dbl> <dbl> <dbl>
## 1 North Est 2010 15744 76
## 2 North West 2010 17381 83
## 3 Yorkshire and The Humber 2010 16917 81
## 4 East Midlands 2010 18090 87
## 5 West Midlands 2010 17060 82
## 6 East of England 2010 18966 91
## 7 London 2010 35026 168
## 8 South East 2010 21924 105
## 9 South West 2010 18669 90
## 10 Wales 2010 14145 68
## 11 Scotland 2010 20220 97
## 12 Northen Ireland 2010 15651 75
## 13 United Kingdom 2010 20849 100
# Horizontal bar chart of the 2010 index (UK100) for every region except the
# "United Kingdom" row itself, ordered by value, with a red reference line
# and label at 100.
ggplot() +
  geom_col(
    data = df2010 %>% filter(region != "United Kingdom"),
    mapping = aes(x = reorder(region, value), y = UK100),
    fill = "#215968"
  ) +
  geom_hline(yintercept = 100, colour = "red") +
  # One label anchored at the first bar position, on the reference line.
  annotate(
    "label",
    x = 1,
    y = 100,
    label = "United Kingdom Average = 100",
    fill = "#dddddd",
    colour = "red",
    lineheight = 2,
    hjust = -0.05
  ) +
  scale_y_continuous(
    breaks = seq(0, 200, by = 25),
    limits = c(0, 200)
  ) +
  labs(
    y = "United Kingdom = 100",
    title = "Relative Gross Values Added per head, 2010\n"
  ) +
  # Flip the axes so the bars run horizontally.
  coord_flip() +
  theme(
    # Titles.
    # NOTE(review): hjust = 5.5 is far outside the usual 0-1 range; kept
    # unchanged here -- confirm it is intentional.
    plot.title = element_markdown(size = 18, hjust = 5.5, lineheight = 6),
    plot.subtitle = element_markdown(size = 12, face = "bold", color = "#777B7E"),
    axis.title.x = element_markdown(hjust = 0, size = 12),
    axis.title.y = element_blank(),
    # Axis text, ticks and lines.
    axis.text.x = element_text(color = "#777B7E", face = "bold", size = 12),
    axis.text.y = element_text(color = "#777B7E", face = "bold", size = 12),
    axis.ticks.x = element_line(color = "#a9a9a9"),
    axis.ticks.y = element_blank(),
    axis.line.x = element_line(color = "grey", size = 1),
    # Strip the legend, grid and panel background.
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    # Margins are given as top, right, bottom, left.
    plot.margin = unit(c(0.5, 0, 0.5, 0.5), "cm")
  )
library(gapminder)
# Switch to the gapminder dataset (this reassigns `df`) and preview the
# population figures for Canada, Mexico and the United States from 1980 on.
df <- gapminder
df %>%
  select(country, year, pop) %>%
  filter(
    year >= 1980,
    country %in% c("Canada", "Mexico", "United States")
  )
## # A tibble: 18 x 3
## country year pop
## <fct> <int> <int>
## 1 Canada 1982 25201900
## 2 Canada 1987 26549700
## 3 Canada 1992 28523502
## 4 Canada 1997 30305843
## 5 Canada 2002 31902268
## 6 Canada 2007 33390141
## 7 Mexico 1982 71640904
## 8 Mexico 1987 80122492
## 9 Mexico 1992 88111030
## 10 Mexico 1997 95895146
## 11 Mexico 2002 102479927
## 12 Mexico 2007 108700891
## 13 United States 1982 232187835
## 14 United States 1987 242803533
## 15 United States 1992 256894189
## 16 United States 1997 272911760
## 17 United States 2002 287675526
## 18 United States 2007 301139947
# Population lines from 1987 onwards for six countries; the year is converted
# to a factor so it is drawn on a discrete x axis, one line per country.
df %>%
  filter(
    year >= 1987,
    country %in% c(
      "Canada", "Mexico", "United States",
      "Norway", "Denmark", "Sweden"
    )
  ) %>%
  select(country, year, pop) %>%
  mutate(Year = factor(year)) %>%
  ggplot(
    mapping = aes(x = Year, y = pop, colour = country, group = country)
  ) +
  geom_line(size = 1.5)