I went through the sample open data sites listed in Chapter 9 (Open Data) but could not find data that I could use. I then looked at other sites and found this dataset at https://data.london.gov.uk/dataset/jobs-by-age-and-gender; the page notes that it is released under the UK Open Government Licence.
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.4 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readr)
# Load the London "jobs by age and gender" extract into a tibble.
# NOTE(review): the path is an absolute location on one machine; anyone else
# running this needs the CSV at the same place or must adjust the path.
jobs_by_age_and_gender <- read_csv(
  file = "C:/Users/Catherine/Desktop/jobs_by_age_and_gender.csv"
)
##
## -- Column specification --------------------------------------------------------
## cols(
## date = col_character(),
## age = col_character(),
## gender = col_character(),
## all_people = col_double(),
## full_time = col_double(),
## part_time = col_double()
## )
# Open the data viewer only when someone is at the console: View() needs an
# interactive session, so calling it unconditionally breaks or stalls batch
# runs of this script.
if (interactive()) {
  View(jobs_by_age_and_gender)
}

# Print the first rows so the column layout is visible in the output.
head(jobs_by_age_and_gender)
## # A tibble: 6 x 6
## date age gender all_people full_time part_time
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Apr 2004-Mar 2005 16-19 All People 86900 35800 51100
## 2 Apr 2004-Mar 2005 16-19 Female 45600 16300 29300
## 3 Apr 2004-Mar 2005 16-19 Male 41300 19400 21800
## 4 Apr 2004-Mar 2005 16-64 All People 3819100 3131700 684300
## 5 Apr 2004-Mar 2005 16-64 Female 1644900 1145300 497400
## 6 Apr 2004-Mar 2005 16-64 Male 2174200 1986300 187000
library(dplyr)
# Job counts for the 16-64 age band in each calendar-year period (Jan-Dec),
# one row per gender and period. This table feeds the trend plots.
gender_groups <- jobs_by_age_and_gender %>%
  filter(
    # The gender column is spelled out in the data ("Male", "Female",
    # "All People"); an abbreviation such as "M" matches no rows.
    gender %in% c("Male", "Female"),
    age == "16-64",
    # Keep only periods that start in January. The pattern is anchored so
    # that it matches the literal prefix "Jan " and nothing else.
    str_detect(date, "^Jan ")
  )

# Preview the filtered rows.
head(gender_groups)
## # A tibble: 6 x 6
## date age gender all_people full_time part_time
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Jan 2004-Dec 2004 16-64 Female 1641100 1129900 508700
## 2 Jan 2005-Dec 2005 16-64 Female 1672800 1161100 510700
## 3 Jan 2006-Dec 2006 16-64 Female 1701200 1190900 509800
## 4 Jan 2007-Dec 2007 16-64 Female 1718300 1191500 525200
## 5 Jan 2008-Dec 2008 16-64 Female 1778200 1229200 548600
## 6 Jan 2009-Dec 2009 16-64 Female 1797500 1221100 575000
# 2019 calendar-year job counts by gender for each individual age band
# (the aggregate 16-64 band is excluded), with presentation-ready column names.
age_groups <- jobs_by_age_and_gender %>%
  filter(
    gender %in% c("Male", "Female"),
    # Only the "Jan 2019-Dec 2019" period starts with this prefix.
    str_detect(date, "^Jan 2019"),
    age != "16-64"
  ) %>%
  # No per-group computation is done here, so the table is left ungrouped.
  mutate(gender = factor(gender)) %>%
  arrange(gender) %>%
  # Rename by column rather than by position: the table has six columns, and a
  # positional names vector that is one entry short shifts every count label
  # and leaves the last column named NA.
  rename(
    Date = date,
    `Age Group` = age,
    Gender = gender,
    `All People` = all_people,
    `Full-time` = full_time,
    `Part-time` = part_time
  )
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
head(age_groups)
## # A tibble: 6 x 6
## # Groups: Gender, Age Group [6]
## Date `Age Group` Gender `Full-time` `Part-time` NA
## <chr> <chr> <fct> <dbl> <dbl> <dbl>
## 1 Jan 2019-Dec 2019 16-19 Female 40100 13100 27000
## 2 Jan 2019-Dec 2019 20-24 Female 183500 126600 56900
## 3 Jan 2019-Dec 2019 25-49 Female 1513400 1099600 413400
## 4 Jan 2019-Dec 2019 50+ Female 584800 340800 243100
## 5 Jan 2019-Dec 2019 16-19 Male 30800 9200 21600
## 6 Jan 2019-Dec 2019 20-24 Male 190100 149500 40300
## Plot full-time employment trends by gender over the last ten years
library(ggplot2)

# Show axis values as plain numbers instead of scientific notation.
options(scipen = 999)

# Line chart of full-time job counts, one line per gender.
gender_groups %>%
  # Select the ten most recent periods by their date label, so the window is
  # ten years however many gender rows each period has.
  # Assumes rows are in chronological order, as in the source file.
  filter(date %in% tail(unique(date), 10)) %>%
  ggplot(aes(x = date, y = full_time, group = gender, color = gender)) +
  geom_line() +
  ggtitle("Full Time Employment Trends by Gender") +
  theme_light() +
  ylab("Number Employed") +
  xlab("Time Period") +
  # Tilt the period labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 40))
## Plot part-time employment trends by gender over the last ten years
# Show axis values as plain numbers instead of scientific notation.
options(scipen = 999)

# Line chart of part-time job counts, one line per gender.
gender_groups %>%
  # Select the ten most recent periods by their date label, so the window is
  # ten years however many gender rows each period has.
  # Assumes rows are in chronological order, as in the source file.
  filter(date %in% tail(unique(date), 10)) %>%
  ggplot(aes(x = date, y = part_time, group = gender, color = gender)) +
  geom_line() +
  ggtitle("Part-time Employment Trends by Gender") +
  theme_light() +
  ylab("Number Employed") +
  xlab("Time Period") +
  # Tilt the period labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 40))