#loading Packages
library(tidyverse)
library(here)
library(ggbeeswarm)
library(RColorBrewer)
library(ggplot2)
library(dplyr)
library(reshape2)
# Plot defaults ----
# Make the classic theme the default for every ggplot drawn afterwards.
theme_set(new = theme_classic())

# Input data ----
# Load the college-region population table; the path is resolved relative to
# the current working directory.
popincollege <- read.csv(file = "college-region-population.csv")
# Figure 1: population by college region in 2016 ----
# Keep only 2016 rows and drop the provincial total ("British Columbia") and
# the all-gender total ("T"), so each bar is one region split by gender.
ggplot(
  data = filter(
    popincollege,
    Year == "2016",
    Region.Name != "British Columbia",
    Gender != "T"
  ),
  mapping = aes(x = Region.Name, y = Total, fill = Gender)
) +
  # One bar per region; bar height is the Total column.
  geom_col() +
  # Flip the axes so the region names are listed down the side.
  coord_flip() +
  labs(
    title = "Total Population in College Region (2016)",
    subtitle = "by Gender",
    x = "College Region",
    y = "Population",
    caption = ""
  )
Figure 1. Total male and female population in each college region in 2016.
# Figure 2: population pyramid across all college regions (2016) ----
popincollege %>%
  # Keep 2016 only; drop the provincial total and the all-gender total rows.
  filter(Year == "2016", Region.Name != "British Columbia", Gender != "T") %>%
  # Keep the identifiers plus the single-year age columns X0 ... X90.
  select(Region.Name, Gender, X0:X90.) %>%
  # Reshape to long form with the id columns stated explicitly. The previous
  # melt(idr = C(...)) call passed a non-existent argument, so the id columns
  # were only being guessed from the column types.
  pivot_longer(
    cols = X0:X90.,
    names_to = "Age",
    values_to = "population"
  ) %>%
  mutate(
    # Males are plotted to the left of zero to form the pyramid.
    population = if_else(Gender == "M", -population, population),
    # Parse the age out of the column name ("X12" -> 12, "X90." -> 90) as an
    # integer so the axis is ordered numerically instead of alphabetically
    # (0, 1, 10, 11, ...).
    Age_new = as.integer(str_extract(Age, "[0-9]+"))
  ) %>%
  ggplot(aes(x = Age_new, y = population, fill = Gender)) +
  # Bars for the individual regions stack on top of each other within an age.
  geom_col(width = 0.5) +
  # Flip so age runs up the vertical axis.
  coord_flip() +
  # Label every tenth age; the last column holds everyone aged 90 and over.
  scale_x_continuous(
    breaks = seq(0, 90, by = 10),
    labels = c(seq(0, 80, by = 10), "90+")
  ) +
  # Show magnitudes on the population axis: the negative sign on the male
  # side is only a plotting device.
  scale_y_continuous(labels = abs) +
  scale_fill_brewer(type = "seq", palette = 7) +
  labs(
    title = "Population Pyramid of College Regions",
    subtitle = "Please note age 90 includes all population above 90!",
    x = "Age",
    y = "Population"
  )
## Using Region.Name, Gender as id variables
Figure 2. Population pyramid for the total population of BC.
# Figure 3: population pyramids for Camosun and Okanagan (2016) ----
popincollege %>%
  # Keep 2016 rows for the two study regions and drop the all-gender total.
  # Selecting the two regions by name already excludes the provincial total.
  filter(
    Year == "2016",
    Region.Name %in% c("Camosun", "Okanagan"),
    Gender != "T"
  ) %>%
  # Keep the identifiers plus the single-year age columns X0 ... X90.
  select(Region.Name, Gender, X0:X90.) %>%
  # Reshape to long form with the id columns stated explicitly. The previous
  # melt(idr = C(...)) call passed a non-existent argument, so the id columns
  # were only being guessed from the column types.
  pivot_longer(
    cols = X0:X90.,
    names_to = "Age",
    values_to = "population"
  ) %>%
  mutate(
    # Males are plotted to the left of zero to form the pyramid.
    population = if_else(Gender == "M", -population, population),
    # Parse the age out of the column name ("X12" -> 12, "X90." -> 90) as an
    # integer so the axis is ordered numerically instead of alphabetically
    # (0, 1, 10, 11, ...).
    Age_new = as.integer(str_extract(Age, "[0-9]+"))
  ) %>%
  ggplot(aes(x = Age_new, y = population, fill = Gender)) +
  geom_col(width = 0.5) +
  # Flip so age runs up the vertical axis.
  coord_flip() +
  # Label every tenth age; the last column holds everyone aged 90 and over.
  scale_x_continuous(
    breaks = seq(0, 90, by = 10),
    labels = c(seq(0, 80, by = 10), "90+")
  ) +
  # Show magnitudes on the population axis: the negative sign on the male
  # side is only a plotting device.
  scale_y_continuous(labels = abs) +
  scale_fill_brewer(type = "seq", palette = 7) +
  # One panel per region so the two age profiles can be compared side by side.
  facet_wrap(~Region.Name) +
  labs(
    title = "Population Pyramid of College Regions",
    subtitle = "Age 90 includes all population above 90!",
    x = "Age",
    y = "Population"
  )
## Using Region.Name, Gender as id variables
Figure 3. Population pyramid for the Camosun and Okanagan college regions.
# Write the most recently displayed plot to a PNG in the working directory.
# No size is given, so ggsave falls back to its default dimensions.
ggsave(filename = "Camosun_Okanagan.png", plot = last_plot())
## Saving 7 x 5 in image
Comparing Figure 2 with Figure 3, the Camosun college region has an age profile closer to that of the total population than the Okanagan college region does. It generally has age concentrations in the 20-30 and 60-70 ranges. The Okanagan college region has a clearer peak in the 60-70 age range. Therefore, I conclude that the Okanagan college region has an older population than the Camosun college region.