#constants----------------------letters <-"[:alpha:]"#functions---------------------------apply_props <-function(tbbl, val){ tbbl|>mutate(count=prop*val)}#compare current and previous pssm in terms of occupation proportions.noc_mapping <-read_csv(here("data","noc_mapping.csv"))|>clean_names()|>select(noc_2016=noc_2016_v1_3_code, noc_2021=noc_2021_v1_0_code)old_noc_props <-read_excel(here("data","B.C. Post-Secondary Supply Model 2019-20 to 2030-31 2021-09-21 Internal Use.xlsx"),sheet ="Occupation Projections",na ="N")|>filter(`Region Name`=="British Columbia",`Age Group`=="17 to 29" )|>pivot_longer(cols=starts_with("2"))|>clean_names()|>mutate(noc_2016=as.numeric(noc_2016))|>full_join(noc_mapping)|>group_by(noc_2021)|>summarize(old_value=sum(value, na.rm =TRUE))|>ungroup()|>mutate(old_prop=old_value/sum(old_value))new_noc_props <-read_excel(here("data",#"draft_internal_use_PSSM_2023-24_to_2034-35_20241002.xlsx"),"draft_internal_use_PSSM_2023-24_to_2034-35_20241018.xlsx"),sheet ="Occupation Projections",na ="N",skip=1)|>filter(`Region Name`=="British Columbia",`Age Group`=="17 to 29")|>pivot_longer(cols=starts_with("2"))|>clean_names()|>group_by(noc_2021, occupation_description)|>summarize(new_value=sum(value, na.rm =TRUE))|>ungroup()|>mutate(new_prop=new_value/sum(new_value),noc_2021=as.numeric(noc_2021))noc_props <-inner_join(new_noc_props, old_noc_props, by="noc_2021")new_vs_old_plt <-ggplot(noc_props, aes(old_prop, new_prop,text=paste0("Occupation: ", occupation_description,"\n Previous Proportion= ", scales::percent(old_prop, accuracy = .001),"\n Current Proportion= ", scales::percent(new_prop, accuracy = .001) ) ) )+geom_abline(slope =1, intercept =0, col="white", lwd=2)+geom_point(alpha=.25)+scale_x_continuous(trans="log10", labels = scales::percent)+scale_y_continuous(trans="log10", labels=scales::percent)+labs(x="Proportion of BC Grads in Occupation: Previous PSSM",y="Proportion of BC Grads in Occupation: Current PSSM",title="Comparison of PSSM BC graduate proportions by Occupation" )#compare with props based on bc grad counts (by cip) and canada cip-noc table.#read in the data-------------------------------------cip_counts_col_names <-unlist(as.vector(read_csv(here("data","cip_counts_bc.csv"), skip=12, n_max =1, col_names =FALSE)))cip_counts <-read_csv(here("data","cip_counts_bc.csv"), skip=14, na ="..", col_names = cip_counts_col_names)|>mutate(CIP=ex_between(`Field of study 5`,"[","]"),CIP=str_remove_all(CIP, letters),CIP=str_pad(CIP, width=5, side ="left", pad ="0"),field_of_study=word(`Field of study 5`, sep="\\["), .before =everything())|>select(-`Field of study 5`)|>pivot_longer(cols =starts_with("2"))|>group_by(CIP, field_of_study)|>summarise(mean_grads=mean(value, na.rm =TRUE))cip_noc_long <- vroom::vroom(here("data","cip_2_noc_canada.csv"), skip =13)[-1,]colnames(cip_noc_long)[1] <-"field_of_study"cip_noc_long <- cip_noc_long|> janitor::remove_empty("cols")|>pivot_longer(cols=-field_of_study, names_to ="noc", values_to ="count")|>mutate(count=as.numeric(str_remove_all(count,",")),CIP=str_sub(field_of_study, 1, 5),field_of_study=str_sub(field_of_study, 7))richs_supply <- cip_noc_long|>group_by(CIP, field_of_study)|>mutate(prop=count/sum(count, na.rm =TRUE))|>select(-count)|>nest()|>inner_join(cip_counts, by="CIP")|>mutate(data=map2(data, mean_grads, apply_props))|>select(data)|>unnest(data)|>group_by(noc)|>summarize(supply_by_occupation=sum(count, na.rm =TRUE))|>ungroup()|>mutate(richs_prop=supply_by_occupation/sum(supply_by_occupation, na.rm =TRUE),noc_2021=as.numeric(str_sub(noc, 1,5)) )rich_vs_new <-inner_join(richs_supply, new_noc_props)rich_vs_new_plt <-ggplot(rich_vs_new, aes(richs_prop, new_prop,text=paste0("Occupation: ", occupation_description,"\n Rich's Proportion= ", scales::percent(richs_prop, accuracy = .001),"\n PSSM Proportion= ", scales::percent(new_prop, accuracy = .001) )))+geom_abline(slope =1, intercept =0, col="white", lwd=2)+geom_point(alpha=.25)+scale_x_continuous(trans="log10", labels = scales::percent)+scale_y_continuous(trans="log10", labels=scales::percent)+labs(x="Proportion of BC Grads in Occupation: Rich",y="Proportion of BC Grads in Occupation: Current PSSM",title="Comparison of PSSM vs. applying CIP/NOC proportions to mean CIP Counts" )#Compare with last version of PSSM-------------------------------------last_version_noc_props <-read_excel(here("data","draft_internal_use_PSSM_2023-24_to_2034-35_20241002.xlsx"),sheet ="Occupation Projections",na ="N",skip=1)|>filter(`Region Name`=="British Columbia",`Age Group`=="17 to 29")|>pivot_longer(cols=starts_with("2"))|>clean_names()|>group_by(noc_2021, occupation_description)|>summarize(last_value=sum(value, na.rm =TRUE))|>ungroup()|>mutate(last_prop=last_value/sum(last_value),noc_2021=as.numeric(noc_2021))version_compare <-inner_join(new_noc_props, last_version_noc_props)options(scipen =999)version_compare_plt <-ggplot(version_compare, aes(new_value, last_value,text=paste0("Occupation: ", occupation_description,"\n Last Value= ", scales::comma(last_value),"\n New Value= ", scales::comma(new_value) )))+geom_abline(slope =1, intercept =0, col="white", lwd=2)+geom_jitter(alpha=.25, width=.05, height=.05)+scale_x_continuous(trans="log10")+scale_y_continuous(trans="log10")+labs(x="BC Grads in Occupation: New Version",y="BC Grads in Occupation: Last version",title="Comparison of 2 versions of current PSSM" )
Comparison with previous PSSM
Here we compare the by occupation grad proportions for the new vs the old PSSM (for Age Group==“17 to 29”)
Show the code
ggplotly(new_vs_old_plt, tooltip="text")
Comparison with my method
My method is to take the historic mean BC grad counts by CIP (Table: 37-10-0183-01), and apply proportions from Canada’s CIP-NOC Table: 98-10-0403-01 (for ages 15-24) to form a prediction of where (i.e. NOCs) the grads will end up working.