Call packages and define ggplot theme.
library(data.table);library(tidyverse);library(lubridate);library(viridis);library(ggthemes);library(patchwork)
# Shared plot theme used by the figures below: theme_clean() with the plot
# background removed, Palatino set for all text (and again, explicitly, for
# the legend title and legend labels), the legend on the right, and the
# title positioned relative to the whole plot.
my_theme<-function(){
theme_clean()+theme(plot.background = element_blank())+
theme(text=element_text(family="Palatino"),
legend.title = element_text(family="Palatino"),
legend.text = element_text(family="Palatino"),
legend.position = "right",
plot.title.position = "plot")
}
Deadlines timing
# Read the deadlines sheet, turn empty-string cells into NA, then parse the
# `deadline` and `applied` columns with mdy() (month/day/year strings).
deadlines<-fread("deadlines.csv")
deadlines<-deadlines%>%
replace(.=="", NA)%>%
mutate(deadline=mdy(deadline),
applied=mdy(applied))
What percentage of deadlines were in November 2021?
# Rows whose deadline lies between 2021-11-01 and 2021-12-01, divided by all
# rows that have a deadline.
# NOTE(review): the upper bound is inclusive, so a December 1 deadline is
# counted as November -- confirm that is intended.
nrow(subset(deadlines, !is.na(deadline) & deadline>="2021-11-01" & deadline<="2021-12-01"))/nrow(subset(deadlines, !is.na(deadline)))
[1] 0.6031746
Plot histogram of deadlines
# Histogram of deadlines with binwidth 1 on a date axis, drawn on a fixed
# 2021-07-30 to 2022-04-16 range with month breaks and labels like "Nov '21".
deadlines%>%
ggplot()+
geom_histogram(aes(x=deadline), binwidth=1)+
scale_x_date(breaks = "month", date_labels="%b '%y", limits=c(as.Date("2021-07-30"), as.Date("2022-04-16")))+
labs(x="", y="", caption="Sample: all my job applications that specified a deadline")+
my_theme()+
ggtitle("Number of job application deadlines over time")
# No plot is passed, so ggsave() writes the most recent plot.
ggsave("plots/deadline-histogram.png", dpi=250, width=7, height=4)

Applications timing
What percentage of applications were submitted in November 2021?
# Rows whose application date lies between 2021-11-01 and 2021-12-01, divided
# by all rows that have an application date.
# NOTE(review): the upper bound is inclusive, so an application sent on
# December 1 is counted as November -- confirm that is intended.
nrow(subset(deadlines, !is.na(applied) & applied>="2021-11-01" & applied<="2021-12-01"))/nrow(subset(deadlines, !is.na(applied)))
[1] 0.7862069
Plot histogram of application submissions
# Histogram of application dates with binwidth 1, on the same fixed
# 2021-07-30 to 2022-04-16 axis as the deadline histogram.
deadlines%>%
ggplot()+
geom_histogram(aes(x=applied), binwidth=1)+
scale_x_date(breaks = "month", date_labels="%b '%y", limits=c(as.Date("2021-07-30"), as.Date("2022-04-16")))+
labs(x="", y="")+
my_theme()+
ggtitle("Number of job applications submitted over time")
# No plot is passed, so ggsave() writes the most recent plot.
ggsave("plots/apps-histogram.png", dpi=250, width=7, height=4)

Phases of the market
Compare the timing of: sending applications, doing 1st round interviews, doing flyouts, getting offers
# Read the event table (the plots below use its `date` and `type` columns),
# turn empty-string cells into NA and parse `date` with mdy().
dates<-fread("job-dates.csv")
dates<-dates%>%
replace(.=="", NA)%>%
mutate(date=mdy(date))
# One filled density curve per `type`; adjust=3 multiplies the default
# smoothing bandwidth by three.
dates%>%ggplot()+
geom_density(aes(x=date, fill=type), adjust=3, size=.25)+
# `limits` fixes which phases appear and the order they are coloured in.
scale_fill_viridis_d(option="inferno", alpha=0.7, end = .9,
limits=c("applications", "1st rounds", "flyouts", "offers"), name="JM phase")+
scale_x_date(breaks = "month", date_labels="%b '%y")+
labs(x="", y="")+
my_theme()+
ggtitle("When was each phase of the job market?", subtitle="Density plot of each phase")
ggsave("plots/jm-phase-density.png", dpi=250, width=8, height=4.5)

An alternative way of seeing the timing: cumulative count for each phase
# Running total of events per phase, drawn as step lines over 5-wide bins.
# Each phase has its own stat_bin() layer fed only that phase's rows, so the
# cumsum() in each layer runs over that phase's bin counts alone.
# NOTE(review): `..count..` is the older spelling of after_stat(count);
# recent ggplot2 releases warn about it.
ggplot(dates, aes(x=date,color=type)) +
stat_bin(data=subset(dates, type=="applications"),aes(y=cumsum(..count..)),geom="step", binwidth = 5)+
stat_bin(data=subset(dates, type=="1st rounds"),aes(y=cumsum(..count..)),geom="step", binwidth = 5)+
stat_bin(data=subset(dates, type=="flyouts"),aes(y=cumsum(..count..)),geom="step", binwidth = 5)+
stat_bin(data=subset(dates, type=="offers"),aes(y=cumsum(..count..)),geom="step", binwidth = 5)+
scale_color_viridis_d(option="inferno", alpha=1, end = .9,
limits=c("applications", "1st rounds", "flyouts", "offers"), name="JM phase")+
scale_x_date(breaks = "month", date_labels="%b '%y")+
labs(x="", y="")+ggtitle("Cumulative counts of JM milestones")+
my_theme()
ggsave("plots/phase-step-counts.png", dpi=250, width=7, height=3.5)

Raster plot of job market
I need to take the raw spreadsheet and create a data frame that is 145 jobs x 170 dates with categorical information for each cell, i.e., the status of each job on each date. This can then be visualized as a raster plot.
# Define the milestone dates for each of the 145 jobs I applied to.
# Empty-string cells become NA, then every milestone column is parsed with
# mdy().
jpaths<-fread("job-paths.csv")
jpaths<-jpaths%>%
replace(.=="", NA)%>%
mutate(applied=mdy(applied),
invite1=mdy(invite1),
interview1done=mdy(interview1done),
flyoutinvite=mdy(flyoutinvite),
flyoutdone=mdy(flyoutdone),
offer=mdy(offer), accepted=mdy(accepted))%>%
# jobn = row position as read from the file (used as the join key below);
# appnum = row position after sorting by application date (the raster's y).
mutate(jobn=row_number())%>%
arrange(applied)%>%mutate(appnum=row_number())
# Make a repeated set of dates: one row per day from 2021-09-01 to 2022-02-17.
date<-seq(as.Date("2021-09-01"), as.Date("2022-02-17"), by="days")
df<-as.data.frame(date)
# Repeat the whole date sequence 145 times and label each run of 170 rows
# with a job number.
# NOTE(review): 145 and 170 are hard-coded; they have to equal nrow(jpaths)
# and length(date) for jobn to line up with the right dates.
df<-df%>%slice(rep(row_number(), 145))%>%
mutate(row=row_number())%>%
mutate(jobn=ceiling(row/170))%>%select(-row)
# Get the full 170 dates x 145 jobs data frame.
# No `by` is given, so the join uses every column name the two tables share.
df1<-inner_join(df, jpaths)
Joining, by = "jobn"
# Status of each job on each day. The first mutate gives the base state
# (before vs. on/after the application date); each later mutate overwrites
# the status once the day has reached that milestone's date, so the last
# matching step in this chain wins. Milestones with an NA date never match.
df1<-df1%>%
mutate(status=ifelse(date<applied, "have not applied yet", "applied"))%>%
mutate(status=ifelse(date>=invite1 & !is.na(invite1), "invited to 1st round interview", status))%>%
mutate(status=ifelse(date>=interview1done & !is.na(interview1done), "1st round interview done", status))%>%
mutate(status=ifelse(date>=flyoutinvite & !is.na(flyoutinvite), "invited to flyout", status))%>%
mutate(status=ifelse(date>=flyoutdone & !is.na(flyoutdone), "flyout done", status))%>%
mutate(status=ifelse(date>=offer & !is.na(offer), "offer made", status))%>%
mutate(status=ifelse(date>=accepted & !is.na(accepted), "offer accepted", status))
Make raster plot
# Raster with one cell per (day, application): x is the day, y is appnum
# (applications numbered in order of application date), fill is the status.
df1%>%
ggplot(aes(x=date, y=appnum)) +
#ggplot(aes(x=date, y=reorder(jobn, applied))) +
geom_raster(aes(fill=factor(status)))+
# `limits` lists the statuses in the order they are coloured and shown.
scale_fill_viridis_d(option="inferno", alpha=1, end = .9,
limits=c("have not applied yet", "applied", "invited to 1st round interview",
"1st round interview done", "invited to flyout", "flyout done", "offer made", "offer accepted"), name="JM phase")+
my_theme()+
# y-axis text is set to size 0 after my_theme() so the theme does not undo it.
theme(axis.text.y=element_text(size=0))+
labs(x="", y="")+
scale_x_date(breaks = "month", date_labels="%b '%y")+
ggtitle('The Job Market')
ggsave("plots/raster-jm.png", dpi=250, width=9, height=5)

Make void raster plot (art-y one)
# Same raster as above, styled with theme_void() and no legend, saved as a
# 5 x 5 image at 500 dpi.
df1%>%
ggplot(aes(x=date, y=appnum)) +
#ggplot(aes(x=date, y=reorder(jobn, applied))) +
geom_raster(aes(fill=factor(status)))+
scale_fill_viridis_d(option="inferno", alpha=1, end = .9,
limits=c("have not applied yet", "applied", "invited to 1st round interview",
"1st round interview done", "invited to flyout", "flyout done", "offer made", "offer accepted"), name="JM phase")+
theme_void()+theme(legend.position = "none", plot.background = element_blank())+
theme(axis.text.y=element_text(size=0))+
labs(x="", y="")+
scale_x_date(breaks = "month", date_labels="%b '%y")
ggsave("plots/raster-jm-void1.png", dpi=500, width=5, height=5)

---
title: "Job Market Visualizations"
author: Alex Albright
date: "3/21/22"
output: html_notebook
---

Call packages and define ggplot theme.

```{r}
library(data.table);library(tidyverse);library(lubridate);library(viridis);library(ggthemes);library(patchwork)

# Shared ggplot2 theme for every figure in this notebook.
#
# Starts from theme_clean(), removes the plot background, applies one font
# family to all text (and explicitly to the legend title and labels), puts
# the legend on the right, and positions the title relative to the whole
# plot.
#
# font_family: family name handed to element_text(). The default,
#   "Palatino", is the value that used to be hard-coded; pass another
#   family on machines where Palatino is not installed.
my_theme <- function(font_family = "Palatino") {
  theme_clean() +
    theme(
      plot.background = element_blank(),
      text = element_text(family = font_family),
      legend.title = element_text(family = font_family),
      legend.text = element_text(family = font_family),
      legend.position = "right",
      plot.title.position = "plot"
    )
}
```

# Deadlines timing

```{r}
# Load the deadlines sheet in one pipeline: blank cells become NA, then the
# month/day/year strings in both date columns are parsed with mdy().
deadlines <- fread("deadlines.csv") %>%
  replace(. == "", NA) %>%
  mutate(across(c(deadline, applied), mdy))
```

What percentage of deadlines were in November 2021?

```{r}
# Share of postings with a stated deadline that fell in November 2021.
# The window is half-open, [Nov 1, Dec 1): the earlier `<= "2021-12-01"`
# bound also counted deadlines on December 1 as November.
with(
  subset(deadlines, !is.na(deadline)),
  mean(deadline >= as.Date("2021-11-01") & deadline < as.Date("2021-12-01"))
)
```

Plot histogram of deadlines
```{r}
# Counts of application deadlines in bins of width 1 on the date axis, drawn
# on a fixed range with one break per month, then written to disk.
ggplot(deadlines, aes(x = deadline)) +
  geom_histogram(binwidth = 1) +
  scale_x_date(
    breaks = "month",
    date_labels = "%b '%y",
    limits = as.Date(c("2021-07-30", "2022-04-16"))
  ) +
  labs(
    title = "Number of job application deadlines over time",
    x = "",
    y = "",
    caption = "Sample: all my job applications that specified a deadline"
  ) +
  my_theme()
# No plot argument: ggsave() writes the most recent plot.
ggsave("plots/deadline-histogram.png", dpi = 250, width = 7, height = 4)
```

# Applications timing

What percentage of applications were submitted in November 2021?
```{r}
# Share of dated applications that were submitted in November 2021.
# The window is half-open, [Nov 1, Dec 1): the earlier `<= "2021-12-01"`
# bound also counted applications sent on December 1 as November.
with(
  subset(deadlines, !is.na(applied)),
  mean(applied >= as.Date("2021-11-01") & applied < as.Date("2021-12-01"))
)
```

Plot histogram of application submissions
```{r}
# Counts of submitted applications in bins of width 1 on the date axis, on
# the same fixed range as the deadline histogram, then written to disk.
ggplot(deadlines, aes(x = applied)) +
  geom_histogram(binwidth = 1) +
  scale_x_date(
    breaks = "month",
    date_labels = "%b '%y",
    limits = as.Date(c("2021-07-30", "2022-04-16"))
  ) +
  labs(
    title = "Number of job applications submitted over time",
    x = "",
    y = ""
  ) +
  my_theme()
# No plot argument: ggsave() writes the most recent plot.
ggsave("plots/apps-histogram.png", dpi = 250, width = 7, height = 4)
```

# Phases of the market

Compare the timing of: sending applications, doing 1st round interviews, doing flyouts, getting offers

```{r}
# Read the event table (its `date` and `type` columns are plotted below),
# turn blank cells into NA and parse the month/day/year date strings.
dates <- fread("job-dates.csv") %>%
  replace(. == "", NA) %>%
  mutate(date = mdy(date))

# One filled density curve per phase; adjust = 3 multiplies the default
# smoothing bandwidth by three.
ggplot(dates, aes(x = date, fill = type)) +
  geom_density(adjust = 3, size = .25) +
  scale_fill_viridis_d(
    name = "JM phase",
    option = "inferno",
    alpha = 0.7,
    end = .9,
    # fixes which phases appear and the order they are coloured in
    limits = c("applications", "1st rounds", "flyouts", "offers")
  ) +
  scale_x_date(breaks = "month", date_labels = "%b '%y") +
  labs(
    title = "When was each phase of the job market?",
    subtitle = "Density plot of each phase",
    x = "",
    y = ""
  ) +
  my_theme()

ggsave("plots/jm-phase-density.png", dpi = 250, width = 8, height = 4.5)
```
An alternative way of seeing the timing: cumulative count for each phase

```{r}
# Running total of events in each phase, drawn as step lines over 5-wide
# bins on the date axis.
#
# Each phase gets its own stat_bin() layer, fed only that phase's rows, so
# the cumsum() in a layer runs over that phase's bin counts alone. The layers
# are generated from one vector of phase names, which also sets the colour
# order. after_stat(count) replaces the deprecated `..count..` notation.
jm_phases <- c("applications", "1st rounds", "flyouts", "offers")

ggplot(dates, aes(x = date, color = type)) +
  lapply(jm_phases, function(phase) {
    stat_bin(
      # %in% is FALSE (never NA) for rows with a missing type, so such rows
      # are dropped exactly as subset() dropped them
      data = dates[dates$type %in% phase, ],
      aes(y = cumsum(after_stat(count))),
      geom = "step",
      binwidth = 5
    )
  }) +
  scale_color_viridis_d(
    option = "inferno",
    alpha = 1,
    end = .9,
    limits = jm_phases,
    name = "JM phase"
  ) +
  scale_x_date(breaks = "month", date_labels = "%b '%y") +
  labs(x = "", y = "") +
  ggtitle("Cumulative counts of JM milestones") +
  my_theme()

ggsave("plots/phase-step-counts.png", dpi = 250, width = 7, height = 3.5)
```

# Raster plot of job market

I need to take the raw spreadsheet and create a data frame that is 145 jobs x 170 dates with categorical information for each cell, i.e., the status of each job on each date. This can then be visualized as a raster plot.

```{r}
# Define the milestone dates for each job I applied to (one row per job).
# Blank cells become NA, every milestone column is parsed with mdy(), and the
# jobs are numbered two ways:
#   jobn   - row position as read from the file (join key for the grid below)
#   appnum - row position after sorting by application date (raster y value)
jpaths <- fread("job-paths.csv") %>%
  replace(. == "", NA) %>%
  mutate(across(
    c(
      applied, invite1, interview1done,
      flyoutinvite, flyoutdone, offer, accepted
    ),
    mdy
  )) %>%
  mutate(jobn = row_number()) %>%
  arrange(applied) %>%
  mutate(appnum = row_number())

# Every calendar day covered by the raster.
date <- seq(as.Date("2021-09-01"), as.Date("2022-02-17"), by = "days")

# One row per (job, day): the whole date sequence repeated once per job.
# Both dimensions are taken from the data rather than the literals 145 and
# 170, so jobn stays aligned if the spreadsheet or the date range changes.
df <- data.frame(
  date = rep(date, times = nrow(jpaths)),
  jobn = rep(seq_len(nrow(jpaths)), each = length(date))
)

# Attach each job's milestone dates to each of its days. The key is named
# explicitly so the join cannot silently pick up another shared column.
df1 <- inner_join(df, jpaths, by = "jobn")

# Status of each job on each day. case_when() uses the first branch that
# matches, so the branches run from the last milestone back to the first;
# this gives the same result as overwriting the status milestone by
# milestone. A milestone with an NA date never matches, and a job with no
# application date and no later milestone keeps an NA status.
df1 <- df1 %>%
  mutate(status = case_when(
    !is.na(accepted) & date >= accepted ~ "offer accepted",
    !is.na(offer) & date >= offer ~ "offer made",
    !is.na(flyoutdone) & date >= flyoutdone ~ "flyout done",
    !is.na(flyoutinvite) & date >= flyoutinvite ~ "invited to flyout",
    !is.na(interview1done) & date >= interview1done ~
      "1st round interview done",
    !is.na(invite1) & date >= invite1 ~ "invited to 1st round interview",
    date < applied ~ "have not applied yet",
    date >= applied ~ "applied"
  ))
```

Make raster plot

```{r}
# Raster with one cell per (day, application): x is the day, y is appnum,
# fill is the job's status that day. appnum already numbers applications in
# order of application date, so no reorder() of jobn is needed.
ggplot(df1, aes(x = date, y = appnum, fill = factor(status))) +
  geom_raster() +
  scale_fill_viridis_d(
    name = "JM phase",
    option = "inferno",
    alpha = 1,
    end = .9,
    # statuses in the order they are coloured and listed in the legend
    limits = c(
      "have not applied yet",
      "applied",
      "invited to 1st round interview",
      "1st round interview done",
      "invited to flyout",
      "flyout done",
      "offer made",
      "offer accepted"
    )
  ) +
  scale_x_date(breaks = "month", date_labels = "%b '%y") +
  labs(title = "The Job Market", x = "", y = "") +
  my_theme() +
  # must come after my_theme() so the theme does not undo it
  theme(axis.text.y = element_text(size = 0))
ggsave("plots/raster-jm.png", dpi = 250, width = 9, height = 5)
```

Make void raster plot (art-y one)

```{r}
# The same raster styled with theme_void() and no legend, saved as a
# 5 x 5 image at 500 dpi.
ggplot(df1, aes(x = date, y = appnum, fill = factor(status))) +
  geom_raster() +
  scale_fill_viridis_d(
    name = "JM phase",
    option = "inferno",
    alpha = 1,
    end = .9,
    limits = c(
      "have not applied yet",
      "applied",
      "invited to 1st round interview",
      "1st round interview done",
      "invited to flyout",
      "flyout done",
      "offer made",
      "offer accepted"
    )
  ) +
  scale_x_date(breaks = "month", date_labels = "%b '%y") +
  labs(x = "", y = "") +
  theme_void() +
  theme(
    legend.position = "none",
    plot.background = element_blank(),
    axis.text.y = element_text(size = 0)
  )
ggsave("plots/raster-jm-void1.png", dpi = 500, width = 5, height = 5)
```

