library(ggplot2)
library(readr)
library(sqldf)
library(formattable)
options(scipen=999)
College <- read_csv("C:/Users/shahi/Desktop/Sanjay/2019-georgetown-study-data.csv")
## Warning: Missing column names filled in: 'X1' [1]
Filtered <- sqldf("
SELECT *
FROM College
WHERE Institution = 'Albright College'
OR Institution = 'Juniata College'
OR Institution = 'Kings College'
OR Institution = 'Lebanon Valley College'
OR Institution = 'Lycoming College'
OR Institution = 'Susquehanna University'
OR Institution = 'York College'
OR Institution = 'Elizabethtown College'
")
colnames(Filtered)[12] <- "Median_debt"
colnames(Filtered)[14] <- 'Median_10yr_Earnings'
colnames(Filtered)[16] <- 'Net_Price'
colnames(Filtered)[22] <- 'TenYearNPV'
colnames(Filtered)[26] <- 'TwentyYearNPV'
colnames(Filtered)[30] <- 'ThirtyYearNPV'
Filtered$Median_debt <- currency(Filtered$Median_debt, digits = 0L)
ggplot(data = Filtered, aes(x=Institution, y= Median_debt, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = Median_debt), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = "Median Debt for College Graduates",
y = "Debt")+
geom_hline(aes(yintercept = mean(Median_debt)))

Filtered$Median_10yr_Earnings <- currency(Filtered$Median_10yr_Earnings, digits = 0L)
ggplot(data = Filtered, aes(x=Institution, y= Median_10yr_Earnings, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = Median_10yr_Earnings), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = "Median 10 Year Income for College Graduates",
y = "Income")+
geom_hline(aes(yintercept = mean(Median_10yr_Earnings)))

Filtered$Net_Price <- currency(Filtered$Net_Price, digits = 0L)
ggplot(data = Filtered, aes(x=Institution, y= Net_Price, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = Net_Price), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = "Net Price",
y = "Cost")+
geom_hline(aes(yintercept = mean(Net_Price)))

Filtered$TenYearNPV <- currency(Filtered$TenYearNPV, digits = 0L)
ggplot(data = Filtered, aes(x=Institution, y= TenYearNPV, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = TenYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = 'Net Present Value',
subtitle = "Based on 10-year earnings",
y = " ") +
geom_hline(aes(yintercept = mean(TenYearNPV)))

Filtered$TwentyYearNPV <- currency(Filtered$TwentyYearNPV, digits = 0L)
Filtered$TwentyYearNPV_AVG <- mean(Filtered$TwentyYearNPV)
ggplot(data = Filtered, aes(x=Institution, y= TwentyYearNPV, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = TwentyYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = 'Net Present Value',
subtitle = "Based on 20-year earnings",
y = " ") +
geom_hline(aes(yintercept = mean(TwentyYearNPV)))

Filtered$ThirtyYearNPV <- currency(Filtered$ThirtyYearNPV, digits = 0L)
ggplot(data = Filtered, aes(x=Institution, y= ThirtyYearNPV, fill = Institution ))+
geom_bar(stat = "identity")+
geom_text(aes(label = ThirtyYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
scale_x_discrete(labels = abbreviate)+
labs(title = 'Net Present Value',
subtitle = "Based on 30-year earnings",
y = " ")+
geom_hline(aes(yintercept = mean(ThirtyYearNPV)))
