library(ggplot2)
library(readr)
library(sqldf)
library(formattable)

options(scipen=999)

College <- read_csv("C:/Users/shahi/Desktop/Sanjay/2019-georgetown-study-data.csv")
## Warning: Missing column names filled in: 'X1' [1]
Filtered <- sqldf("
SELECT *
FROM College
WHERE Institution = 'Albright College'
OR Institution = 'Juniata College'
OR Institution = 'Kings College' 
OR Institution = 'Lebanon Valley College'
OR Institution = 'Lycoming College'
OR Institution = 'Susquehanna University'
OR Institution = 'York College'
OR Institution = 'Elizabethtown College'
      
      ")

colnames(Filtered)[12] <- "Median_debt"
colnames(Filtered)[14] <- 'Median_10yr_Earnings'
colnames(Filtered)[16] <- 'Net_Price'
colnames(Filtered)[22] <- 'TenYearNPV'
colnames(Filtered)[26] <- 'TwentyYearNPV'
colnames(Filtered)[30] <- 'ThirtyYearNPV'



Filtered$Median_debt <- currency(Filtered$Median_debt, digits = 0L)

ggplot(data = Filtered, aes(x=Institution, y= Median_debt, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = Median_debt), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  "Median Debt for College Graduates",
       y = "Debt")+
  geom_hline(aes(yintercept = mean(Median_debt)))

Filtered$Median_10yr_Earnings <- currency(Filtered$Median_10yr_Earnings, digits = 0L)

ggplot(data = Filtered, aes(x=Institution, y= Median_10yr_Earnings, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = Median_10yr_Earnings), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  "Median 10 Year Income for College Graduates",
       y = "Income")+
  geom_hline(aes(yintercept = mean(Median_10yr_Earnings)))

Filtered$Net_Price <- currency(Filtered$Net_Price, digits = 0L)

ggplot(data = Filtered, aes(x=Institution, y= Net_Price, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = Net_Price), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  "Net Price",
       y = "Cost")+
  geom_hline(aes(yintercept = mean(Net_Price)))

Filtered$TenYearNPV <- currency(Filtered$TenYearNPV, digits = 0L)

ggplot(data = Filtered, aes(x=Institution, y= TenYearNPV, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = TenYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  'Net Present Value',
      subtitle = "Based on 10-year earnings",
       y = " ") +
  geom_hline(aes(yintercept = mean(TenYearNPV)))

Filtered$TwentyYearNPV <- currency(Filtered$TwentyYearNPV, digits = 0L)

Filtered$TwentyYearNPV_AVG <- mean(Filtered$TwentyYearNPV)

ggplot(data = Filtered, aes(x=Institution, y= TwentyYearNPV, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = TwentyYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  'Net Present Value',
      subtitle = "Based on 20-year earnings",
       y = " ") +
  geom_hline(aes(yintercept = mean(TwentyYearNPV)))

Filtered$ThirtyYearNPV <- currency(Filtered$ThirtyYearNPV, digits = 0L)

ggplot(data = Filtered, aes(x=Institution, y= ThirtyYearNPV, fill = Institution ))+ 
  geom_bar(stat = "identity")+
  geom_text(aes(label = ThirtyYearNPV), position= position_dodge(width=0.9), vjust=-0.25)+
  scale_x_discrete(labels = abbreviate)+
  labs(title =  'Net Present Value',
      subtitle = "Based on 30-year earnings",
       y = " ")+
  geom_hline(aes(yintercept = mean(ThirtyYearNPV)))