LIBRARIES USED

ISLR

ggplot2

dplyr

library(ggplot2)
library(dplyr)
library(ISLR)
# List the data sets that ship with the ISLR package.
data(package = "ISLR")

# Keep a working copy of the College data set; the plots in this file read
# from data1.
data1 <- ISLR::College

# Per-column summary of the working copy (same data as ISLR::College).
summary(data1)
 Private        Apps           Accept          Enroll       Top10perc    
 No :212   Min.   :   81   Min.   :   72   Min.   :  35   Min.   : 1.00  
 Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242   1st Qu.:15.00  
           Median : 1558   Median : 1110   Median : 434   Median :23.00  
           Mean   : 3002   Mean   : 2019   Mean   : 780   Mean   :27.56  
           3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902   3rd Qu.:35.00  
           Max.   :48094   Max.   :26330   Max.   :6392   Max.   :96.00  
   Top25perc      F.Undergrad     P.Undergrad         Outstate    
 Min.   :  9.0   Min.   :  139   Min.   :    1.0   Min.   : 2340  
 1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0   1st Qu.: 7320  
 Median : 54.0   Median : 1707   Median :  353.0   Median : 9990  
 Mean   : 55.8   Mean   : 3700   Mean   :  855.3   Mean   :10441  
 3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0   3rd Qu.:12925  
 Max.   :100.0   Max.   :31643   Max.   :21836.0   Max.   :21700  
   Room.Board       Books           Personal         PhD        
 Min.   :1780   Min.   :  96.0   Min.   : 250   Min.   :  8.00  
 1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850   1st Qu.: 62.00  
 Median :4200   Median : 500.0   Median :1200   Median : 75.00  
 Mean   :4358   Mean   : 549.4   Mean   :1341   Mean   : 72.66  
 3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700   3rd Qu.: 85.00  
 Max.   :8124   Max.   :2340.0   Max.   :6800   Max.   :103.00  
    Terminal       S.F.Ratio      perc.alumni        Expend     
 Min.   : 24.0   Min.   : 2.50   Min.   : 0.00   Min.   : 3186  
 1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00   1st Qu.: 6751  
 Median : 82.0   Median :13.60   Median :21.00   Median : 8377  
 Mean   : 79.7   Mean   :14.09   Mean   :22.74   Mean   : 9660  
 3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00   3rd Qu.:10830  
 Max.   :100.0   Max.   :39.80   Max.   :64.00   Max.   :56233  
   Grad.Rate     
 Min.   : 10.00  
 1st Qu.: 53.00  
 Median : 65.00  
 Mean   : 65.46  
 3rd Qu.: 78.00  
 Max.   :118.00  

Plots

Scatter Plot

To show the relationship between the number of applications received (Apps) and the number of students accepted (Accept) by colleges.

# Scatter plot: applications received (x) against applicants accepted (y),
# one point per row of data1, coloured by the Private factor.
data1 %>%
  ggplot(mapping = aes(x = Apps, y = Accept, colour = Private)) +
  geom_point() +
  theme_minimal() +
  labs(title = "Applications vs. Acceptances by College Type")

Histogram

To display the distribution of graduation rates (Grad.Rate) across colleges.

# Histogram of Grad.Rate in 10 bins; bars for the two Private levels are
# dodged (drawn side by side) rather than stacked.
data1 %>%
  ggplot(mapping = aes(x = Grad.Rate, fill = Private)) +
  geom_histogram(position = "dodge", bins = 10) +
  theme_minimal() +
  labs(title = "Distribution of Graduation Rates by College Type")

Box Plot

To compare the room and board costs (Room.Board) between public and private colleges.

# Box plot of Room.Board, one box per level of Private, filled by that level.
data1 %>%
  ggplot(mapping = aes(x = Private, y = Room.Board, fill = Private)) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Room and Board Costs by College Type")

Density Plot

To visualize the distribution of out-of-state tuition fees (Outstate) for different college types.

# Density curves of Outstate, one per level of Private. The fill is
# half-transparent so the overlapping region of the two curves stays visible.
data1 %>%
  ggplot(mapping = aes(x = Outstate, fill = Private)) +
  geom_density(alpha = 0.5) +
  theme_minimal() +
  labs(title = "Density of Out-of-State Tuition Fees by College Type")

Bar Plot

To compare the average percentage of alumni who donate (perc.alumni) across college types.

# Mean of perc.alumni for each level of Private, drawn as one bar per level.
data1 %>%
  group_by(Private) %>%
  summarise(avg_perc_alumni = mean(perc.alumni)) %>%
  ggplot(aes(x = Private, y = avg_perc_alumni, fill = Private)) +
  # geom_col() uses the y values as bar heights; it is the direct form of
  # geom_bar(stat = "identity").
  geom_col() +
  labs(title = "Average Percentage of Alumni Donors by College Type") +
  theme_minimal()

Line Plot

To track the expenditures (Expend) of each college.

# Line plot of Expend with the colleges (row names of data1) along the x-axis
# and one line per level of Private.
ggplot(
  data1,
  # group is set explicitly: with a discrete x, ggplot2's default grouping
  # would put every college in its own group and no line would be drawn.
  aes(x = row.names(data1), y = Expend, group = Private, color = Private)
) +
  geom_line() +
  labs(title = "Expenditures per College", x = "College") +
  theme_minimal() +
  # The x-axis has one category per row of data1, so the tick labels and
  # vertical grid lines overlap into an unreadable band; suppress them.
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    panel.grid.major.x = element_blank()
  )

Donut

To visualize the distribution of college types (public vs. private) within the dataset.

# Count the rows of data1 per level of Private, convert the counts to
# percentages, and build a "<level> (<pct>%)" label for each slice.
college_type_counts <- data1 %>%
  count(Private) %>%
  mutate(
    percentage = n / sum(n) * 100,
    label = paste0(Private, " (", round(percentage, 1), "%)")
  )

# Donut chart: a single stacked bar at x = 2 wrapped into polar coordinates.
# The bar (width 1) covers x in [1.5, 2.5] while the x limits start at 0.5,
# so the unused inner range becomes the hole of the donut.
ggplot(college_type_counts, aes(x = 2, y = n, fill = Private)) +
  # geom_col() is the direct form of geom_bar(stat = "identity").
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) +
  # Place each label at the middle of its stacked segment.
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  labs(title = "Proportion of College Types") +
  theme_void() +
  # The slice labels already name the groups, so the legend is dropped.
  theme(legend.position = "none")