This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Demo chunk: draw the default plot for `cars` (not defined in this file;
# presumably the built-in datasets::cars data frame -- confirm).
plot(x = cars)
Warning message:
R graphics engine version 12 is not supported by this version of RStudio. The Plots tab will be disabled until a newer version of RStudio is installed.
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
# Load the College data set and turn its first column into row names.
#
# NOTE(review): the path below is an absolute, machine-specific location; it is
# kept unchanged here, but consider a project-relative path.
#
# stringsAsFactors = TRUE is requested explicitly: the summary output recorded
# below shows `Private` as a No/Yes factor, which is what read.csv() produced
# by default before R 4.0.0. From R 4.0.0 on the default is FALSE, which would
# leave `Private` as character.
college <- read.csv(
  file = "/home/hoang/R/College.csv",
  stringsAsFactors = TRUE
)
college

# Inspect the column types without opening an editor. The original called
# fix(college), which launches an interactive editor and assigns whatever it
# returns back to `college`; that blocks non-interactive runs and can change
# the data by accident.
str(college)

# Use the first column as row names ...
rownames(college) <- college[, 1]
head(college)

# ... and then drop that column so it is not treated as data.
college <- college[, -1]
college

summary(college)
Private Apps Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad Outstate Room.Board Books
No :212 Min. : 81 Min. : 72 Min. : 35 Min. : 1.00 Min. : 9.0 Min. : 139 Min. : 1.0 Min. : 2340 Min. :1780 Min. : 96.0
Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320 1st Qu.:3597 1st Qu.: 470.0
Median : 1558 Median : 1110 Median : 434 Median :23.00 Median : 54.0 Median : 1707 Median : 353.0 Median : 9990 Median :4200 Median : 500.0
Mean : 3002 Mean : 2019 Mean : 780 Mean :27.56 Mean : 55.8 Mean : 3700 Mean : 855.3 Mean :10441 Mean :4358 Mean : 549.4
3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925 3rd Qu.:5050 3rd Qu.: 600.0
Max. :48094 Max. :26330 Max. :6392 Max. :96.00 Max. :100.0 Max. :31643 Max. :21836.0 Max. :21700 Max. :8124 Max. :2340.0
Personal PhD Terminal S.F.Ratio perc.alumni Expend Grad.Rate
Min. : 250 Min. : 8.00 Min. : 24.0 Min. : 2.50 Min. : 0.00 Min. : 3186 Min. : 10.00
1st Qu.: 850 1st Qu.: 62.00 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751 1st Qu.: 53.00
Median :1200 Median : 75.00 Median : 82.0 Median :13.60 Median :21.00 Median : 8377 Median : 65.00
Mean :1341 Mean : 72.66 Mean : 79.7 Mean :14.09 Mean :22.74 Mean : 9660 Mean : 65.46
3rd Qu.:1700 3rd Qu.: 85.00 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830 3rd Qu.: 78.00
Max. :6800 Max. :103.00 Max. :100.0 Max. :39.80 Max. :64.00 Max. :56233 Max. :118.00
# Scatter-plot matrix of the first ten columns of `college`.
pairs(college[, 1:10])

# Outstate plotted against Private, both taken from `college`.
plot(Outstate ~ Private, data = college)

# Build the Elite indicator: "Yes" where Top10perc is greater than 50,
# "No" otherwise.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
# Fix the level set so both categories exist even if one of them is empty.
Elite <- factor(Elite, levels = c("No", "Yes"))

# Assign by name rather than data.frame(college, Elite): re-running this chunk
# then overwrites the existing column instead of appending a duplicate
# `Elite.1` column.
college$Elite <- Elite

summary(college)
Private Apps Accept Enroll Top10perc Top25perc F.Undergrad P.Undergrad Outstate Room.Board Books
No :212 Min. : 81 Min. : 72 Min. : 35 Min. : 1.00 Min. : 9.0 Min. : 139 Min. : 1.0 Min. : 2340 Min. :1780 Min. : 96.0
Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242 1st Qu.:15.00 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0 1st Qu.: 7320 1st Qu.:3597 1st Qu.: 470.0
Median : 1558 Median : 1110 Median : 434 Median :23.00 Median : 54.0 Median : 1707 Median : 353.0 Median : 9990 Median :4200 Median : 500.0
Mean : 3002 Mean : 2019 Mean : 780 Mean :27.56 Mean : 55.8 Mean : 3700 Mean : 855.3 Mean :10441 Mean :4358 Mean : 549.4
3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902 3rd Qu.:35.00 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0 3rd Qu.:12925 3rd Qu.:5050 3rd Qu.: 600.0
Max. :48094 Max. :26330 Max. :6392 Max. :96.00 Max. :100.0 Max. :31643 Max. :21836.0 Max. :21700 Max. :8124 Max. :2340.0
Personal PhD Terminal S.F.Ratio perc.alumni Expend Grad.Rate Elite
Min. : 250 Min. : 8.00 Min. : 24.0 Min. : 2.50 Min. : 0.00 Min. : 3186 Min. : 10.00 No :699
1st Qu.: 850 1st Qu.: 62.00 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00 1st Qu.: 6751 1st Qu.: 53.00 Yes: 78
Median :1200 Median : 75.00 Median : 82.0 Median :13.60 Median :21.00 Median : 8377 Median : 65.00
Mean :1341 Mean : 72.66 Mean : 79.7 Mean :14.09 Mean :22.74 Mean : 9660 Mean : 65.46
3rd Qu.:1700 3rd Qu.: 85.00 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00 3rd Qu.:10830 3rd Qu.: 78.00
Max. :6800 Max. :103.00 Max. :100.0 Max. :39.80 Max. :64.00 Max. :56233 Max. :118.00
# Outstate by Elite group, one box per factor level.
boxplot(Outstate ~ Elite, data = college, xlab = "Elite", ylab = "Outstate")

# Draw four histograms on a 2 x 2 grid. par() returns the previous values of
# the settings it changes, so keep them and restore the layout afterwards
# instead of leaking the grid into later plots.
old_par <- par(mfrow = c(2, 2))
# Apps uses more bins and the x axis is limited to 0-2500.
hist(college$Apps, breaks = 50, xlim = c(0, 2500), main = "Apps")
hist(college$Enroll, breaks = 25, main = "Enroll")
hist(college$Expend, breaks = 25, main = "Expend")
hist(college$Outstate, breaks = 25, main = "Outstate")
par(old_par)