Câu c)
# Print summary statistics for every column of the `college` data frame
summary(college)
Private Apps Accept Enroll
No :212 Min. : 81 Min. : 72 Min. : 35
Yes:565 1st Qu.: 776 1st Qu.: 604 1st Qu.: 242
Median : 1558 Median : 1110 Median : 434
Mean : 3002 Mean : 2019 Mean : 780
3rd Qu.: 3624 3rd Qu.: 2424 3rd Qu.: 902
Max. :48094 Max. :26330 Max. :6392
Top10perc Top25perc F.Undergrad P.Undergrad
Min. : 1.00 Min. : 9.0 Min. : 139 Min. : 1.0
1st Qu.:15.00 1st Qu.: 41.0 1st Qu.: 992 1st Qu.: 95.0
Median :23.00 Median : 54.0 Median : 1707 Median : 353.0
Mean :27.56 Mean : 55.8 Mean : 3700 Mean : 855.3
3rd Qu.:35.00 3rd Qu.: 69.0 3rd Qu.: 4005 3rd Qu.: 967.0
Max. :96.00 Max. :100.0 Max. :31643 Max. :21836.0
Outstate Room.Board Books Personal
Min. : 2340 Min. :1780 Min. : 96.0 Min. : 250
1st Qu.: 7320 1st Qu.:3597 1st Qu.: 470.0 1st Qu.: 850
Median : 9990 Median :4200 Median : 500.0 Median :1200
Mean :10441 Mean :4358 Mean : 549.4 Mean :1341
3rd Qu.:12925 3rd Qu.:5050 3rd Qu.: 600.0 3rd Qu.:1700
Max. :21700 Max. :8124 Max. :2340.0 Max. :6800
PhD Terminal S.F.Ratio perc.alumni
Min. : 8.00 Min. : 24.0 Min. : 2.50 Min. : 0.00
1st Qu.: 62.00 1st Qu.: 71.0 1st Qu.:11.50 1st Qu.:13.00
Median : 75.00 Median : 82.0 Median :13.60 Median :21.00
Mean : 72.66 Mean : 79.7 Mean :14.09 Mean :22.74
3rd Qu.: 85.00 3rd Qu.: 92.0 3rd Qu.:16.50 3rd Qu.:31.00
Max. :103.00 Max. :100.0 Max. :39.80 Max. :64.00
Expend Grad.Rate
Min. : 3186 Min. : 10.00
1st Qu.: 6751 1st Qu.: 53.00
Median : 8377 Median : 65.00
Mean : 9660 Mean : 65.46
3rd Qu.:10830 3rd Qu.: 78.00
Max. :56233 Max. :118.00
# Draw a scatterplot matrix of the first 10 columns of `college`
pairs(college[, 1:10])

# attach() puts the columns of `college` on the search path so they can be
# referenced by bare name. Each call stacks another copy on the search path,
# so drop any copies left over from earlier runs first; this leaves exactly
# one copy attached, and it reflects the current contents of `college`.
while ("college" %in% search()) {
  detach("college", character.only = TRUE)
}
attach(college)
The following object is masked _by_ .GlobalEnv:
Private
The following objects are masked from college (pos = 3):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 4):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 5):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 6):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 7):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 8):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
The following objects are masked from college (pos = 9):
Accept, Apps, Books, Enroll, Expend, F.Undergrad,
Grad.Rate, Outstate, P.Undergrad, perc.alumni,
Personal, PhD, Private, Room.Board, S.F.Ratio,
Terminal, Top10perc, Top25perc
# Store Private as a factor inside the data frame itself. Assigning to a
# separate global `Private` would mask the attached column of the same name.
college$Private <- as.factor(college$Private)
# Side-by-side boxplots of Outstate for each level of Private, read straight
# from `college`. varwidth = TRUE makes the box widths reflect group sizes.
boxplot(
  Outstate ~ Private,
  data = college,
  col = "red",
  varwidth = TRUE,
  xlab = "Private",
  ylab = "Outstate",
  main = "Outstate vs Private Boxplot"
)

# Start with a character vector holding "No" for every row of `college`
Elite <- rep("No", nrow(college))
# Mark the rows whose Top10perc exceeds 50 as "Yes". The column is read from
# `college` directly instead of through the attached copy on the search path.
Elite[college$Top10perc > 50] <- "Yes"
# Convert the "No"/"Yes" labels to a factor
Elite <- as.factor(Elite)
# Store the factor as the `Elite` column of `college`. Assigning by name is
# idempotent, whereas data.frame(college, Elite) appends a duplicate column
# every time the chunk is re-run.
college$Elite <- Elite
# Count how many rows fall into each level of Elite
summary(college$Elite)
No Yes
699 78
# Side-by-side boxplots of Outstate for each level of Elite. Both columns are
# read from `college` rather than from a global vector and the attached copy.
boxplot(
  Outstate ~ Elite,
  data = college,
  xlab = "Elite",
  ylab = "Outstate",
  main = "Outstate vs. Elite Boxplot"
)

# Histograms of nine columns of `college`, laid out in a 3x3 grid
hist_vars <- c(
  "Apps", "Accept", "Enroll",
  "Top10perc", "Top25perc", "F.Undergrad",
  "Outstate", "Room.Board", "Books"
)
# par() returns the previous settings, so keep them to restore the layout
old_par <- par(mfrow = c(3, 3))
for (v in hist_vars) {
  # main and xlab are set explicitly so each panel keeps the same labels that
  # hist() derives when it is called with the bare column name
  hist(
    college[[v]],
    col = 2,
    breaks = 15,
    main = paste("Histogram of", v),
    xlab = v
  )
}
# Restore the plotting layout that was in effect before this chunk
par(old_par)

LS0tDQp0aXRsZTogIkLDoGkgdOG6rXAgMSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQojIyMgVMOqbjogTWFpIEh1eQ0KIyMjIE1TU1Y6IDQzLjAxLjEwNC4wNjUNCiMjIyBT4buRIHRo4bupIHThu7E6IDA4DQoNCg0KIyBDw6J1IGEpDQoNCmBgYHtyfQ0KIyDEkOG7jWMgZGF0YXNldA0KY29sbGVnZT0gcmVhZC5jc3YoJ0NvbGxlZ2UuY3N2JykNCmNvbGxlZ2UNCmBgYA0KDQojIEPDonUgYikNCg0KYGBge3J9DQojIEfhu41pIGThu68gbGnhu4d1IGPhu5l0IMSR4bqndSB0acOqbiBjaOG7qWEgdMOqbiBjw6FjIHRyxrDhu51uZyDEkeG6oWkgaOG7jWMgY+G7p2EgZGF0YXNldCB2w6AgdHLDrG5oIGNoaeG6v3UgZOG7ryBsaeG7h3UgZMaw4bubaSBi4bqjbmcgdMOtbmgNCnJvd25hbWVzKGNvbGxlZ2UpID0gY29sbGVnZVssMV0gDQpmaXgoY29sbGVnZSkNCmBgYA0KDQpgYGB7cn0NCiNM4bqleSB04bqldCBj4bqjIGPDoWMgY+G7mXQgY+G7p2EgY29sbGVnZSBuZ2/huqFpIHRy4burIGPhu5l0IMSR4bqndSB0acOqbg0KY29sbGVnZSA8LSBjb2xsZWdlWywtMV0gDQpmaXgoY29sbGVnZSkNCmNvbGxlZ2UNCmBgYA0KDQojIEPDonUgYykNCg0KYGBge3J9DQojIFTDs20gbMaw4bujYyBk4buvIGxp4buHdSBj4bunYSBkYXRhIGNvbGxlZ2UNCnN1bW1hcnkoY29sbGVnZSkNCiNU4bqhbyByYSBtYSB0cuG6rW4gxJHhu5MgdGjhu4sgcGjDom4gdMOhbiAxMCBj4buZdCDEkeG6p3UgdGnDqm4gY+G7p2EgZOG7ryBsaeG7h3UNCnBhaXJzKGNvbGxlZ2VbLDE6MTBdKQ0KIyBhdHRhY2ggZMO5bmcgxJHhu4Mga2hp4bq/biBjaG8gbmjhu69uZyBiaeG6v24gZmVhdHVyZSB0cm9uZyBk4buvIGxp4buHdSBjw7Mgc+G6tW4gdHJvbmcgUnN0dWRpbyB0aGVvIHTDqm4NCmF0dGFjaChjb2xsZWdlKQ0KI2FzLmZhY3RvcigpIGTDuW5nIMSR4buDIGNodXnhu4NuIGdpw6EgdHLhu4sgxJHhu4tuaCBsxrDhu6NuZyB0aMOgbmggxJHhu4tuaCB0w61uaA0KUHJpdmF0ZSA9IGFzLmZhY3RvcihQcml2YXRlKQ0KIyBU4bqhbyDEkeG7kyB0aOG7iyBwaMOibiB0w6FuIGPhu6dhIGThu68gbGnhu4d1IHRyxrDhu51uZyB0xrAgKHByaXZhdGUpIHbDoCB0csaw4budbmcgY8O0bmcgKE91dHN0YXRlKQ0KcGxvdChQcml2YXRlLCBPdXRzdGF0ZSAsIGNvbCA9InJlZCIsIHZhcndpZHRoID1ULCB4bGFiPSJQcml2YXRlIiwNCnlsYWI9Ik91dHN0YXRlIixtYWluPSJPdXRzdGF0ZSB2cyBQcml2YXRlIEJveHBsb3QiKQ0KDQpgYGANCmBgYHtyfQ0KIyBU4bqhbyByYSAxIHZlY3RvciB24bubaSDEkeG7mSBkw6BpIHTGsMahbmcg4bupbmcgduG7m2kgc+G7kSBkw7JuZyBj4bunYSBkYXRhc2V0LCB24bubaSBnacOhIHRy4buLIG3hurdjIMSR4buLbmggdHLhuqMgduG7gSBsw6AgMA0KRWxpdGU9cmVwKCJObyIsbnJvdyhjb2xsZWdlICkpDQojWMOhYyDEkeG7i25oIGdpw6EgdHLhu4sgY+G7p2EgdmVjdG9yIHTGsMahbmcg4bupbmcgZOG7ryBsaeG7h3UgZMOybmcgVE9wMTBwZXJjIGzh
u5tuIGjGoW4gNTAgdsOgIHRoYXkgxJHhu5VpIHRow6BuaCBnacOhIHRy4buLIFllcw0KRWxpdGVbVG9wMTBwZXJjID41MF0gPSJZZXMiDQojYXMuZmFjdG9yKCkgZMO5bmcgxJHhu4MgY2h1eeG7g24gZ2nDoSB0cuG7iyDEkeG7i25oIGzGsOG7o25nIHRow6BuaCDEkeG7i25oIHTDrW5oDQpFbGl0ZSA9IGFzLmZhY3RvcihFbGl0ZSkNCiNL4bq/dCBo4bujcCBk4buvIGxp4buHdSBjb2xsZWdlIHbhu5tpIHZlY3RvciB24burYSDEkcaw4bujYyB04bqhbyB2w6AgdHLhuqMgduG7gSAxIGThu68gbGnhu4d1IG3hu5tpIMSRw6FwIOG7qW5nIGThu68gbGnhu4d1IGPDoWMgY+G7mXQgVG9wMTBwZXJjID41MA0KY29sbGVnZSA9IGRhdGEuZnJhbWUoY29sbGVnZSwgRWxpdGUpDQoNCmBgYA0KYGBge3J9DQojVMOzbSBsxrDhu6NjIHhlbSBjw7MgYmFvIG5oacOqdSB0csaw4budbmcgxJHhuqFpIGjhu41jIMawdSB0w7ogdHJvbmcgY3NkbA0Kc3VtbWFyeShjb2xsZWdlJEVsaXRlKQ0KIyBU4bqhbyDEkeG7kyB0aOG7iyBwaMOibiB0w6FuIGPhu6dhIGThu68gbGnhu4d1IGLDqm4gdHLGsOG7nW5nIMawdSB0w7ogKEVsaXRlKSB2w6AgdHLGsOG7nW5nIGPDtG5nIChPdXRzdGF0ZSkNCnBsb3QoRWxpdGUsIE91dHN0YXRlLHhsYWI9IkVsaXRlIiwNCnlsYWI9Ik91dHN0YXRlIixtYWluPSJPdXRzdGF0ZSB2cy4gRWxpdGUgQm94cGxvdCIpDQpgYGANCmBgYHtyfQ0KI0hpc3RvcmFtIGPhu6dhIGPDoWMgYmnhur9uICBk4buvIGxp4buHdSB24bubaSBjw6FjIGThu68gbGnhu4d1IGPhu5l0IMSRxrDhu6NjIGNoaWEgdGjDoG5oIGPhu61hIHPhu5UgM3gzIGLhurFuZyBjw6FjaCBz4butIGThu6VuZyBow6BtIG1mcm93PWMoMywzKQ0KcGFyKG1mcm93PSBjKDMsMykpDQpoaXN0KEFwcHMsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoQWNjZXB0LGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KEVucm9sbCxjb2wgPSAyLGJyZWFrcyA9IDE1KQ0KaGlzdChUb3AxMHBlcmMsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoVG9wMjVwZXJjLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KEYuVW5kZXJncmFkLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KE91dHN0YXRlLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KFJvb20uQm9hcmQsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoQm9va3MsY29sID0gMixicmVha3MgPSAxNSkNCmBgYA0KDQoNCg==