Tên: Mai Huy

MSSV: 43.01.104.065

Số thứ tự: 08

Câu a)

# Load the data set from the CSV file in the working directory, then print it.
college <- read.csv("College.csv")
college

Câu b)

# Use the first column of the data set as the row names.
rownames(college) <- college[, 1]
# fix() opens a spreadsheet-style editor, which is only available in an
# interactive session; skip it when the script is knitted or run in batch mode
# so that execution does not stop here.
if (interactive()) {
  fix(college)
}
# Keep every column except the first one, which now lives in the row names.
college <- college[, -1]
if (interactive()) {
  fix(college)
}
college

Câu c)

# Print a per-column summary of the college data frame.
summary(college)
 Private        Apps           Accept          Enroll    
 No :212   Min.   :   81   Min.   :   72   Min.   :  35  
 Yes:565   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242  
           Median : 1558   Median : 1110   Median : 434  
           Mean   : 3002   Mean   : 2019   Mean   : 780  
           3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902  
           Max.   :48094   Max.   :26330   Max.   :6392  
   Top10perc       Top25perc      F.Undergrad     P.Undergrad     
 Min.   : 1.00   Min.   :  9.0   Min.   :  139   Min.   :    1.0  
 1st Qu.:15.00   1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0  
 Median :23.00   Median : 54.0   Median : 1707   Median :  353.0  
 Mean   :27.56   Mean   : 55.8   Mean   : 3700   Mean   :  855.3  
 3rd Qu.:35.00   3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0  
 Max.   :96.00   Max.   :100.0   Max.   :31643   Max.   :21836.0  
    Outstate       Room.Board       Books           Personal   
 Min.   : 2340   Min.   :1780   Min.   :  96.0   Min.   : 250  
 1st Qu.: 7320   1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850  
 Median : 9990   Median :4200   Median : 500.0   Median :1200  
 Mean   :10441   Mean   :4358   Mean   : 549.4   Mean   :1341  
 3rd Qu.:12925   3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700  
 Max.   :21700   Max.   :8124   Max.   :2340.0   Max.   :6800  
      PhD            Terminal       S.F.Ratio      perc.alumni   
 Min.   :  8.00   Min.   : 24.0   Min.   : 2.50   Min.   : 0.00  
 1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00  
 Median : 75.00   Median : 82.0   Median :13.60   Median :21.00  
 Mean   : 72.66   Mean   : 79.7   Mean   :14.09   Mean   :22.74  
 3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00  
 Max.   :103.00   Max.   :100.0   Max.   :39.80   Max.   :64.00  
     Expend        Grad.Rate     
 Min.   : 3186   Min.   : 10.00  
 1st Qu.: 6751   1st Qu.: 53.00  
 Median : 8377   Median : 65.00  
 Mean   : 9660   Mean   : 65.46  
 3rd Qu.:10830   3rd Qu.: 78.00  
 Max.   :56233   Max.   :118.00  
# Scatterplot matrix of the first ten columns. data.matrix() replaces factor
# columns by their integer codes (character columns are turned into factors
# first), so a non-numeric column does not make pairs() stop with an error.
pairs(data.matrix(college[, 1:10]))

# attach() places a copy of the columns on the search path so that they can be
# referred to by bare name. Remove any copy left over from an earlier run
# first: otherwise every rerun stacks one more "college" entry on the search
# path and prints one more block of masking messages.
while ("college" %in% search()) {
  detach("college", character.only = TRUE)
}
attach(college)
The following object is masked _by_ .GlobalEnv:

    Private

The following objects are masked from college (pos = 3):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 4):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 5):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 6):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 7):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 8):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc

The following objects are masked from college (pos = 9):

    Accept, Apps, Books, Enroll, Expend, F.Undergrad,
    Grad.Rate, Outstate, P.Undergrad, perc.alumni,
    Personal, PhD, Private, Room.Board, S.F.Ratio,
    Terminal, Top10perc, Top25perc
# Make sure Private is a factor so that plot() draws one box per level.
# The values are read from the data frame itself rather than from whatever
# object named Private happens to be first on the search path.
Private <- as.factor(college$Private)
# Side-by-side boxplots of Outstate for each level of Private (not a
# scatterplot); varwidth = TRUE makes the box widths reflect the group sizes.
plot(
  Private, college$Outstate,
  col = "red", varwidth = TRUE,
  xlab = "Private", ylab = "Outstate",
  main = "Outstate vs Private Boxplot"
)

# Build a label vector with one entry per row of the data set; every entry
# starts out as "No".
Elite <- rep("No", nrow(college))
# Rows whose Top10perc value is above 50 are relabelled "Yes". The column is
# read from the data frame directly so the result does not depend on an
# attached copy of the data.
Elite[college$Top10perc > 50] <- "Yes"
# Convert the character labels into a factor.
Elite <- as.factor(Elite)
# Store the labels as the Elite column. Assigning by name keeps this step
# repeatable: rerunning it overwrites the column instead of appending a
# duplicate one.
college$Elite <- Elite
# Count how many rows fall into each Elite level.
summary(college$Elite)
 No Yes 
699  78 
# Boxplots of Outstate, one box for each level of Elite.
plot(
  x = Elite,
  y = Outstate,
  xlab = "Elite",
  ylab = "Outstate",
  main = "Outstate vs. Elite Boxplot"
)

# Histograms of nine columns drawn on one page arranged as a 3 x 3 grid.
hist_vars <- c(
  "Apps", "Accept", "Enroll",
  "Top10perc", "Top25perc", "F.Undergrad",
  "Outstate", "Room.Board", "Books"
)
# par() returns the previous settings, which are restored once the grid has
# been drawn so that later plots are not forced into the 3 x 3 layout.
old_par <- par(mfrow = c(3, 3))
for (v in hist_vars) {
  # Columns are taken from the data frame by name; main and xlab are set
  # explicitly so the titles read "Histogram of <column>" with the column
  # name on the x axis.
  hist(
    college[[v]],
    col = 2, breaks = 15,
    main = paste("Histogram of", v), xlab = v
  )
}
par(old_par)

LS0tDQp0aXRsZTogIkLDoGkgdOG6rXAgMSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQojIyMgVMOqbjogTWFpIEh1eQ0KIyMjIE1TU1Y6IDQzLjAxLjEwNC4wNjUNCiMjIyBT4buRIHRo4bupIHThu7E6IDA4DQoNCg0KIyBDw6J1IGEpDQoNCmBgYHtyfQ0KIyDEkOG7jWMgZGF0YXNldA0KY29sbGVnZT0gcmVhZC5jc3YoJ0NvbGxlZ2UuY3N2JykNCmNvbGxlZ2UNCmBgYA0KDQojIEPDonUgYikNCg0KYGBge3J9DQojIEfhu41pIGThu68gbGnhu4d1IGPhu5l0IMSR4bqndSB0acOqbiBjaOG7qWEgdMOqbiBjw6FjIHRyxrDhu51uZyDEkeG6oWkgaOG7jWMgY+G7p2EgZGF0YXNldCB2w6AgdHLDrG5oIGNoaeG6v3UgZOG7ryBsaeG7h3UgZMaw4bubaSBi4bqjbmcgdMOtbmgNCnJvd25hbWVzKGNvbGxlZ2UpID0gY29sbGVnZVssMV0gDQpmaXgoY29sbGVnZSkNCmBgYA0KDQpgYGB7cn0NCiNM4bqleSB04bqldCBj4bqjIGPDoWMgY+G7mXQgY+G7p2EgY29sbGVnZSBuZ2/huqFpIHRy4burIGPhu5l0IMSR4bqndSB0acOqbg0KY29sbGVnZSA8LSBjb2xsZWdlWywtMV0gDQpmaXgoY29sbGVnZSkNCmNvbGxlZ2UNCmBgYA0KDQojIEPDonUgYykNCg0KYGBge3J9DQojIFTDs20gbMaw4bujYyBk4buvIGxp4buHdSBj4bunYSBkYXRhIGNvbGxlZ2UNCnN1bW1hcnkoY29sbGVnZSkNCiNU4bqhbyByYSBtYSB0cuG6rW4gxJHhu5MgdGjhu4sgcGjDom4gdMOhbiAxMCBj4buZdCDEkeG6p3UgdGnDqm4gY+G7p2EgZOG7ryBsaeG7h3UNCnBhaXJzKGNvbGxlZ2VbLDE6MTBdKQ0KIyBhdHRhY2ggZMO5bmcgxJHhu4Mga2hp4bq/biBjaG8gbmjhu69uZyBiaeG6v24gZmVhdHVyZSB0cm9uZyBk4buvIGxp4buHdSBjw7Mgc+G6tW4gdHJvbmcgUnN0dWRpbyB0aGVvIHTDqm4NCmF0dGFjaChjb2xsZWdlKQ0KI2FzLmZhY3RvcigpIGTDuW5nIMSR4buDIGNodXnhu4NuIGdpw6EgdHLhu4sgxJHhu4tuaCBsxrDhu6NuZyB0aMOgbmggxJHhu4tuaCB0w61uaA0KUHJpdmF0ZSA9IGFzLmZhY3RvcihQcml2YXRlKQ0KIyBU4bqhbyDEkeG7kyB0aOG7iyBwaMOibiB0w6FuIGPhu6dhIGThu68gbGnhu4d1IHRyxrDhu51uZyB0xrAgKHByaXZhdGUpIHbDoCB0csaw4budbmcgY8O0bmcgKE91dHN0YXRlKQ0KcGxvdChQcml2YXRlLCBPdXRzdGF0ZSAsIGNvbCA9InJlZCIsIHZhcndpZHRoID1ULCB4bGFiPSJQcml2YXRlIiwNCnlsYWI9Ik91dHN0YXRlIixtYWluPSJPdXRzdGF0ZSB2cyBQcml2YXRlIEJveHBsb3QiKQ0KDQpgYGANCmBgYHtyfQ0KIyBU4bqhbyByYSAxIHZlY3RvciB24bubaSDEkeG7mSBkw6BpIHTGsMahbmcg4bupbmcgduG7m2kgc+G7kSBkw7JuZyBj4bunYSBkYXRhc2V0LCB24bubaSBnacOhIHRy4buLIG3hurdjIMSR4buLbmggdHLhuqMgduG7gSBsw6AgMA0KRWxpdGU9cmVwKCJObyIsbnJvdyhjb2xsZWdlICkpDQojWMOhYyDEkeG7i25oIGdpw6EgdHLhu4sgY+G7p2EgdmVjdG9yIHTGsMahbmcg4bupbmcgZOG7ryBsaeG7h3UgZMOybmcgVE9wMTBwZXJjIGzh
u5tuIGjGoW4gNTAgdsOgIHRoYXkgxJHhu5VpIHRow6BuaCBnacOhIHRy4buLIFllcw0KRWxpdGVbVG9wMTBwZXJjID41MF0gPSJZZXMiDQojYXMuZmFjdG9yKCkgZMO5bmcgxJHhu4MgY2h1eeG7g24gZ2nDoSB0cuG7iyDEkeG7i25oIGzGsOG7o25nIHRow6BuaCDEkeG7i25oIHTDrW5oDQpFbGl0ZSA9IGFzLmZhY3RvcihFbGl0ZSkNCiNL4bq/dCBo4bujcCBk4buvIGxp4buHdSBjb2xsZWdlIHbhu5tpIHZlY3RvciB24burYSDEkcaw4bujYyB04bqhbyB2w6AgdHLhuqMgduG7gSAxIGThu68gbGnhu4d1IG3hu5tpIMSRw6FwIOG7qW5nIGThu68gbGnhu4d1IGPDoWMgY+G7mXQgVG9wMTBwZXJjID41MA0KY29sbGVnZSA9IGRhdGEuZnJhbWUoY29sbGVnZSwgRWxpdGUpDQoNCmBgYA0KYGBge3J9DQojVMOzbSBsxrDhu6NjIHhlbSBjw7MgYmFvIG5oacOqdSB0csaw4budbmcgxJHhuqFpIGjhu41jIMawdSB0w7ogdHJvbmcgY3NkbA0Kc3VtbWFyeShjb2xsZWdlJEVsaXRlKQ0KIyBU4bqhbyDEkeG7kyB0aOG7iyBwaMOibiB0w6FuIGPhu6dhIGThu68gbGnhu4d1IGLDqm4gdHLGsOG7nW5nIMawdSB0w7ogKEVsaXRlKSB2w6AgdHLGsOG7nW5nIGPDtG5nIChPdXRzdGF0ZSkNCnBsb3QoRWxpdGUsIE91dHN0YXRlLHhsYWI9IkVsaXRlIiwNCnlsYWI9Ik91dHN0YXRlIixtYWluPSJPdXRzdGF0ZSB2cy4gRWxpdGUgQm94cGxvdCIpDQpgYGANCmBgYHtyfQ0KI0hpc3RvcmFtIGPhu6dhIGPDoWMgYmnhur9uICBk4buvIGxp4buHdSB24bubaSBjw6FjIGThu68gbGnhu4d1IGPhu5l0IMSRxrDhu6NjIGNoaWEgdGjDoG5oIGPhu61hIHPhu5UgM3gzIGLhurFuZyBjw6FjaCBz4butIGThu6VuZyBow6BtIG1mcm93PWMoMywzKQ0KcGFyKG1mcm93PSBjKDMsMykpDQpoaXN0KEFwcHMsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoQWNjZXB0LGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KEVucm9sbCxjb2wgPSAyLGJyZWFrcyA9IDE1KQ0KaGlzdChUb3AxMHBlcmMsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoVG9wMjVwZXJjLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KEYuVW5kZXJncmFkLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KE91dHN0YXRlLGNvbCA9IDIsYnJlYWtzID0gMTUpDQpoaXN0KFJvb20uQm9hcmQsY29sID0gMixicmVha3MgPSAxNSkNCmhpc3QoQm9va3MsY29sID0gMixicmVha3MgPSAxNSkNCmBgYA0KDQoNCg==