Student report

Author

Duc Nguyen

Published

June 26, 2026

1 Import dataset

df <- read.csv("raw/students.csv")
head(df)
  no stud.id                name gender age height weight   religion nc.score semester                  major
1  1  833917 Gonzales, Christina Female  19    160   64.8     Muslim     1.91      1st      Political Science
2  2  898539      Lozano, T'Hani Female  19    172   73.0      Other     1.56      2nd        Social Sciences
3  3  379678      Williams, Hanh Female  22    168   70.6 Protestant     1.24      3rd        Social Sciences
4  4  807564         Nem, Denzel   Male  19    183   79.7      Other     1.37      2nd Environmental Sciences
5  5  383291     Powell, Heather Female  21    175   71.4   Catholic     1.46      1st Environmental Sciences
6  6  256074      Perez, Jadrian   Male  19    189   85.8   Catholic     1.34      2nd      Political Science
                       minor score1 score2 online.tutorial graduated salary  X
1            Social Sciences     NA     NA               0         0     NA NA
2 Mathematics and Statistics     NA     NA               0         0     NA NA
3 Mathematics and Statistics     45     46               0         0     NA NA
4 Mathematics and Statistics     NA     NA               0         0     NA NA
5 Mathematics and Statistics     NA     NA               0         0     NA NA
6 Mathematics and Statistics     NA     NA               0         0     NA NA

1.1 Check dataset

dim(df)
[1] 8240   18
names(df)
 [1] "no"              "stud.id"         "name"            "gender"          "age"             "height"         
 [7] "weight"          "religion"        "nc.score"        "semester"        "major"           "minor"          
[13] "score1"          "score2"          "online.tutorial" "graduated"       "salary"          "X"              
summary(df)
       no          stud.id           name              gender               age            height          weight     
 Min.   :   1   Min.   :110250   Length:8240        Length:8240        Min.   :18.00   Min.   :135.0   Min.   : 51.4  
 1st Qu.:2060   1st Qu.:331223   Class :character   Class :character   1st Qu.:20.00   1st Qu.:163.0   1st Qu.: 66.5  
 Median :4120   Median :545132   Mode  :character   Mode  :character   Median :21.00   Median :171.0   Median : 71.8  
 Mean   :4120   Mean   :548935                                         Mean   :22.54   Mean   :171.4   Mean   : 73.0  
 3rd Qu.:6180   3rd Qu.:770854                                         3rd Qu.:23.00   3rd Qu.:180.0   3rd Qu.: 78.5  
 Max.   :8239   Max.   :989901                                         Max.   :64.00   Max.   :206.0   Max.   :116.0  
 NA's   :1      NA's   :1                                              NA's   :1       NA's   :1       NA's   :1      
   religion            nc.score       semester            major              minor               score1     
 Length:8240        Min.   :1.000   Length:8240        Length:8240        Length:8240        Min.   :30.00  
 Class :character   1st Qu.:1.460   Class :character   Class :character   Class :character   1st Qu.:58.00  
 Mode  :character   Median :2.040   Mode  :character   Mode  :character   Mode  :character   Median :70.00  
                    Mean   :2.166                                                            Mean   :68.17  
                    3rd Qu.:2.780                                                            3rd Qu.:78.00  
                    Max.   :4.000                                                            Max.   :97.00  
                    NA's   :1                                                                NA's   :3348   
     score2      online.tutorial    graduated          salary         X          
 Min.   :31.00   Min.   :0.0000   Min.   :0.0000   Min.   :11444   Mode:logical  
 1st Qu.:59.00   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:35207   NA's:8240     
 Median :71.00   Median :0.0000   Median :0.0000   Median :41672                 
 Mean   :69.49   Mean   :0.3862   Mean   :0.2128   Mean   :42522                 
 3rd Qu.:80.00   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:49373                 
 Max.   :98.00   Max.   :1.0000   Max.   :1.0000   Max.   :75597                 
 NA's   :3348    NA's   :1        NA's   :1        NA's   :6487                  

2 Các lệnh trên data frame

R có 2 thành phần chính:

  • Các lệnh subset/extract dữ liệu: table, string, text ==> cần hiểu data structure mà R lưu trữ, và cơ chế để trích xuất/remove/edit data đó ==> clean data, thu được facet của data phù hợp với research question.

  • Các lệnh tính toán, compute dữ liệu: R base function stat, package chuyên môn ==> Dataviz, maps, text analysis, linear regression, PCA, clustering: thuật toán và example/workflow (Quarto để học viên run lại được trên máy tính), sử dụng case study, real dataset.

df_clean <- df[ 1:30    , - c(1, 18)    ]
df_clean
   stud.id                        name gender age height weight   religion nc.score semester                      major
1   833917         Gonzales, Christina Female  19    160   64.8     Muslim     1.91      1st          Political Science
2   898539              Lozano, T'Hani Female  19    172   73.0      Other     1.56      2nd            Social Sciences
3   379678              Williams, Hanh Female  22    168   70.6 Protestant     1.24      3rd            Social Sciences
4   807564                 Nem, Denzel   Male  19    183   79.7      Other     1.37      2nd     Environmental Sciences
5   383291             Powell, Heather Female  21    175   71.4   Catholic     1.46      1st     Environmental Sciences
6   256074              Perez, Jadrian   Male  19    189   85.8   Catholic     1.34      2nd          Political Science
7   754591               Clardy, Anita Female  21    156   65.9 Protestant     1.11      2nd          Political Science
8   146494        Allen, Rebecca Marie Female  21    167   65.7      Other     2.03      3rd          Political Science
9   723584               Tracy, Robert   Male  18    195   94.4      Other     1.29      3rd      Economics and Finance
10  314281              Nimmons, Laura Female  18    165   66.0   Orthodox     1.19      2nd     Environmental Sciences
11  200803             Lang, Mackenzie Female  22    162   66.8      Other     1.04      4th      Economics and Finance
12  444907          Rodriguez, Brianna Female  18    172   66.8      Other     3.81      3rd     Environmental Sciences
13  354271 Covar Orendain, Christopher   Male  23    185   84.6   Orthodox     1.00      4th     Environmental Sciences
14  317812              Lopez, Monique Female  20    158   64.4   Catholic     2.50      6th     Environmental Sciences
15  604115               Davis, Shagun Female  19    157   66.3   Orthodox     1.92      2nd      Economics and Finance
16  889551                 Adams, Jose   Male  20    172   73.9      Other     3.61      4th Mathematics and Statistics
17  350040             Hines, Haileigh Female  22    156   61.7      Other     2.27      6th          Political Science
18  240279            Daugherty, Jesus   Male  22    182   82.1   Catholic     1.42      1st      Economics and Finance
19  865835               Roybal, Ebony Female  21    162   69.2   Catholic     1.32      3rd          Political Science
20  137196          Baysinger, Tanisha Female  22    168   70.9 Protestant     2.33      2nd     Environmental Sciences
21  708242             Phillips, Laiba Female  20    167   68.5      Other     1.79      4th                    Biology
22  499002         Culbertson, Deshawn   Male  37    175   70.4 Protestant     1.97      2nd          Political Science
23  873149            O Reilly, Joshua   Male  19    164   70.3 Protestant     1.68      2nd          Political Science
24  807361          Johnson, Stephanie Female  38    155   67.0   Catholic     2.30      2nd     Environmental Sciences
25  531029                  Mix, Aziel   Male  23    183   81.8   Catholic     2.11      4th      Economics and Finance
26  970589             Gonzalez, Dixie Female  26    145   54.0      Other     1.14      1st                    Biology
27  250298              Clark, Janelle Female  25    161   66.8      Other     1.45      3rd            Social Sciences
28  763393            Woolsey, Bronson   Male  24    182   80.1 Protestant     1.09      5th      Economics and Finance
29  544433              Diawara, Erica Female  54    169   71.4 Protestant     1.75      2nd          Political Science
30  252935              Lord, Benjamin   Male  22    172   69.6 Protestant     3.94      3rd Mathematics and Statistics
                        minor score1 score2 online.tutorial graduated   salary
1             Social Sciences     NA     NA               0         0       NA
2  Mathematics and Statistics     NA     NA               0         0       NA
3  Mathematics and Statistics     45     46               0         0       NA
4  Mathematics and Statistics     NA     NA               0         0       NA
5  Mathematics and Statistics     NA     NA               0         0       NA
6  Mathematics and Statistics     NA     NA               0         0       NA
7             Social Sciences     NA     NA               0         0       NA
8       Economics and Finance     58     62               0         0       NA
9      Environmental Sciences     57     67               0         0       NA
10 Mathematics and Statistics     NA     NA               0         0       NA
11     Environmental Sciences     62     61               1         1 45254.11
12      Economics and Finance     76     82               0         0       NA
13 Mathematics and Statistics     71     76               1         1 40552.79
14            Social Sciences     66     70               1         1 27007.03
15          Political Science     NA     NA               0         0       NA
16          Political Science     87     91               1         0       NA
17                    Biology     57     54               0         1 33969.16
18     Environmental Sciences     NA     NA               0         0       NA
19     Environmental Sciences     69     46               1         0       NA
20          Political Science     NA     NA               0         0       NA
21      Economics and Finance     77     80               1         0       NA
22     Environmental Sciences     NA     NA               0         0       NA
23     Environmental Sciences     NA     NA               0         0       NA
24                    Biology     NA     NA               0         0       NA
25     Environmental Sciences     69     65               0         0       NA
26     Environmental Sciences     NA     NA               0         0       NA
27      Economics and Finance     45     37               1         0       NA
28            Social Sciences     61     73               1         1 50617.64
29     Environmental Sciences     NA     NA               0         0       NA
30            Social Sciences     89     90               1         0       NA
head(df_clean)
  stud.id                name gender age height weight   religion nc.score semester                  major
1  833917 Gonzales, Christina Female  19    160   64.8     Muslim     1.91      1st      Political Science
2  898539      Lozano, T'Hani Female  19    172   73.0      Other     1.56      2nd        Social Sciences
3  379678      Williams, Hanh Female  22    168   70.6 Protestant     1.24      3rd        Social Sciences
4  807564         Nem, Denzel   Male  19    183   79.7      Other     1.37      2nd Environmental Sciences
5  383291     Powell, Heather Female  21    175   71.4   Catholic     1.46      1st Environmental Sciences
6  256074      Perez, Jadrian   Male  19    189   85.8   Catholic     1.34      2nd      Political Science
                       minor score1 score2 online.tutorial graduated salary
1            Social Sciences     NA     NA               0         0     NA
2 Mathematics and Statistics     NA     NA               0         0     NA
3 Mathematics and Statistics     45     46               0         0     NA
4 Mathematics and Statistics     NA     NA               0         0     NA
5 Mathematics and Statistics     NA     NA               0         0     NA
6 Mathematics and Statistics     NA     NA               0         0     NA
names(df_clean)
 [1] "stud.id"         "name"            "gender"          "age"             "height"          "weight"         
 [7] "religion"        "nc.score"        "semester"        "major"           "minor"           "score1"         
[13] "score2"          "online.tutorial" "graduated"       "salary"         
df_age <- df_clean[ , c(1,3,4,10)]

df_age <- df_clean[ , c("stud.id", "gender", "age", "major")]

df_age
   stud.id gender age                      major
1   833917 Female  19          Political Science
2   898539 Female  19            Social Sciences
3   379678 Female  22            Social Sciences
4   807564   Male  19     Environmental Sciences
5   383291 Female  21     Environmental Sciences
6   256074   Male  19          Political Science
7   754591 Female  21          Political Science
8   146494 Female  21          Political Science
9   723584   Male  18      Economics and Finance
10  314281 Female  18     Environmental Sciences
11  200803 Female  22      Economics and Finance
12  444907 Female  18     Environmental Sciences
13  354271   Male  23     Environmental Sciences
14  317812 Female  20     Environmental Sciences
15  604115 Female  19      Economics and Finance
16  889551   Male  20 Mathematics and Statistics
17  350040 Female  22          Political Science
18  240279   Male  22      Economics and Finance
19  865835 Female  21          Political Science
20  137196 Female  22     Environmental Sciences
21  708242 Female  20                    Biology
22  499002   Male  37          Political Science
23  873149   Male  19          Political Science
24  807361 Female  38     Environmental Sciences
25  531029   Male  23      Economics and Finance
26  970589 Female  26                    Biology
27  250298 Female  25            Social Sciences
28  763393   Male  24      Economics and Finance
29  544433 Female  54          Political Science
30  252935   Male  22 Mathematics and Statistics