# Load the student performance data; the relative path is resolved against
# the current working directory.
StudentsPerformance <- read_csv("StudentsPerformance.csv")

# Show the compact structure (column names, types, first values).
StudentsPerformance %>% str()
## spc_tbl_ [1,000 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ gender : chr [1:1000] "female" "female" "female" "male" ...
## $ race/ethnicity : chr [1:1000] "group B" "group C" "group B" "group A" ...
## $ parental level of education: chr [1:1000] "bachelor's degree" "some college" "master's degree" "associate's degree" ...
## $ lunch : chr [1:1000] "standard" "standard" "standard" "free/reduced" ...
## $ test preparation course : chr [1:1000] "none" "completed" "none" "none" ...
## $ math score : num [1:1000] 72 69 90 47 76 71 88 40 64 38 ...
## $ reading score : num [1:1000] 72 90 95 57 78 83 95 43 64 60 ...
## $ writing score : num [1:1000] 74 88 93 44 75 78 92 39 67 50 ...
## - attr(*, "spec")=
## .. cols(
## .. gender = col_character(),
## .. `race/ethnicity` = col_character(),
## .. `parental level of education` = col_character(),
## .. lunch = col_character(),
## .. `test preparation course` = col_character(),
## .. `math score` = col_double(),
## .. `reading score` = col_double(),
## .. `writing score` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Preview the first 15 rows of the data.
StudentsPerformance %>% head(n = 15)
## # A tibble: 15 × 8
## gender `race/ethnicity` parental level of educ…¹ lunch test preparation cou…²
## <chr> <chr> <chr> <chr> <chr>
## 1 female group B bachelor's degree stan… none
## 2 female group C some college stan… completed
## 3 female group B master's degree stan… none
## 4 male group A associate's degree free… none
## 5 male group C some college stan… none
## 6 female group B associate's degree stan… none
## 7 female group B some college stan… completed
## 8 male group B some college free… none
## 9 male group D high school free… completed
## 10 female group B high school free… none
## 11 male group C associate's degree stan… none
## 12 male group D associate's degree stan… none
## 13 female group B high school stan… none
## 14 male group A some college stan… completed
## 15 female group A master's degree stan… none
## # ℹ abbreviated names: ¹`parental level of education`,
## # ²`test preparation course`
## # ℹ 3 more variables: `math score` <dbl>, `reading score` <dbl>,
## # `writing score` <dbl>
# Square each element of `x`.
#
# x: a numeric vector (the script passes the math scores).
# Returns a vector of the same length holding x^2.
square_of_math.score <- function(x) {
  x^2
}
# Square every math score; the whole result vector is printed.
x <- StudentsPerformance[["math score"]]
square_of_math.score(x)
## [1] 5184 4761 8100 2209 5776 5041 7744 1600 4096 1444 3364 1600
## [13] 4225 6084 2500 4761 7744 324 2116 2916 4356 4225 1936 4761
## [25] 5476 5329 4761 4489 4900 3844 4761 3969 3136 1600 9409 6561
## [37] 5476 2500 5625 3249 3025 3364 2809 3481 2500 4225 3025 4356
## [49] 3249 6724 2809 5929 2809 7744 5041 1089 6724 2704 3364 0
## [61] 6241 1521 3844 4761 3481 4489 2025 3600 3721 1521 3364 3969
## [73] 1681 3721 2401 1936 900 6400 3721 3844 2209 2401 2500 5184
## [85] 1764 5329 5776 5041 3364 5329 4225 729 5041 1849 6241 6084
## [97] 4225 3969 3364 4225 6241 4624 7225 3600 9604 3364 7569 4356
## [109] 2704 4900 5929 3844 2916 2601 9801 7056 5625 6084 2601 3025
## [121] 6241 8281 7744 3969 6889 7569 5184 4225 6724 2601 7921 2809
## [133] 7569 5625 5476 3364 2601 4900 3481 5041 5776 3481 1764 3249
## [145] 7744 484 7744 5329 4624 10000 3844 5929 3481 2916 3844 4900
## [157] 4356 3600 3721 4356 6724 5625 2401 2704 6561 9216 2809 3364
## [169] 4624 4489 5184 8836 6241 3969 1849 6561 2116 5041 2704 9409
## [181] 3844 2116 2500 4225 2025 4225 6400 3844 2304 5929 4356 5776
## [193] 3844 5929 4761 3721 3481 3025 2025 6084 4489 4225 4761 3249
## [205] 3481 5476 6724 6561 5476 3364 6400 1225 1764 3600 7569 7056
## [217] 6889 1156 4356 3721 3136 7569 3025 7396 2704 2025 5184 3249
## [229] 4624 7744 5776 2116 4489 8464 6889 6400 3969 4096 2916 7056
## [241] 5329 6400 3136 3481 5625 7225 7921 3364 4225 4624 2209 5041
## [253] 3600 6400 2916 3844 4096 6084 4900 4225 4096 6241 1936 9801
## [265] 5776 3481 3969 4761 7744 5041 4761 3364 2209 4225 7744 6889
## [277] 7225 3481 4225 5329 2809 2025 5329 4900 1369 6561 9409 4489
## [289] 7744 5929 5776 7396 3969 4225 6084 4489 2116 5041 1600 8100
## [301] 6561 3136 4489 6400 5476 4761 9801 2601 2809 2401 5329 4356
## [313] 4489 4624 3481 5041 5929 6889 3969 3136 4489 5625 5041 1849
## [325] 1681 6724 3721 784 6724 1681 5041 2209 3844 8100 6889 3721
## [337] 5776 2401 576 1225 3364 3721 4761 4489 6241 5184 3844 5929
## [349] 5625 7569 2704 4356 3969 2116 3481 3721 3969 1764 3481 6400
## [361] 3364 7225 2704 729 3481 2401 4761 3721 1936 5329 7056 2025
## [373] 5476 6724 3481 2116 6400 7225 5041 4356 6400 7569 6241 1444
## [385] 1444 4489 4096 3249 3844 5329 5329 5929 5776 3249 4225 2304
## [397] 2500 7225 5476 3600 3481 2809 2401 7744 2916 3969 4225 6724
## [409] 2704 7569 4900 7056 5041 3969 2601 7056 5041 5476 4624 3249
## [421] 6724 3249 2209 3481 1681 3844 7396 4761 4225 4624 4096 3721
## [433] 3721 2209 5329 2500 5625 5625 4900 7921 4489 6084 3481 5329
## [445] 6241 4489 4761 7396 2209 6561 4096 10000 4225 4225 2809 1369
## [457] 6241 2809 10000 5184 2809 2916 5041 5929 5625 7056 676 5184
## [469] 5929 8281 6889 3969 4624 3481 8100 5041 5776 6400 3025 5776
## [481] 5329 2704 4624 3481 2401 4900 3721 3600 4096 6241 4225 4096
## [493] 6889 6561 2916 4624 2916 3481 4356 5776 5476 8836 3969 9025
## [505] 1600 6724 4624 3025 6241 7396 5776 4096 3844 2916 5929 5776
## [517] 5476 4356 4356 4489 5041 8281 4761 2916 2809 4624 3136 1296
## [529] 841 3844 4624 2209 3844 6241 5329 4356 2601 2601 7225 9409
## [541] 5625 6241 6561 6724 4096 6084 8464 5184 3844 6241 6241 7569
## [553] 1600 5929 2809 1024 3025 3721 2809 5329 5476 3969 9216 3969
## [565] 2304 2304 8464 3721 3969 4624 5041 8281 2809 2500 5476 1600
## [577] 3721 6561 2304 2809 6561 5929 3969 5329 4761 4225 3025 1936
## [589] 2916 2304 3364 5041 4624 5476 8464 3136 900 2809 4761 4225
## [601] 2916 841 5776 3600 7056 5625 7225 1600 3721 3364 4761 3364
## [613] 8836 4225 6724 3600 1369 7744 9025 4225 1225 3844 3364 10000
## [625] 3721 10000 4761 3721 2401 1936 4489 6241 4356 5625 7056 5041
## [637] 4489 6400 7396 5776 1681 5476 5184 5476 4900 4225 3481 4096
## [649] 2500 4761 2601 4624 7225 4225 5329 3844 5929 4761 1849 8100
## [661] 5476 5329 3025 4225 6400 2500 3969 5929 5329 6561 4356 2704
## [673] 4761 4225 4761 2500 5329 4900 6561 3969 4489 3600 3844 841
## [685] 3844 8836 7225 5929 2809 8649 2401 5329 4356 5929 2401 6241
## [697] 5625 3481 3249 4356 6241 3249 7569 3969 3481 3844 2116 4356
## [709] 7921 1764 8649 6400 9604 6561 3600 5776 5329 9216 5776 8281
## [721] 3844 3025 5476 2500 2209 6561 4225 4624 5329 2809 4624 3025
## [733] 7569 3025 2809 4489 8464 2809 6561 3721 6400 1369 6561 3481
## [745] 3025 5184 4761 4761 2500 7569 5041 4624 6241 5929 3364 7056
## [757] 3025 4900 2704 4761 2809 2304 6084 3844 3600 5476 3364 5776
## [769] 4624 3364 2704 5625 2704 3844 4356 2401 4356 1225 5184 8836
## [781] 2116 5929 5776 2704 8281 1024 5184 361 4624 2704 2304 3600
## [793] 4356 7921 1764 3249 4900 4900 4761 2704 4489 5776 7569 6724
## [805] 5329 5625 4096 1681 8100 3481 2601 2025 2916 7569 5184 8836
## [817] 2025 3721 3600 5929 7225 6084 2401 5041 2304 3844 3136 4225
## [829] 4761 4624 3721 5476 4096 5929 3364 3600 5329 5625 3364 4356
## [841] 1521 4096 529 5476 1600 8100 8281 4096 3481 6400 5041 3721
## [853] 7569 6724 3844 9409 5625 4225 2704 7569 2809 6561 1521 5041
## [865] 9409 6724 3481 3721 6084 2401 3481 4900 6724 8100 1849 6400
## [877] 6561 3249 3481 4096 3969 5041 4096 3025 2601 3844 8649 2916
## [889] 4761 1936 7396 7225 2500 7744 3481 1024 1296 3969 4489 4225
## [901] 7225 5329 1156 8649 4489 7744 3249 6241 4489 4900 2500 4761
## [913] 2704 2209 2116 4624 10000 1936 3249 8281 4761 1225 5184 2916
## [925] 5476 5476 4096 4225 2116 2304 4489 3844 3721 4900 9604 4900
## [937] 4489 3249 7225 5929 5184 6084 6561 3721 3364 2916 6724 2401
## [949] 2401 3249 8836 5625 5476 3364 3844 5184 7056 8464 2025 5625
## [961] 3136 2304 10000 4225 5184 3844 4356 3969 4624 5625 7921 6084
## [973] 2809 2401 2916 4096 3600 3844 3025 8281 64 6561 6241 6084
## [985] 5476 3249 1600 6561 1936 4489 7396 4225 3025 3844 3969 7744
## [997] 3844 3481 4624 5929
# Keep students who scored at least 70 in math and completed the test
# preparation course.
filtered_data <- StudentsPerformance %>%
  filter(
    `math score` >= 70,
    `test preparation course` == "completed"
  )

# A tibble with more than 20 rows prints only its first 10 by default, so
# head(n = 30) alone still displays just 10 rows. Ask print() for all 30.
print(head(filtered_data, n = 30), n = 30)
## # A tibble: 30 × 8
## gender `race/ethnicity` parental level of educ…¹ lunch test preparation cou…²
## <chr> <chr> <chr> <chr> <chr>
## 1 female group B some college stan… completed
## 2 male group A some college stan… completed
## 3 male group D bachelor's degree free… completed
## 4 male group E associate's degree stan… completed
## 5 female group D associate's degree free… completed
## 6 male group C high school stan… completed
## 7 male group E associate's degree free… completed
## 8 female group C some high school free… completed
## 9 female group E associate's degree stan… completed
## 10 male group E bachelor's degree free… completed
## # ℹ 20 more rows
## # ℹ abbreviated names: ¹`parental level of education`,
## # ²`test preparation course`
## # ℹ 3 more variables: `math score` <dbl>, `reading score` <dbl>,
## # `writing score` <dbl>
# Names of the dependent-variable column and the independent-variable columns.
dep_var <- "math score"
indep_var <- c("reading score", "writing score")

# Pull each set of columns out by name, then place them side by side.
dep_df <- StudentsPerformance %>%
  select(all_of(dep_var))
indep_df <- StudentsPerformance %>%
  select(all_of(indep_var))
joined_df <- cbind(dep_df, indep_df)

# Preview the first 50 rows of the combined columns.
head(joined_df, n = 50)
## math score reading score writing score
## 1 72 72 74
## 2 69 90 88
## 3 90 95 93
## 4 47 57 44
## 5 76 78 75
## 6 71 83 78
## 7 88 95 92
## 8 40 43 39
## 9 64 64 67
## 10 38 60 50
## 11 58 54 52
## 12 40 52 43
## 13 65 81 73
## 14 78 72 70
## 15 50 53 58
## 16 69 75 78
## 17 88 89 86
## 18 18 32 28
## 19 46 42 46
## 20 54 58 61
## 21 66 69 63
## 22 65 75 70
## 23 44 54 53
## 24 69 73 73
## 25 74 71 80
## 26 73 74 72
## 27 69 54 55
## 28 67 69 75
## 29 70 70 65
## 30 62 70 75
## 31 69 74 74
## 32 63 65 61
## 33 56 72 65
## 34 40 42 38
## 35 97 87 82
## 36 81 81 79
## 37 74 81 83
## 38 50 64 59
## 39 75 90 88
## 40 57 56 57
## 41 55 61 54
## 42 58 73 68
## 43 53 58 65
## 44 59 65 66
## 45 50 56 54
## 46 65 54 57
## 47 55 65 62
## 48 66 71 76
## 49 57 74 76
## 50 82 84 82
# Drop every row that contains a missing value.
StudentsPerformance_clean <- na.omit(StudentsPerformance)

# Flag each row that exactly repeats an earlier row (TRUE = duplicate).
StudentsPerformance %>% duplicated()
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [37] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [49] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [61] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [73] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [85] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [97] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [109] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [121] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [145] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [157] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [169] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [181] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [193] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [205] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [217] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [229] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [241] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [253] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [277] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [289] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [301] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [313] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [325] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [337] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [349] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [361] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [373] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [385] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [397] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [409] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [421] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [433] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [445] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [457] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [469] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [481] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [493] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [505] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [517] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [529] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [541] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [553] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [565] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [577] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [589] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [601] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [613] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [625] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [637] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [649] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [661] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [673] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [685] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [697] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [709] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [721] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [733] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [745] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [757] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [769] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [781] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [793] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [805] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [817] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [829] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [841] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [853] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [865] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [877] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [889] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [901] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [913] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [925] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [937] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [949] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [961] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [973] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [985] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [997] FALSE FALSE FALSE FALSE
# Keep only the first occurrence of each distinct row.
StudentsPerformance_duplicate <-
  StudentsPerformance[!duplicated(StudentsPerformance), ]

# Sort from highest to lowest math score and show the top 10 rows.
StudentsPerformance_descendingorder <- StudentsPerformance %>%
  arrange(desc(`math score`))
head(StudentsPerformance_descendingorder, n = 10)
## # A tibble: 10 × 8
## gender `race/ethnicity` parental level of educ…¹ lunch test preparation cou…²
## <chr> <chr> <chr> <chr> <chr>
## 1 male group E associate's degree free… completed
## 2 female group E some college stan… none
## 3 female group E bachelor's degree stan… none
## 4 male group A some college stan… completed
## 5 male group D some college stan… completed
## 6 male group E bachelor's degree stan… completed
## 7 female group E associate's degree stan… none
## 8 female group E bachelor's degree stan… completed
## 9 female group E high school stan… none
## 10 male group E some college stan… completed
## # ℹ abbreviated names: ¹`parental level of education`,
## # ²`test preparation course`
## # ℹ 3 more variables: `math score` <dbl>, `reading score` <dbl>,
## # `writing score` <dbl>
# Give two columns shorter names; every other column keeps its name.
StudentsPerformance_Renamed <- StudentsPerformance %>%
  rename(
    Parent.Edu.Level = `parental level of education`,
    race_eth = `race/ethnicity`
  )
print(StudentsPerformance_Renamed)
## # A tibble: 1,000 × 8
## gender race_eth Parent.Edu.Level lunch test preparation cou…¹ `math score`
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 female group B bachelor's degree stand… none 72
## 2 female group C some college stand… completed 69
## 3 female group B master's degree stand… none 90
## 4 male group A associate's degree free/… none 47
## 5 male group C some college stand… none 76
## 6 female group B associate's degree stand… none 71
## 7 female group B some college stand… completed 88
## 8 male group B some college free/… none 40
## 9 male group D high school free/… completed 64
## 10 female group B high school free/… none 38
## # ℹ 990 more rows
## # ℹ abbreviated name: ¹`test preparation course`
## # ℹ 2 more variables: `reading score` <dbl>, `writing score` <dbl>
# Add the sum of the three test scores and that sum divided by three.
StudentsPerformance_extended <- StudentsPerformance %>%
  mutate(
    total.score = `math score` + `reading score` + `writing score`,
    average.score = total.score / 3
  )

# Preview the three test scores next to the two derived columns.
head(StudentsPerformance_extended[, c(
  "math score", "reading score", "writing score",
  "total.score", "average.score"
)])
## # A tibble: 6 × 5
## `math score` `reading score` `writing score` total.score average.score
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 72 72 74 218 72.7
## 2 69 90 88 247 82.3
## 3 90 95 93 278 92.7
## 4 47 57 44 148 49.3
## 5 76 78 75 229 76.3
## 6 71 83 78 232 77.3
# Fix the random seed so the same rows are drawn on every run.
set.seed(123)

# Draw 70% of the row numbers, without replacement, as the training rows.
train_ratio <- 0.7
n <- nrow(StudentsPerformance)
train_index <- sample.int(n, size = round(train_ratio * n), replace = FALSE)
training_set <- StudentsPerformance[train_index, ]
print(training_set)
## # A tibble: 700 × 8
## gender `race/ethnicity` parental level of educ…¹ lunch test preparation cou…²
## <chr> <chr> <chr> <chr> <chr>
## 1 female group C bachelor's degree free… completed
## 2 female group E some college stan… none
## 3 female group B master's degree free… completed
## 4 male group E some college stan… none
## 5 female group C master's degree stan… completed
## 6 female group E high school free… none
## 7 male group D bachelor's degree free… completed
## 8 female group D bachelor's degree stan… none
## 9 male group C high school free… completed
## 10 male group A some high school free… none
## # ℹ 690 more rows
## # ℹ abbreviated names: ¹`parental level of education`,
## # ²`test preparation course`
## # ℹ 3 more variables: `math score` <dbl>, `reading score` <dbl>,
## # `writing score` <dbl>
# Per-column summary of the whole data set.
StudentsPerformance %>% summary()
## gender race/ethnicity parental level of education
## Length:1000 Length:1000 Length:1000
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## lunch test preparation course math score reading score
## Length:1000 Length:1000 Min. : 0.00 Min. : 17.00
## Class :character Class :character 1st Qu.: 57.00 1st Qu.: 59.00
## Mode :character Mode :character Median : 66.00 Median : 70.00
## Mean : 66.09 Mean : 69.17
## 3rd Qu.: 77.00 3rd Qu.: 79.00
## Max. :100.00 Max. :100.00
## writing score
## Min. : 10.00
## 1st Qu.: 57.75
## Median : 69.00
## Mean : 68.05
## 3rd Qu.: 79.00
## Max. :100.00
# 14a. Mean (using math score):
mean(StudentsPerformance[["math score"]])
## [1] 66.089
# Return the most frequent non-missing value of `v`.
#
# v: a vector; NA entries are ignored.
# Returns one element of `v`. When several values tie for the highest count,
# the one that appears first in `v` is returned.
get_mode <- function(v) {
  observed <- v[!is.na(v)]
  candidates <- unique(observed)
  # Count how often each candidate occurs, in order of first appearance.
  counts <- tabulate(match(observed, candidates))
  candidates[which.max(counts)]
}
# Find and print the most frequent math score.
numeric_col <- StudentsPerformance[["math score"]]
mode_val <- get_mode(numeric_col)
mode_val
## [1] 65
# Smallest and largest math score.
range(StudentsPerformance[["math score"]])
## [1] 0 100
# Draw one point per row: reading score on the x axis, math score on the y axis.
ggplot(
  data = StudentsPerformance,
  aes(x = `reading score`, y = `math score`)
) +
  geom_point(color = "steelblue")
# Comment: This scatter plot shows the relationship between reading
# scores and math scores.
# geom_col() stacks the raw values, so each bar would show the SUM of all
# math scores in a group. Summarise to the group mean instead so the bars
# show the average math score per race/ethnicity group.
ggplot(
  data = StudentsPerformance,
  aes(x = `race/ethnicity`, y = `math score`)
) +
  stat_summary(fun = mean, geom = "col", fill = "maroon")
# Comment: This bar plot compares average math scores across different
# race/ethnicity groups.
# Correlation between the reading and writing scores.
cor(
  StudentsPerformance[["reading score"]],
  StudentsPerformance[["writing score"]]
)
## [1] 0.9545981
# Comment: This displays the internal structure of the dataset, including column names, data types, and sample values. It helps us understand what kinds of variables we are working with.
# List the variables in your dataset: