library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the SAT/GPA data set from the course repository and print
# per-column summary statistics.
sat <- read.csv(
  file = "https://raw.githubusercontent.com/LeJQC/Bridge-Course/main/satgpa.csv"
)
summary(sat)
## X sex sat_v sat_m
## Min. : 1.0 Min. :1.000 Min. :24.00 Min. :29.0
## 1st Qu.: 250.8 1st Qu.:1.000 1st Qu.:43.00 1st Qu.:49.0
## Median : 500.5 Median :1.000 Median :49.00 Median :55.0
## Mean : 500.5 Mean :1.484 Mean :48.93 Mean :54.4
## 3rd Qu.: 750.2 3rd Qu.:2.000 3rd Qu.:54.00 3rd Qu.:60.0
## Max. :1000.0 Max. :2.000 Max. :76.00 Max. :77.0
## sat_sum hs_gpa fy_gpa
## Min. : 53.0 Min. :1.800 Min. :0.000
## 1st Qu.: 93.0 1st Qu.:2.800 1st Qu.:1.980
## Median :103.0 Median :3.200 Median :2.465
## Mean :103.3 Mean :3.198 Mean :2.468
## 3rd Qu.:113.0 3rd Qu.:3.700 3rd Qu.:3.020
## Max. :144.0 Max. :4.500 Max. :4.000
# Mean and median of the combined SAT score and of the high-school GPA,
# computed over the full data set.
mean(sat[["sat_sum"]])
## [1] 103.329
mean(sat[["hs_gpa"]])
## [1] 3.1981
median(sat[["sat_sum"]])
## [1] 103
median(sat[["hs_gpa"]])
## [1] 3.2
# Keep only the rows whose combined SAT score exceeds the median of
# sat_sum. The cutoff is computed from the data (it evaluates to 103 for
# this data set) instead of being typed in as a literal, so it stays
# correct if the input changes.
above_average <- subset(sat, sat_sum > median(sat_sum, na.rm = TRUE))
# Drop the row-index column and the first-year GPA column.
above_average <- select(above_average, -c(X, fy_gpa))
head(above_average)
## sex sat_v sat_m sat_sum hs_gpa
## 1 1 65 62 127 3.40
## 2 2 58 64 122 4.00
## 3 2 56 60 116 3.75
## 5 1 55 52 107 4.00
## 6 2 55 56 111 4.00
## 7 1 57 65 122 2.80
# Give the remaining columns descriptive names (assigned by position),
# then preview the first rows.
names(above_average) <- c(
  "Gender", "Verbal", "Math", "Total_Score", "HS_GPA"
)
head(above_average, n = 6L)
## Gender Verbal Math Total_Score HS_GPA
## 1 1 65 62 127 3.40
## 2 2 58 64 122 4.00
## 3 2 56 60 116 3.75
## 5 1 55 52 107 4.00
## 6 2 55 56 111 4.00
## 7 1 57 65 122 2.80
# Per-column summary statistics for the filtered data set.
summary(above_average)
## Gender Verbal Math Total_Score HS_GPA
## Min. :1.000 Min. :37.00 Min. :42.00 Min. :104 Min. :2.000
## 1st Qu.:1.000 1st Qu.:50.00 1st Qu.:56.00 1st Qu.:108 1st Qu.:3.000
## Median :1.000 Median :54.00 Median :60.00 Median :113 Median :3.500
## Mean :1.404 Mean :54.83 Mean :60.17 Mean :115 Mean :3.378
## 3rd Qu.:2.000 3rd Qu.:59.00 3rd Qu.:64.00 3rd Qu.:121 3rd Qu.:3.800
## Max. :2.000 Max. :76.00 Max. :77.00 Max. :144 Max. :4.000
# Mean and median of the total score and the high-school GPA within the
# filtered data set, for comparison with the full-data values.
mean(above_average[["Total_Score"]])
## [1] 115.0062
median(above_average[["Total_Score"]])
## [1] 113
mean(above_average[["HS_GPA"]])
## [1] 3.377629
median(above_average[["HS_GPA"]])
## [1] 3.5
## [1] 3.5
The mean and median of both Total_Score and HS_GPA in the above_average dataset are higher than the corresponding values in the original data set.
# Recode the numeric gender code as a label: 1 becomes "male" and every
# other non-missing value becomes "female". The comparison is against
# the number 1 so it does not depend on implicit numeric-to-character
# coercion.
above_average$Gender <- ifelse(above_average$Gender == 1, "male", "female")
head(above_average)
## Gender Verbal Math Total_Score HS_GPA
## 1 male 65 62 127 3.40
## 2 female 58 64 122 4.00
## 3 female 56 60 116 3.75
## 5 male 55 52 107 4.00
## 6 female 55 56 111 4.00
## 7 male 57 65 122 2.80
# Preview the first six rows of the recoded data set.
head(above_average, n = 6L)
## Gender Verbal Math Total_Score HS_GPA
## 1 male 65 62 127 3.40
## 2 female 58 64 122 4.00
## 3 female 56 60 116 3.75
## 5 male 55 52 107 4.00
## 6 female 55 56 111 4.00
## 7 male 57 65 122 2.80