# Load the "Career choice of high school students" data set. Naming the
# package makes the dependency explicit and works even when faraway has not
# been attached with library().
data(hsb, package = "faraway")
# View() opens a spreadsheet-style viewer, so only call it in an interactive
# session (it is of no use, and may fail, when the script is run in batch).
if (interactive()) View(hsb)
dim(hsb) # number of rows and columns
## [1] 200 11
str(hsb) # column types and a preview of each variable
## 'data.frame': 200 obs. of 11 variables:
## $ id : int 70 121 86 141 172 113 50 11 84 48 ...
## $ gender : Factor w/ 2 levels "female","male": 2 1 2 2 2 2 2 2 2 2 ...
## $ race : Factor w/ 4 levels "african-amer",..: 4 4 4 4 4 4 1 3 4 1 ...
## $ ses : Factor w/ 3 levels "high","low","middle": 2 3 1 1 3 3 3 3 3 3 ...
## $ schtyp : Factor w/ 2 levels "private","public": 2 2 2 2 2 2 2 2 2 2 ...
## $ prog : Factor w/ 3 levels "academic","general",..: 2 3 2 3 1 1 2 1 2 1 ...
## $ read : int 57 68 44 63 47 44 50 34 63 57 ...
## $ write : int 52 59 33 44 52 52 59 46 57 55 ...
## $ math : int 41 53 54 47 57 51 42 45 54 52 ...
## $ science: int 47 63 58 53 53 63 53 39 58 50 ...
## $ socst : int 57 61 31 56 61 61 61 36 51 51 ...
head(hsb) # first six rows
## id gender race ses schtyp prog read write math science socst
## 1 70 male white low public general 57 52 41 47 57
## 2 121 female white middle public vocation 68 59 53 63 61
## 3 86 male white high public general 44 33 54 58 31
## 4 141 male white high public vocation 63 44 47 53 56
## 5 172 male white middle public academic 47 52 57 53 61
## 6 113 male white middle public academic 44 52 51 63 61
tail(hsb) # last six rows
## id gender race ses schtyp prog read write math science socst
## 195 179 female white middle private academic 47 65 60 50 56
## 196 31 female asian middle private general 55 59 52 42 56
## 197 145 female white middle public vocation 42 46 38 36 46
## 198 187 female white middle private general 57 41 57 55 52
## 199 118 female white middle public general 55 62 58 58 61
## 200 137 female white high public academic 63 65 65 53 61
sum(is.na(hsb)) # total number of missing values across all columns
## [1] 0
summary(hsb) # per-column summary: quantiles for numbers, counts for factors
## id gender race ses schtyp
## Min. : 1.00 female:109 african-amer: 20 high :58 private: 32
## 1st Qu.: 50.75 male : 91 asian : 11 low :47 public :168
## Median :100.50 hispanic : 24 middle:95
## Mean :100.50 white :145
## 3rd Qu.:150.25
## Max. :200.00
## prog read write math science
## academic:105 Min. :28.00 Min. :31.00 Min. :33.00 Min. :26.00
## general : 45 1st Qu.:44.00 1st Qu.:45.75 1st Qu.:45.00 1st Qu.:44.00
## vocation: 50 Median :50.00 Median :54.00 Median :52.00 Median :53.00
## Mean :52.23 Mean :52.77 Mean :52.65 Mean :51.85
## 3rd Qu.:60.00 3rd Qu.:60.00 3rd Qu.:59.00 3rd Qu.:58.00
## Max. :76.00 Max. :67.00 Max. :75.00 Max. :74.00
## socst
## Min. :26.00
## 1st Qu.:46.00
## Median :52.00
## Mean :52.41
## 3rd Qu.:61.00
## Max. :71.00
The data was collected as a subset of the “High School and Beyond” study conducted by the National Education Longitudinal Studies (NELS) program of the National Center for Education Statistics (NCES).
Details of the variables: id: student ID
gender: a factor with levels female, male
race: a factor with levels african-amer, asian, hispanic, white
ses: socioeconomic class - a factor with levels high, low, middle
schtyp: school type - a factor with levels private, public
prog: choice of high school program - a factor with levels academic, general, vocation
read: reading score
write: writing score
math: math score
science: science score
socst: social science score
As the output of these functions shows, hsb is a data frame with 200 rows and 11 columns. The head() and tail() functions show its first and last six rows. Summing the result of is.na() confirms that there are no NA values. Finally, summary() gives a summary of every column of hsb.
請計算不同性別、族群學生,在五個科目上的平均數、標準差、偏態與峰度
# Descriptive statistics of the five scores, split by gender.
# Each call evaluates tapply() inside hsb, so columns are referred to by name.
# NOTE(review): skewness() and kurtosis() are not base R; they presumably come
# from an add-on package attached elsewhere (e.g. moments) - confirm.

# Mean by gender
with(hsb, tapply(read, gender, mean))
## female male
## 51.73394 52.82418
with(hsb, tapply(write, gender, mean))
## female male
## 54.99083 50.12088
with(hsb, tapply(math, gender, mean))
## female male
## 52.39450 52.94505
with(hsb, tapply(science, gender, mean))
## female male
## 50.69725 53.23077
with(hsb, tapply(socst, gender, mean))
## female male
## 52.91743 51.79121

# Standard deviation by gender
with(hsb, tapply(read, gender, sd))
## female male
## 10.05783 10.50671
with(hsb, tapply(write, gender, sd))
## female male
## 8.133715 10.305161
with(hsb, tapply(math, gender, sd))
## female male
## 9.151015 9.664784
with(hsb, tapply(science, gender, sd))
## female male
## 9.038503 10.732171
with(hsb, tapply(socst, gender, sd))
## female male
## 10.23441 11.33384

# Skewness by gender
with(hsb, tapply(read, gender, skewness))
## female male
## 0.32341745 0.04674873
with(hsb, tapply(write, gender, skewness))
## female male
## -0.5899993 -0.1798980
with(hsb, tapply(math, gender, skewness))
## female male
## 0.2346739 0.3256960
with(hsb, tapply(science, gender, skewness))
## female male
## -0.130718 -0.345221
with(hsb, tapply(socst, gender, skewness))
## female male
## -0.3532812 -0.3713532

# Kurtosis by gender
with(hsb, tapply(read, gender, kurtosis))
## female male
## 2.500028 2.262737
with(hsb, tapply(write, gender, kurtosis))
## female male
## 2.544105 1.872877
with(hsb, tapply(math, gender, kurtosis))
## female male
## 2.284784 2.356806
with(hsb, tapply(science, gender, kurtosis))
## female male
## 2.510875 2.371868
with(hsb, tapply(socst, gender, kurtosis))
## female male
## 2.519207 2.335229
I use the tapply() function to compute descriptive statistics of the five scores for each gender.
# Descriptive statistics of the five scores, split by race.
# Each call evaluates tapply() inside hsb, so columns are referred to by name.
# NOTE(review): skewness() and kurtosis() are not base R; they presumably come
# from an add-on package attached elsewhere (e.g. moments) - confirm.

# Mean by race
with(hsb, tapply(read, race, mean))
## african-amer asian hispanic white
## 46.80000 51.90909 46.66667 53.92414
with(hsb, tapply(write, race, mean))
## african-amer asian hispanic white
## 48.20000 58.00000 46.45833 54.05517
with(hsb, tapply(math, race, mean))
## african-amer asian hispanic white
## 46.75000 57.27273 47.41667 53.97241
with(hsb, tapply(science, race, mean))
## african-amer asian hispanic white
## 42.80000 51.45455 45.37500 54.20000
with(hsb, tapply(socst, race, mean))
## african-amer asian hispanic white
## 49.45000 51.00000 47.79167 53.68276

# Standard deviation by race
with(hsb, tapply(read, race, sd))
## african-amer asian hispanic white
## 7.120024 7.660999 10.239169 10.276783
with(hsb, tapply(write, race, sd))
## african-amer asian hispanic white
## 9.322299 7.899367 8.272422 9.172558
with(hsb, tapply(math, race, sd))
## african-amer asian hispanic white
## 6.487843 10.120187 6.983936 9.383011
with(hsb, tapply(science, race, sd))
## african-amer asian hispanic white
## 9.445690 9.490665 8.218815 9.094870
with(hsb, tapply(socst, race, sd))
## african-amer asian hispanic white
## 10.850540 9.746794 9.250049 10.813253

# Skewness by race
with(hsb, tapply(read, race, skewness))
## african-amer asian hispanic white
## 0.56341685 -0.14903573 0.64286691 0.05686143
with(hsb, tapply(write, race, skewness))
## african-amer asian hispanic white
## 0.2445555 -0.8732263 0.3420989 -0.7452076
with(hsb, tapply(math, race, skewness))
## african-amer asian hispanic white
## 1.5769616 -0.2921502 0.1538455 0.1120681
with(hsb, tapply(science, race, skewness))
## african-amer asian hispanic white
## 0.1618054 -0.3239360 0.2121507 -0.2394562
with(hsb, tapply(socst, race, skewness))
## african-amer asian hispanic white
## -0.3779222 0.3398069 0.1741367 -0.5568882

# Kurtosis by race
with(hsb, tapply(read, race, kurtosis))
## african-amer asian hispanic white
## 4.394131 2.457166 3.381917 2.203770
with(hsb, tapply(write, race, kurtosis))
## african-amer asian hispanic white
## 2.069851 2.518799 2.777992 2.710911
with(hsb, tapply(math, race, kurtosis))
## african-amer asian hispanic white
## 5.739716 1.843283 2.494265 2.351510
with(hsb, tapply(science, race, kurtosis))
## african-amer asian hispanic white
## 1.833490 2.240411 3.361737 2.506333
with(hsb, tapply(socst, race, kurtosis))
## african-amer asian hispanic white
## 2.553280 2.849030 2.218556 2.641000
I use the tapply() function to compute descriptive statistics of the five scores for each race.
# Bar charts of group sizes: plot() on a factor draws the count of each level.
plot(hsb$gender) # the plot of gender
plot(hsb$race) # the plot of race

# Panel title and fill colour for each score column, in plotting order.
score_titles <- c(
  read = "Read score", write = "Write score", math = "Math score",
  science = "Science score", socst = "Socst score"
)
score_colours <- c(
  read = "darkgreen", write = "yellow", math = "gray",
  science = "darkblue", socst = "darkred"
)

# Remember the current panel layout so the 2 x 3 grid set below does not leak
# into plots drawn after this section.
old_layout <- par("mfrow")
for (group_var in c("gender", "race")) {
  # One page per grouping factor; re-setting mfrow starts a fresh 2 x 3 grid.
  par(mfrow = c(2, 3))
  for (score_var in names(score_titles)) {
    # A factor on x and a numeric on y gives one box plot per factor level.
    plot(
      hsb[[group_var]], hsb[[score_var]],
      main = score_titles[[score_var]], col = score_colours[[score_var]],
      xlab = group_var, ylab = "score"
    )
  }
}
par(mfrow = old_layout)
I use the plot() function to compare the scores across genders and races. The box plots make the medians and outliers easy to see. The writing scores differ considerably between genders. Across races, every score except socst differs considerably.