# Built-in datasets ----
# List the datasets of the currently attached packages, then those of every
# installed package.
data()
data(package = .packages(all.available = TRUE))

# Load USArrests into the workspace and keep a working copy under `mydata`.
data("USArrests")
mydata <- USArrests

# First six rows of the working copy.
head(mydata, n = 6L)
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7

# Per-column five-number summary plus mean.
summary(object = mydata)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# sat.act: load, rename, recode ----
# Attach psych and take a working copy of its sat.act data under `mydata2`.
library(psych)
mydata2 <- sat.act
head(mydata2, n = 6L)
## gender education age ACT SATV SATQ
## 29442 2 3 19 24 500 500
## 29457 2 3 23 35 600 500
## 29498 2 3 20 21 480 470
## 29503 1 4 27 26 550 520
## 29504 1 2 33 31 600 550
## 29518 1 5 26 28 640 640

# Replace all six column names at once with more descriptive labels.
names(mydata2) <- c(
  "Gender", "Education", "Age",
  "General", "Verbal", "Quantitative"
)
head(mydata2, n = 6L)
## Gender Education Age General Verbal Quantitative
## 29442 2 3 19 24 500 500
## 29457 2 3 23 35 600 500
## 29498 2 3 20 21 480 470
## 29503 1 4 27 26 550 520
## 29504 1 2 33 31 600 550
## 29518 1 5 26 28 640 640

# Renaming a single column by position: switch column 4 to lower case and
# then back again, so the net column name is unchanged ("General").
names(mydata2)[4] <- "general"
names(mydata2)[4] <- "General"

# Add a labelled factor version of the numeric Gender code (1 -> "M", 2 -> "F").
mydata2[["GenderF"]] <- factor(
  mydata2[["Gender"]],
  levels = c(1, 2),
  labels = c("M", "F")
)
head(mydata2, n = 6L)
## Gender Education Age General Verbal Quantitative GenderF
## 29442 2 3 19 24 500 500 F
## 29457 2 3 23 35 600 500 F
## 29498 2 3 20 21 480 470 F
## 29503 1 4 27 26 550 520 M
## 29504 1 2 33 31 600 550 M
## 29518 1 5 26 28 640 640 M

# Column-wise summary; the factor column is reported as level counts.
summary(object = mydata2)
## Gender Education Age General
## Min. :1.000 Min. :0.000 Min. :13.00 Min. : 3.00
## 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:19.00 1st Qu.:25.00
## Median :2.000 Median :3.000 Median :22.00 Median :29.00
## Mean :1.647 Mean :3.164 Mean :25.59 Mean :28.55
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:29.00 3rd Qu.:32.00
## Max. :2.000 Max. :5.000 Max. :65.00 Max. :36.00
##
## Verbal Quantitative GenderF
## Min. :200.0 Min. :200.0 M:247
## 1st Qu.:550.0 1st Qu.:530.0 F:453
## Median :620.0 Median :620.0
## Mean :612.2 Mean :610.2
## 3rd Qu.:700.0 3rd Qu.:700.0
## Max. :800.0 Max. :800.0
## NA's :13
# Age by gender ----
# Keep only the rows labelled "F" and report their mean age.
mydata2F <- mydata2[mydata2[["GenderF"]] == "F", , drop = FALSE]
mean(mydata2F[["Age"]])
## [1] 25.44812

# Descriptive statistics of Age, computed separately for each GenderF level.
describeBy(x = mydata2$Age, group = mydata2$GenderF)
##
## Descriptive statistics by group
## group: M
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 247 25.86 9.74 22 24.23 5.93 14 58 44 1.43 1.43 0.62
## ------------------------------------------------------------
## group: F
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 453 25.45 9.37 22 23.7 5.93 13 65 52 1.77 3.03 0.44
# Complete cases and score-range filter ----
# install.packages("tidyr")  # one-time setup if tidyr is not yet installed
library(tidyr)

# Discard every row that has a missing value in any column.
mydata2 <- drop_na(data = mydata2)

# Keep rows whose Quantitative score lies in the closed interval [600, 700].
mydata3 <- mydata2[
  with(mydata2, Quantitative >= 600 & Quantitative <= 700),
  ,
  drop = FALSE
]
head(mydata3, n = 6L)
## Gender Education Age General Verbal Quantitative GenderF
## 6 1 5 26 28 640 640 M
## 9 2 4 23 22 400 600 F
## 12 2 4 34 29 710 600 F
## 13 1 4 32 21 600 600 M
## 15 2 3 20 27 640 630 F
## 17 2 3 19 33 640 650 F