# Load the nlschools data set that ships with MASS and preview its first rows.
library(MASS)
data("nlschools", package = "MASS")
knitr::kable(head(nlschools, n = 6L))
## | lang|   IQ|class | GS| SES|COMB |
## |----:|----:|:-----|--:|---:|:----|
## |   46| 15.0|180   | 29|  23|0    |
## |   45| 14.5|180   | 29|  10|0    |
## |   33|  9.5|180   | 29|  15|0    |
## |   46| 11.0|180   | 29|  23|0    |
## |   20|  8.0|180   | 29|  10|0    |
## |   30|  9.5|180   | 29|  10|0    |
# Check the structure of the data: 2287 observations of 6 variables, and
# `class` is a factor with 133 levels.
str(nlschools)
## 'data.frame': 2287 obs. of 6 variables:
## $ lang : int 46 45 33 46 20 30 30 57 36 36 ...
## $ IQ : num 15 14.5 9.5 11 8 9.5 9.5 13 9.5 11 ...
## $ class: Factor w/ 133 levels "180","280","1082",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ GS : int 29 29 29 29 29 29 29 29 29 29 ...
## $ SES : int 23 10 15 23 10 10 23 10 13 15 ...
## $ COMB : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
library(tibble)
# Convert the nlschools data frame to a tibble for the dplyr pipeline below.
dta <- as_tibble(MASS::nlschools)
# dplyr is attached after MASS, so dplyr's select() masks MASS's select()
# (see the attach message printed below).
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Summarise IQ and language scores per class:
#   1. group the pupils by class;
#   2. compute the mean IQ, the mean language score, the standard error of
#      the language mean (sd / sqrt(number of non-missing scores)) and a
#      normal-approximation 95% interval (mean -/+ 1.96 * se);
#   3. add a sequential class identifier, `classID`, as a character column.
dta1 <- dta %>%
  group_by(class) %>%
  dplyr::summarize(
    IQ_mean = mean(IQ, na.rm = TRUE),
    language_mean = mean(lang, na.rm = TRUE),
    # sd() drops missing scores, so the denominator must count only the
    # non-missing ones; n() would also count the NA rows.
    language_se = sd(lang, na.rm = TRUE) / sqrt(sum(!is.na(lang))),
    language_lb = language_mean - 1.96 * language_se,
    language_ub = language_mean + 1.96 * language_se
  ) %>%
  # Number the classes from the actual row count instead of a hard-coded
  # 1:133. The IDs are not zero-padded ("1", "2", ..., as before).
  mutate(classID = as.character(seq_len(n())))
# Print numbers with 5 significant digits.
options(digits = 5)
# Keep the class identifier and the summary columns, then show the last
# 3 rows as a plain data frame.
dta1 %>%
  select(classID, IQ_mean, language_mean, language_lb, language_ub) %>%
  as.data.frame() %>%
  tail(n = 3)
## classID IQ_mean language_mean language_lb language_ub
## 131 131 11.273 38.091 34.733 41.449
## 132 132 10.550 29.300 21.067 37.533
## 133 133 10.643 28.429 23.263 33.594