#讀取資料
dta<-Ecdat::Caschool
#查看前六筆資料
head(dta)
## distcod county district grspan enrltot teachers
## 1 75119 Alameda Sunol Glen Unified KK-08 195 10.90
## 2 61499 Butte Manzanita Elementary KK-08 240 11.15
## 3 61549 Butte Thermalito Union Elementary KK-08 1550 82.90
## 4 61457 Butte Golden Feather Union Elementary KK-08 243 14.00
## 5 61523 Butte Palermo Union Elementary KK-08 1335 71.50
## 6 62042 Fresno Burrel Union Elementary KK-08 137 6.40
## calwpct mealpct computer testscr compstu expnstu str avginc
## 1 0.5102 2.0408 67 690.80 0.3435898 6384.911 17.88991 22.690001
## 2 15.4167 47.9167 101 661.20 0.4208333 5099.381 21.52466 9.824000
## 3 55.0323 76.3226 169 643.60 0.1090323 5501.955 18.69723 8.978000
## 4 36.4754 77.0492 85 647.70 0.3497942 7101.831 17.35714 8.978000
## 5 33.1086 78.4270 171 640.85 0.1280899 5235.988 18.67133 9.080333
## 6 12.3188 86.9565 25 605.55 0.1824818 5580.147 21.40625 10.415000
## elpct readscr mathscr
## 1 0.000000 691.6 690.0
## 2 4.583333 660.5 661.9
## 3 30.000002 636.3 650.9
## 4 0.000000 651.9 643.5
## 5 13.857677 641.8 639.9
## 6 12.408759 605.7 605.4
#查看資料格式
str(dta)
## 'data.frame': 420 obs. of 17 variables:
## $ distcod : int 75119 61499 61549 61457 61523 62042 68536 63834 62331 67306 ...
## $ county : Factor w/ 45 levels "Alameda","Butte",..: 1 2 2 2 2 6 29 11 6 25 ...
## $ district: Factor w/ 409 levels "Ackerman Elementary",..: 362 214 367 132 270 53 152 383 263 94 ...
## $ grspan : Factor w/ 2 levels "KK-06","KK-08": 2 2 2 2 2 2 2 2 2 1 ...
## $ enrltot : int 195 240 1550 243 1335 137 195 888 379 2247 ...
## $ teachers: num 10.9 11.1 82.9 14 71.5 ...
## $ calwpct : num 0.51 15.42 55.03 36.48 33.11 ...
## $ mealpct : num 2.04 47.92 76.32 77.05 78.43 ...
## $ computer: int 67 101 169 85 171 25 28 66 35 0 ...
## $ testscr : num 691 661 644 648 641 ...
## $ compstu : num 0.344 0.421 0.109 0.35 0.128 ...
## $ expnstu : num 6385 5099 5502 7102 5236 ...
## $ str : num 17.9 21.5 18.7 17.4 18.7 ...
## $ avginc : num 22.69 9.82 8.98 8.98 9.08 ...
## $ elpct : num 0 4.58 30 0 13.86 ...
## $ readscr : num 692 660 636 652 642 ...
## $ mathscr : num 690 662 651 644 640 ...
#抽樣並畫散佈圖
set.seed(13579)
dta2_1<-dta%>%group_by(county)%>%sample_n(size = 1)
ggplot(dta2_1,aes(x=readscr,y=mathscr))+geom_point()
## ----echo=FALSE,eval=TRUE------------------------------------------------
options(continue=" ")#設定換行符號
## ------------------------------------------------------------------------
options(digits=3)#設定小數點位數
options(width=72) # narrow output
ds = read.csv("http://www.amherst.edu/~nhorton/r2/datasets/help.csv")#讀資料
library(dplyr)#載入Package
newds = select(ds, cesd, female, i1, i2, id, treat, f1a, f1b, f1c, f1d, f1e,
f1f, f1g, f1h, f1i, f1j, f1k, f1l, f1m, f1n, f1o, f1p, f1q, f1r, f1s, f1t)#選擇載入變項
## ------------------------------------------------------------------------
names(newds)#查看變項名稱
## [1] "cesd" "female" "i1" "i2" "id" "treat" "f1a"
## [8] "f1b" "f1c" "f1d" "f1e" "f1f" "f1g" "f1h"
## [15] "f1i" "f1j" "f1k" "f1l" "f1m" "f1n" "f1o"
## [22] "f1p" "f1q" "f1r" "f1s" "f1t"
str(newds[,1:10]) # structure of the first 10 variables
## 'data.frame': 453 obs. of 10 variables:
## $ cesd : int 49 30 39 15 39 6 52 32 50 46 ...
## $ female: int 0 0 0 1 0 1 1 0 1 0 ...
## $ i1 : int 13 56 0 5 10 4 13 12 71 20 ...
## $ i2 : int 26 62 0 5 13 4 20 24 129 27 ...
## $ id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ treat : int 1 1 0 0 0 1 0 1 0 1 ...
## $ f1a : int 3 3 3 0 3 1 3 1 3 2 ...
## $ f1b : int 2 2 2 0 0 0 1 1 2 3 ...
## $ f1c : int 3 0 3 1 3 1 3 2 3 3 ...
## $ f1d : int 0 3 0 3 3 3 1 3 1 0 ...
## ------------------------------------------------------------------------
summary(newds[,1:10]) # summary of the first 10 variables
## cesd female i1 i2
## Min. : 1.0 Min. :0.000 Min. : 0.0 Min. : 0.0
## 1st Qu.:25.0 1st Qu.:0.000 1st Qu.: 3.0 1st Qu.: 3.0
## Median :34.0 Median :0.000 Median : 13.0 Median : 15.0
## Mean :32.8 Mean :0.236 Mean : 17.9 Mean : 22.6
## 3rd Qu.:41.0 3rd Qu.:0.000 3rd Qu.: 26.0 3rd Qu.: 32.0
## Max. :60.0 Max. :1.000 Max. :142.0 Max. :184.0
## id treat f1a f1b
## Min. : 1 Min. :0.000 Min. :0.00 Min. :0.00
## 1st Qu.:119 1st Qu.:0.000 1st Qu.:1.00 1st Qu.:0.00
## Median :233 Median :0.000 Median :2.00 Median :1.00
## Mean :233 Mean :0.497 Mean :1.63 Mean :1.39
## 3rd Qu.:348 3rd Qu.:1.000 3rd Qu.:3.00 3rd Qu.:2.00
## Max. :470 Max. :1.000 Max. :3.00 Max. :3.00
## f1c f1d
## Min. :0.00 Min. :0.00
## 1st Qu.:1.00 1st Qu.:0.00
## Median :2.00 Median :1.00
## Mean :1.92 Mean :1.56
## 3rd Qu.:3.00 3rd Qu.:3.00
## Max. :3.00 Max. :3.00
## ------------------------------------------------------------------------
head(newds, n=3)#檢視前三筆資料
## cesd female i1 i2 id treat f1a f1b f1c f1d f1e f1f f1g f1h f1i f1j
## 1 49 0 13 26 1 1 3 2 3 0 2 3 3 0 2 3
## 2 30 0 56 62 2 1 3 2 0 3 3 2 0 0 3 0
## 3 39 0 0 0 3 0 3 2 3 0 2 2 1 3 2 3
## f1k f1l f1m f1n f1o f1p f1q f1r f1s f1t
## 1 3 0 1 2 2 2 2 3 3 2
## 2 3 0 0 3 0 0 0 2 0 0
## 3 1 0 1 3 2 0 0 3 2 0
## ------------------------------------------------------------------------
comment(newds) = "HELP baseline dataset" #添加文字註腳
comment(newds)#查看註腳
## [1] "HELP baseline dataset"
save(ds, file="savedfile")#存擋
## ------------------------------------------------------------------------
write.csv(ds, file="ds.csv")#把ds匯出成.csv格式
## ------------------------------------------------------------------------
library(foreign)#載入package
write.foreign(newds, "file.dat", "file.sas", package="SAS")#會出成SAS可讀的模式
## ------------------------------------------------------------------------
with(newds, cesd[1:10])#查看cesd前10項的數值
## [1] 49 30 39 15 39 6 52 32 50 46
with(newds, head(cesd, 10))#查看cesd前10項的數值
## [1] 49 30 39 15 39 6 52 32 50 46
## ------------------------------------------------------------------------
with(newds, cesd[cesd > 56])#查看cesd大於56的數值
## [1] 57 58 57 60 58 58 57
## ------------------------------------------------------------------------
library(dplyr)#載入package
filter(newds, cesd > 56) %>% select(id, cesd)#選出cesd大於56的id
## id cesd
## 1 71 57
## 2 127 58
## 3 200 57
## 4 228 60
## 5 273 58
## 6 351 58
## 7 13 57
## ------------------------------------------------------------------------
with(newds, sort(cesd)[1:4])#將cesd從小排到大並查看前四筆
## [1] 1 3 3 4
with(newds, which.min(cesd))#查看cesd的最小值在第幾列
## [1] 199
## ------------------------------------------------------------------------
library(mosaic)#載入package
tally(~ is.na(f1g), data=newds)#查看flg的遺漏值並轉成tbl的樣式
## is.na(f1g)
## TRUE FALSE
## 1 452
favstats(~ f1g, data=newds)#描述統計
## min Q1 median Q3 max mean sd n missing
## 0 1 2 3 3 1.73 1.1 452 1
## ------------------------------------------------------------------------
# reverse code f1d, f1h, f1l and f1p
cesditems = with(newds, cbind(f1a, f1b, f1c, (3 - f1d), f1e, f1f, f1g,
(3 - f1h), f1i, f1j, f1k, (3 - f1l), f1m, f1n, f1o, (3 - f1p),
f1q, f1r, f1s, f1t))#選出反向提重新編碼
nmisscesd = apply(is.na(cesditems), 1, sum)#計算NA數量
ncesditems = cesditems#重新指定成另一筆資料
ncesditems[is.na(cesditems)] = 0#讓遺漏值變為0
newcesd = apply(ncesditems, 1, sum)#計算列的平均
imputemeancesd = 20/(20-nmisscesd)*newcesd#以平均值帶入遺漏值
## ------------------------------------------------------------------------
data.frame(newcesd, newds$cesd, nmisscesd, imputemeancesd)[nmisscesd>0,]
## newcesd newds.cesd nmisscesd imputemeancesd
## 4 15 15 1 15.8
## 17 19 19 1 20.0
## 87 44 44 1 46.3
## 101 17 17 1 17.9
## 154 29 29 1 30.5
## 177 44 44 1 46.3
## 229 39 39 1 41.1
## ----createdrink,message=FALSE-------------------------------------------
library(dplyr)#載入package
library(memisc)#載入package
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
##
## Attaching package: 'memisc'
## The following object is masked from 'package:Matrix':
##
## as.array
## The following objects are masked from 'package:dplyr':
##
## collect, recode, rename
## The following objects are masked from 'package:stats':
##
## contr.sum, contr.treatment, contrasts
## The following object is masked from 'package:base':
##
## as.array
newds = mutate(newds, drinkstat=
cases(
"abstinent" = i1==0,
"moderate" = (i1>0 & i1<=1 & i2<=3 & female==1) |
(i1>0 & i1<=2 & i2<=4 & female==0),
"highrisk" = ((i1>1 | i2>3) & female==1) |
((i1>2 | i2>4) & female==0)))#重新設定類別標籤
## ----echo=FALSE----------------------------------------------------------
library(mosaic)#載入package
## ----echo=FALSE----------------------------------------------------------
detach(package:memisc)#解除package
detach(package:MASS)#解除package
## ------------------------------------------------------------------------
library(dplyr)#載入package
tmpds = select(newds, i1, i2, female, drinkstat)#挑選變項
tmpds[365:370,]#查看第365到370項
## i1 i2 female drinkstat
## 365 6 24 0 highrisk
## 366 6 6 0 highrisk
## 367 0 0 0 abstinent
## 368 0 0 1 abstinent
## 369 8 8 0 highrisk
## 370 32 32 0 highrisk
## ------------------------------------------------------------------------
library(dplyr)#載入package
filter(tmpds, drinkstat=="moderate" & female==1)#選出drinkstat是moderate的女性
## i1 i2 female drinkstat
## 1 1 1 1 moderate
## 2 1 3 1 moderate
## 3 1 2 1 moderate
## 4 1 1 1 moderate
## 5 1 1 1 moderate
## 6 1 1 1 moderate
## 7 1 1 1 moderate
## ----message=FALSE-------------------------------------------------------
library(gmodels)#載入package
with(tmpds, CrossTable(drinkstat))#查看次數表
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 453
##
##
## | abstinent | moderate | highrisk |
## |-----------|-----------|-----------|
## | 68 | 28 | 357 |
## | 0.150 | 0.062 | 0.788 |
## |-----------|-----------|-----------|
##
##
##
##
## ------------------------------------------------------------------------
with(tmpds, CrossTable(drinkstat, female,
prop.t=FALSE, prop.c=FALSE, prop.chisq=FALSE))##橫列是喝酒程度、直欄是性別
##
##
## Cell Contents
## |-------------------------|
## | N |
## | N / Row Total |
## |-------------------------|
##
##
## Total Observations in Table: 453
##
##
## | female
## drinkstat | 0 | 1 | Row Total |
## -------------|-----------|-----------|-----------|
## abstinent | 42 | 26 | 68 |
## | 0.618 | 0.382 | 0.150 |
## -------------|-----------|-----------|-----------|
## moderate | 21 | 7 | 28 |
## | 0.750 | 0.250 | 0.062 |
## -------------|-----------|-----------|-----------|
## highrisk | 283 | 74 | 357 |
## | 0.793 | 0.207 | 0.788 |
## -------------|-----------|-----------|-----------|
## Column Total | 346 | 107 | 453 |
## -------------|-----------|-----------|-----------|
##
##
## ------------------------------------------------------------------------
newds = transform(newds,
gender=factor(female, c(0,1), c("Male","Female")))#將性別變項中的0,1對應到male和female
tally(~ female + gender, margin=FALSE, data=newds)
## gender
## female Male Female
## 0 346 0
## 1 0 107
## ------------------------------------------------------------------------
library(dplyr)#載入package
newds = arrange(ds, cesd, i1)#排序
newds[1:5, c("cesd", "i1", "id")]#查看1到5筆資料
## cesd i1 id
## 1 1 3 233
## 2 3 1 139
## 3 3 13 418
## 4 4 4 251
## 5 4 9 95
## ------------------------------------------------------------------------
library(dplyr)#載入package
females = filter(ds, female==1)#選出ds中的女性
with(females, mean(cesd))#計算女性的平均cesd
## [1] 36.9
# an alternative approach
mean(ds$cesd[ds$female==1])#計算女性的平均cesd
## [1] 36.9
## ------------------------------------------------------------------------
with(ds, tapply(cesd, female, mean))#計算平均
## 0 1
## 31.6 36.9
library(mosaic)#載入package
mean(cesd ~ female, data=ds)#以female來界定根據什麼的平均
## 0 1
## 31.6 36.9
#查看資料前六筆以及資料結構
dta4<-HSAUR3::backpain
head(dta4)
## ID status driver suburban
## 1 1 case yes yes
## 2 1 control yes no
## 3 2 case yes yes
## 4 2 control yes yes
## 5 3 case yes no
## 6 3 control yes yes
str(dta4)
## 'data.frame': 434 obs. of 4 variables:
## $ ID : Factor w/ 217 levels "1","2","3","4",..: 1 1 2 2 3 3 4 4 5 5 ...
## $ status : Factor w/ 2 levels "case","control": 1 2 1 2 1 2 1 2 1 2 ...
## $ driver : Factor w/ 2 levels "no","yes": 2 2 2 2 2 2 1 1 2 2 ...
## $ suburban: Factor w/ 2 levels "no","yes": 2 1 2 2 1 2 1 1 1 2 ...
dta4<-dta4 %>%
group_by(status,driver,suburban) %>%
summarise(n = n()) %>%
ungroup() %>%
tidyr::spread(status,n) %>%
mutate(total=case+control); dta4
## # A tibble: 4 x 5
## driver suburban case control total
## <fct> <fct> <int> <int> <int>
## 1 no no 26 47 73
## 2 no yes 6 7 13
## 3 yes no 64 63 127
## 4 yes yes 121 100 221
#載入package
library(datasets)
#合併資料
dta5<-merge(state.x77, USArrests,"row.names")
#計算相關
cor(dta5[,-1])
## Population Income Illiteracy Life Exp Murder.x HS Grad
## Population 1.0000 0.2082 0.1076 -0.0681 0.3436 -0.0985
## Income 0.2082 1.0000 -0.4371 0.3403 -0.2301 0.6199
## Illiteracy 0.1076 -0.4371 1.0000 -0.5885 0.7030 -0.6572
## Life Exp -0.0681 0.3403 -0.5885 1.0000 -0.7808 0.5822
## Murder.x 0.3436 -0.2301 0.7030 -0.7808 1.0000 -0.4880
## HS Grad -0.0985 0.6199 -0.6572 0.5822 -0.4880 1.0000
## Frost -0.3322 0.2263 -0.6719 0.2621 -0.5389 0.3668
## Area 0.0225 0.3633 0.0773 -0.1073 0.2284 0.3335
## Murder.y 0.3202 -0.2152 0.7068 -0.7785 0.9337 -0.5216
## Assault 0.3170 0.0409 0.5110 -0.6267 0.7398 -0.2303
## UrbanPop 0.5126 0.4805 -0.0622 0.2715 0.0164 0.3587
## Rape 0.3052 0.3574 0.1546 -0.2696 0.5800 0.2707
## Frost Area Murder.y Assault UrbanPop Rape
## Population -0.3322 0.0225 0.3202 0.3170 0.5126 0.305
## Income 0.2263 0.3633 -0.2152 0.0409 0.4805 0.357
## Illiteracy -0.6719 0.0773 0.7068 0.5110 -0.0622 0.155
## Life Exp 0.2621 -0.1073 -0.7785 -0.6267 0.2715 -0.270
## Murder.x -0.5389 0.2284 0.9337 0.7398 0.0164 0.580
## HS Grad 0.3668 0.3335 -0.5216 -0.2303 0.3587 0.271
## Frost 1.0000 0.0592 -0.5414 -0.4682 -0.2462 -0.279
## Area 0.0592 1.0000 0.1481 0.2312 -0.0615 0.525
## Murder.y -0.5414 0.1481 1.0000 0.8019 0.0696 0.564
## Assault -0.4682 0.2312 0.8019 1.0000 0.2589 0.665
## UrbanPop -0.2462 -0.0615 0.0696 0.2589 1.0000 0.411
## Rape -0.2792 0.5250 0.5636 0.6652 0.4113 1.000
#圖表
GGally::ggpairs(dta5[,-1])
####從圖中看不太出結論
#載入資料庫
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(tibble)
#整理資料
dta6<-merge(rownames_to_column(mammals),rownames_to_column(Animals), all = TRUE)
#查看是否有重複
duplicated(dta6)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [67] FALSE
#讀資料
dta7<-Ecdat::Caschool
#新增變項並查看新資料樣式
dta7<- dta7%>%
mutate(ratio = enrltot/teachers,
Reading = cut(readscr, breaks = quantile(readscr, probs = c(0, .33, .67, 1)),
label = c("L", "M", "H"), ordered = T))
head(dta7)
## distcod county district grspan enrltot
## 1 75119 Alameda Sunol Glen Unified KK-08 195
## 2 61499 Butte Manzanita Elementary KK-08 240
## 3 61549 Butte Thermalito Union Elementary KK-08 1550
## 4 61457 Butte Golden Feather Union Elementary KK-08 243
## 5 61523 Butte Palermo Union Elementary KK-08 1335
## 6 62042 Fresno Burrel Union Elementary KK-08 137
## teachers calwpct mealpct computer testscr compstu expnstu str avginc
## 1 10.9 0.51 2.04 67 691 0.344 6385 17.9 22.69
## 2 11.1 15.42 47.92 101 661 0.421 5099 21.5 9.82
## 3 82.9 55.03 76.32 169 644 0.109 5502 18.7 8.98
## 4 14.0 36.48 77.05 85 648 0.350 7102 17.4 8.98
## 5 71.5 33.11 78.43 171 641 0.128 5236 18.7 9.08
## 6 6.4 12.32 86.96 25 606 0.182 5580 21.4 10.41
## elpct readscr mathscr ratio Reading
## 1 0.00 692 690 17.9 H
## 2 4.58 660 662 21.5 M
## 3 30.00 636 651 18.7 L
## 4 0.00 652 644 17.4 M
## 5 13.86 642 640 18.7 L
## 6 12.41 606 605 21.4 L
#繪圖
library(lattice)
xyplot(readscr~ratio|Reading,data=dta7,
type=c("p","g","r"),layout=c(3,1),
xlab="Student-Teacher Ratio",
ylab="Reading Score")
#讀資料
dta8<-car::Prestige
head(dta8)
## education income women prestige census type
## gov.administrators 13.1 12351 11.16 68.8 1113 prof
## general.managers 12.3 25879 4.02 69.1 1130 prof
## accountants 12.8 9271 15.70 63.4 1171 prof
## purchasing.officers 11.4 8865 9.11 56.8 1175 prof
## chemists 14.6 8403 11.68 73.5 2111 prof
## physicists 15.6 11030 5.13 77.6 2113 prof
str(dta8)
## 'data.frame': 102 obs. of 6 variables:
## $ education: num 13.1 12.3 12.8 11.4 14.6 ...
## $ income : int 12351 25879 9271 8865 8403 11030 8258 14163 11377 11023 ...
## $ women : num 11.16 4.02 15.7 9.11 11.68 ...
## $ prestige : num 68.8 69.1 63.4 56.8 73.5 77.6 72.6 78.1 73.1 68.8 ...
## $ census : int 1113 1130 1171 1175 2111 2113 2133 2141 2143 2153 ...
## $ type : Factor w/ 3 levels "bc","prof","wc": 2 2 2 2 2 2 2 2 2 2 ...
#Find the median prestige score for each of the three types of occupation, respectively.
dta8_1<-dta8%>%
group_by(type) %>%
summarize(m=median(prestige,na.rm=TRUE));dta8_1
## # A tibble: 4 x 2
## type m
## <fct> <dbl>
## 1 bc 35.9
## 2 prof 68.4
## 3 wc 41.5
## 4 <NA> 35.0
#整理資料
dta8_2<-dta8 %>%
na.omit%>%
group_by(type) %>%
mutate(m=median(prestige),
PrestigeLV=memisc::cases("H"=prestige>=m,"Low"=prestige < m))
#繪圖
xyplot(income~education|type,
group=PrestigeLV,
data=dta8_2,
type=c("p","g","r"),
layout=c(3, 1),
auto.key=list(space="right", row=2),
xlab="Average education of occupational incumbents in 1971",
ylab="Average income of incumbents in 1971")
####三種職業的聲望對於教育和收入之間的效果不一致
#讀資料並查看資料格式
dta9<-mlmRev::Hsb82
dta9$school<-factor(dta9$school)
head(dta9)
## school minrty sx ses mAch meanses sector cses
## 1 1224 No Female -1.528 5.88 -0.434 Public -1.0936
## 2 1224 No Female -0.588 19.71 -0.434 Public -0.1536
## 3 1224 No Male -0.528 20.35 -0.434 Public -0.0936
## 4 1224 No Male -0.668 8.78 -0.434 Public -0.2336
## 5 1224 No Male -0.158 17.90 -0.434 Public 0.2764
## 6 1224 No Male 0.022 4.58 -0.434 Public 0.4564
str(dta9)
## 'data.frame': 7185 obs. of 8 variables:
## $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
## $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ sx : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
## $ ses : num -1.528 -0.588 -0.528 -0.668 -0.158 ...
## $ mAch : num 5.88 19.71 20.35 8.78 17.9 ...
## $ meanses: num -0.434 -0.434 -0.434 -0.434 -0.434 ...
## $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
## $ cses : num -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
#計算95CI
dta9_1 <- dta9 %>%
group_by(sector, school) %>%
summarise(m = mean(mAch, na.rm = TRUE),
s = sd(mAch, na.rm = TRUE),
n = n(),
se = s/sqrt(n)) %>%
mutate(lower.ci = m-2*se, upper.ci = m+2*se)
head(dta9_1)
## # A tibble: 6 x 8
## # Groups: sector [1]
## sector school m s n se lower.ci upper.ci
## <fct> <ord> <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1 Public 8367 4.55 4.43 14 1.18 2.19 6.92
## 2 Public 8854 4.24 5.41 32 0.956 2.33 6.15
## 3 Public 4458 5.81 4.48 48 0.646 4.52 7.10
## 4 Public 5762 4.32 4.99 37 0.821 2.68 5.97
## 5 Public 6990 5.98 5.31 53 0.729 4.52 7.44
## 6 Public 5815 7.27 5.59 25 1.12 5.04 9.51
#繪圖
ggplot(dta9_1, aes(x = school, y = m)) +
geom_point() +
geom_errorbar(aes(ymax = upper.ci, ymin = lower.ci))+
coord_flip()+
facet_wrap(~sector)+
labs(x = "School",y = "Math Mean Score by School")