install.packages("moonBook")
install.packages("devtools")
devtools::install_github("cardiomoon/moonBook")
devtools::install_github("cardiomoon/ztable")
require(moonBook) # library(moonBook)
Loading required package: moonBook
if (!file.exists("data")) {
dir.create("data")
}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl, destfile = "./data/cameras.csv", method = "curl")
dateDownloaded <- date()
cameraData <- read.table("./data/cameras.csv", sep = ",", header = TRUE)
head(cameraData)
address direction street crossStreet
1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
intersection Location.1
1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
6 Erdman & Macon St (39.3068045671, -76.5593167803)
read.csv sets sep=“,” and header=TRUE
cameraData <- read.csv("./data/cameras.csv")
head(cameraData)
address direction street crossStreet
1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
intersection Location.1
1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
6 Erdman & Macon St (39.3068045671, -76.5593167803)
In my experience, the biggest trouble with reading flat files are quotation marks ` or " placed in data values, setting quote=“” often resolves these.
한글 encoding 은 둘 중의 하나로 되어있다.
한글데이타의 예
names=c("문건웅","박윤정","황유미")
sex=c("M","F","F")
data=data.frame(이름=names,성별=sex)
data
이름 성별
1 문건웅 M
2 박윤정 F
3 황유미 F
write.csv(data,"test.csv")
test=read.csv("test.csv",fileEncoding="euc-kr")
Warning in read.table(file = file, header = header, sep = sep, quote =
quote, : invalid input found on input connection 'test.csv'
Warning in read.table(file = file, header = header, sep = sep, quote =
quote, : incomplete final line found by readTableHeader on 'test.csv'
test
[1] X X...
<0 rows> (or 0-length row.names)
test=read.csv("test.csv",fileEncoding="utf-8")
test
X 이름 성별
1 1 문건웅 M
2 2 박윤정 F
3 3 황유미 F
if(!file.exists("data")){dir.create("data")}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xlsx?accessType=DOWNLOAD"
download.file(fileUrl,destfile="./data/cameras.xlsx",method="curl")
dateDownloaded <- date()
library(readxl)
cameraData <- read_excel("./data/cameras.xlsx")
head(cameraData)
address direction street crossStreet
1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
intersection Location 1
1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
6 Erdman & Macon St (39.3068045671, -76.5593167803)
library(foreign)
read.spss("file_name")
head(acs,10)
age sex cardiogenicShock entry Dx EF height weight
1 62 Male No Femoral STEMI 18.0 168 72
2 78 Female No Femoral STEMI 18.4 148 48
3 76 Female Yes Femoral STEMI 20.0 NA NA
4 89 Female No Femoral STEMI 21.8 165 50
5 56 Male No Radial NSTEMI 21.8 162 64
6 73 Female No Radial Unstable Angina 22.0 153 59
7 58 Male No Radial Unstable Angina 24.7 167 78
8 62 Male No Femoral STEMI 26.6 160 50
9 59 Female No Radial Unstable Angina 28.5 152 67
10 71 Male No Femoral STEMI 31.1 168 60
BMI obesity TC LDLC HDLC TG DM HBP smoking
1 25.51020 Yes 215 154 35 155 Yes No Smoker
2 21.91381 No NA NA NA 166 No Yes Never
3 NA No NA NA NA NA No Yes Never
4 18.36547 No 121 73 20 89 No No Never
5 24.38653 No 195 151 36 63 Yes Yes Smoker
6 25.20398 Yes 184 112 38 137 Yes Yes Never
7 27.96802 Yes 161 91 34 196 Yes Yes Ex-smoker
8 19.53125 No 136 88 33 30 Yes Yes Ex-smoker
9 28.99931 Yes 239 161 34 118 Yes Yes Never
10 21.25850 No 169 88 54 141 Yes No Smoker
str(acs)
'data.frame': 857 obs. of 17 variables:
$ age : int 62 78 76 89 56 73 58 62 59 71 ...
$ sex : chr "Male" "Female" "Female" "Female" ...
$ cardiogenicShock: chr "No" "No" "Yes" "No" ...
$ entry : chr "Femoral" "Femoral" "Femoral" "Femoral" ...
$ Dx : chr "STEMI" "STEMI" "STEMI" "STEMI" ...
$ EF : num 18 18.4 20 21.8 21.8 22 24.7 26.6 28.5 31.1 ...
$ height : num 168 148 NA 165 162 153 167 160 152 168 ...
$ weight : num 72 48 NA 50 64 59 78 50 67 60 ...
$ BMI : num 25.5 21.9 NA 18.4 24.4 ...
$ obesity : chr "Yes" "No" "No" "No" ...
$ TC : num 215 NA NA 121 195 184 161 136 239 169 ...
$ LDLC : int 154 NA NA 73 151 112 91 88 161 88 ...
$ HDLC : int 35 NA NA 20 36 38 34 33 34 54 ...
$ TG : int 155 166 NA 89 63 137 196 30 118 141 ...
$ DM : chr "Yes" "No" "No" "No" ...
$ HBP : chr "No" "Yes" "Yes" "No" ...
$ smoking : chr "Smoker" "Never" "Never" "Never" ...
summary(acs)
age sex cardiogenicShock entry
Min. :28.00 Length:857 Length:857 Length:857
1st Qu.:55.00 Class :character Class :character Class :character
Median :64.00 Mode :character Mode :character Mode :character
Mean :63.31
3rd Qu.:72.00
Max. :91.00
Dx EF height weight
Length:857 Min. :18.00 Min. :130.0 Min. : 30.00
Class :character 1st Qu.:50.45 1st Qu.:158.0 1st Qu.: 58.00
Mode :character Median :58.10 Median :165.0 Median : 65.00
Mean :55.83 Mean :163.2 Mean : 64.84
3rd Qu.:62.35 3rd Qu.:170.0 3rd Qu.: 72.00
Max. :79.00 Max. :185.0 Max. :112.00
NA's :134 NA's :93 NA's :91
BMI obesity TC LDLC
Min. :15.62 Length:857 Min. : 25.0 Min. : 15.0
1st Qu.:22.13 Class :character 1st Qu.:154.0 1st Qu.: 88.0
Median :24.16 Mode :character Median :183.0 Median :114.0
Mean :24.28 Mean :185.2 Mean :116.6
3rd Qu.:26.17 3rd Qu.:213.0 3rd Qu.:141.0
Max. :41.42 Max. :493.0 Max. :366.0
NA's :93 NA's :23 NA's :24
HDLC TG DM HBP
Min. : 4.00 Min. : 11.0 Length:857 Length:857
1st Qu.:32.00 1st Qu.: 68.0 Class :character Class :character
Median :38.00 Median :105.5 Mode :character Mode :character
Mean :38.24 Mean :125.2
3rd Qu.:45.00 3rd Qu.:154.0
Max. :89.00 Max. :877.0
NA's :23 NA's :15
smoking
Length:857
Class :character
Mode :character
table(acs$sex,acs$DM)
No Yes
Female 173 114
Male 380 190
result=table(acs$sex,acs$DM)
result
No Yes
Female 173 114
Male 380 190
chisq.test(result)
Pearson's Chi-squared test with Yates' continuity correction
data: result
X-squared = 3.1296, df = 1, p-value = 0.07688
chisq.test(result,correct=FALSE)
Pearson's Chi-squared test
data: result
X-squared = 3.403, df = 1, p-value = 0.06508
fisher.test(result)
Fisher's Exact Test for Count Data
data: result
p-value = 0.06965
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.5600501 1.0298417
sample estimates:
odds ratio
0.7590381
#xtabs(도수~가로+세로)
result1=xtabs(~sex+DM,data=acs)
result1
DM
sex No Yes
Female 173 114
Male 380 190
addmargins(result1)
DM
sex No Yes Sum
Female 173 114 287
Male 380 190 570
Sum 553 304 857
chisq.test(result1)
Pearson's Chi-squared test with Yates' continuity correction
data: result1
X-squared = 3.1296, df = 1, p-value = 0.07688
fisher.test(result1)
Fisher's Exact Test for Count Data
data: result1
p-value = 0.06965
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
0.5600501 1.0298417
sample estimates:
odds ratio
0.7590381
https://en.wikipedia.org/wiki/Yates%27s_correction_for_continuity
One-sample t-test : 알려져 있는 평균과 비교
paired t-test vs Wilcoxon Signed-rank test
두 군 간의 평균 비교 : t.test vs Wilcoxon Rank-sum test
세 군 이상 의 평균비교 : one-way ANOVA vs Kruskall-Wallis test
2006년 조사에 의하면 한국인의 1인 1일 평균 알콜섭취량이 8.1g 이다. 2008년 대통령 선거로 알코올 섭취량이 달라졌는지 조사하였다.
x=c(15.5,11.21,12.67,8.87,12.15,9.88,2.06,14.50,0,4.97)
mean(x)
[1] 9.181
sd(x)
[1] 5.234965
검정 순서
자료가 정규 분포를 하는지 검정 : shapiro.test()
정규분포 하는 경우 : t.test()
정규분포 하지 않는 경우 : Wilcoxon Signed-rank test
shapiro.test(x)
Shapiro-Wilk normality test
data: x
W = 0.92341, p-value = 0.3863
t.test(x,mu=8.1)
One Sample t-test
data: x
t = 0.653, df = 9, p-value = 0.5301
alternative hypothesis: true mean is not equal to 8.1
95 percent confidence interval:
5.436132 12.925868
sample estimates:
mean of x
9.181
단측검정의 경우 : 귀무가설 mu > 8.1, 유의수준 99%
t.test(x,mu=8.1,conf.level=0.99,alter="greater")
One Sample t-test
data: x
t = 0.653, df = 9, p-value = 0.265
alternative hypothesis: true mean is greater than 8.1
99 percent confidence interval:
4.510276 Inf
sample estimates:
mean of x
9.181
쌍을 이룬 두 변수의 차이를 보는 검정
t-test의 가정 : 자료가 정규분포를 한다
검정 순서
1) 자료가 정규 분포를 하는지 검정(shapiro.test())
2) 정규분포 하는 경우 : t.test
3) 정규분포 하지 않는 경우 : Wilcoxon Signed-rank test
data(sleep)
sleep
extra group ID
1 0.7 1 1
2 -1.6 1 2
3 -0.2 1 3
4 -1.2 1 4
5 -0.1 1 5
6 3.4 1 6
7 3.7 1 7
8 0.8 1 8
9 0.0 1 9
10 2.0 1 10
11 1.9 2 1
12 0.8 2 2
13 1.1 2 3
14 0.1 2 4
15 -0.1 2 5
16 4.4 2 6
17 5.5 2 7
18 1.6 2 8
19 4.6 2 9
20 3.4 2 10
str(sleep)
'data.frame': 20 obs. of 3 variables:
$ extra: num 0.7 -1.6 -0.2 -1.2 -0.1 3.4 3.7 0.8 0 2 ...
$ group: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
$ ID : Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
?sleep
A data frame with 20 observations on 3 variables.
[, 1] extra numeric increase in hours of sleep [, 2] group factor drug given [, 3] ID factor patient ID
with(sleep,
shapiro.test(extra[group == 2] - extra[group == 1]))
Shapiro-Wilk normality test
data: extra[group == 2] - extra[group == 1]
W = 0.82987, p-value = 0.03334
정규분포를 따르지 않으므로 wilcoxon test실시
with(sleep,
wilcox.test(extra[group == 2] - extra[group == 1]))
Warning in wilcox.test.default(extra[group == 2] - extra[group == 1]):
cannot compute exact p-value with ties
Warning in wilcox.test.default(extra[group == 2] - extra[group == 1]):
cannot compute exact p-value with zeroes
Wilcoxon signed rank test with continuity correction
data: extra[group == 2] - extra[group == 1]
V = 45, p-value = 0.009091
alternative hypothesis: true location is not equal to 0
비모수방법은 자료의 순서를 이용하는데 동일한 값이 있어 순서를 정하는 데 문제가 있어 warning생김 : exact=FALSE 옵션을 추가한다
with(sleep,
wilcox.test(extra[group == 2] - extra[group == 1],exact=FALSE))
Wilcoxon signed rank test with continuity correction
data: extra[group == 2] - extra[group == 1]
V = 45, p-value = 0.009091
alternative hypothesis: true location is not equal to 0
정규분포를 따르는 경우 Student’s paired t-test 실시
with(sleep,
t.test(extra[group == 1],
extra[group == 2], paired = TRUE))
Paired t-test
data: extra[group == 1] and extra[group == 2]
t = -4.0621, df = 9, p-value = 0.002833
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-2.4598858 -0.7001142
sample estimates:
mean of the differences
-1.58
약을 1에서 2로 바꾸었을때 수면시간이 늘어나는가? The sleep prolongations
sleep1 <- with(sleep, extra[group == 2] - extra[group == 1])
summary(sleep1)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.00 1.05 1.30 1.58 1.70 4.60
stripchart(sleep1, method = "stack", xlab = "hours",
main = "Sleep prolongation (n = 10)")
boxplot(sleep1, horizontal = TRUE, add = TRUE,
at = .6, pars = list(boxwex = 0.5, staplewex = 0.25))
학생 10명이 있다. 수업전 test 결과를 pre, 수업후 test결과를 post라고 하자
id=1:10
pre=c(70,72,80,85,90,90,60,55,80,90)
post=c(80,73,84,90,95,85,60,50,84,100)
data=data.frame(id,pre,post)
data
id pre post
1 1 70 80
2 2 72 73
3 3 80 84
4 4 85 90
5 5 90 95
6 6 90 85
7 7 60 60
8 8 55 50
9 9 80 84
10 10 90 100
이 데이타는 전형적인 wide form이다. 이 데이타를 long form으로 바꾸려면 reshape2::melt()를 사용한다.
require(reshape2)
Loading required package: reshape2
longdata=melt(data,id="id")
longdata
id variable value
1 1 pre 70
2 2 pre 72
3 3 pre 80
4 4 pre 85
5 5 pre 90
6 6 pre 90
7 7 pre 60
8 8 pre 55
9 9 pre 80
10 10 pre 90
11 1 post 80
12 2 post 73
13 3 post 84
14 4 post 90
15 5 post 95
16 6 post 85
17 7 post 60
18 8 post 50
19 9 post 84
20 10 post 100
long form의 데이타를 wide form의 데이타로 바꾸려면 reshape2::dcast()를 사용한다.
widedata=dcast(longdata,id~variable,value.var="value")
widedata
id pre post
1 1 70 80
2 2 72 73
3 3 80 84
4 4 85 90
5 5 90 95
6 6 90 85
7 7 60 60
8 8 55 50
9 9 80 84
10 10 90 100
정규분포 하는경우
분산이 같은지 검정( var.test() )
분산이 다르면 Welch의 t-test 검정(t.test())
분산이 같으면 pooled variance를 이용한 t-test (var.equal=TRUE)
선형모형에 적합시킨후 잔차가 정규분포하는지 검정
model=lm(age~sex,data=acs)
shapiro.test(resid(model))
Shapiro-Wilk normality test
data: resid(model)
W = 0.99343, p-value = 0.000808
result=aggregate(age~sex,data=acs,function(x) { c(mean(x),sd(x))})
colnames(result$age)=c("mean","sd")
result
sex age.mean age.sd
1 Female 68.67596 10.73242
2 Male 60.61053 11.22885
boxplot(age~sex,data=acs)
# 분산이 같은지 F 검정
var.test(age~sex,data=acs)
F test to compare two variances
data: age by sex
F = 0.91353, num df = 286, denom df = 569, p-value = 0.3866
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.7495537 1.1209741
sample estimates:
ratio of variances
0.9135342
# 분산이 같으므로 var.equal=TRUE로 검정
t.test(age~sex,data=acs,var.equal=TRUE)
Two Sample t-test
data: age by sex
t = 10.071, df = 855, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
6.493487 9.637377
sample estimates:
mean in group Female mean in group Male
68.67596 60.61053
# 분산이 다르다면 var.equal=FALSE로 검정
t.test(age~sex,data=acs,var.equal=FALSE)
Welch Two Sample t-test
data: age by sex
t = 10.222, df = 596.99, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
6.515847 9.615016
sample estimates:
mean in group Female mean in group Male
68.67596 60.61053
# 비모수검정
wilcox.test(age~sex,data=acs)
Wilcoxon rank sum test with continuity correction
data: age by sex
W = 115270, p-value < 2.2e-16
alternative hypothesis: true location shift is not equal to 0
aggregate(mpg~cyl,data=mtcars,mean)
cyl mpg
1 4 26.66364
2 6 19.74286
3 8 15.10000
boxplot(mpg~cyl,data=mtcars)
# cyl가 numeric이므로 factor변수로 바꾸어준다
mtcars$group=factor(mtcars$cyl)
out=lm(mpg~group,data=mtcars)
out
Call:
lm(formula = mpg ~ group, data = mtcars)
Coefficients:
(Intercept) group6 group8
26.664 -6.921 -11.564
anova(out)
Analysis of Variance Table
Response: mpg
Df Sum Sq Mean Sq F value Pr(>F)
group 2 824.78 412.39 39.697 4.979e-09 ***
Residuals 29 301.26 10.39
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(out)
# 다중비교
#install.packages("multcomp")
library(multcomp)
Loading required package: mvtnorm
Loading required package: survival
Loading required package: TH.data
tukey=glht(out,linfct=mcp(group="Tukey"))
summary(tukey)
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lm(formula = mpg ~ group, data = mtcars)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
6 - 4 == 0 -6.921 1.558 -4.441 <0.001 ***
8 - 4 == 0 -11.564 1.299 -8.905 <0.001 ***
8 - 6 == 0 -4.643 1.492 -3.112 0.0111 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
plot(tukey)
kruskal.test(mpg~group,data=mtcars)
Kruskal-Wallis rank sum test
data: mpg by group
Kruskal-Wallis chi-squared = 25.746, df = 2, p-value = 2.566e-06
boxplot(mpg~group+am,data=mtcars)
boxplot(mpg~am+group,data=mtcars)
with(mtcars,interaction.plot(cyl,am,mpg))
with(mtcars,interaction.plot(am,cyl,mpg))
out=lm(mpg~group+am,data=mtcars)
summary(out)
Call:
lm(formula = mpg ~ group + am, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-5.9618 -1.4971 -0.2057 1.8907 6.5382
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 24.802 1.323 18.752 < 2e-16 ***
group6 -6.156 1.536 -4.009 0.000411 ***
group8 -10.068 1.452 -6.933 1.55e-07 ***
am 2.560 1.298 1.973 0.058457 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 3.073 on 28 degrees of freedom
Multiple R-squared: 0.7651, Adjusted R-squared: 0.7399
F-statistic: 30.4 on 3 and 28 DF, p-value: 5.959e-09
plot(mpg~wt,data=mtcars,col=cyl)
legend("topright",legend=c("4","6","8"),pch=1,col=mtcars$cyl)
out=lm(mpg~wt+group,data=mtcars)
anova(out)
Analysis of Variance Table
Response: mpg
Df Sum Sq Mean Sq F value Pr(>F)
wt 1 847.73 847.73 129.6650 5.079e-12 ***
group 2 95.26 47.63 7.2856 0.002835 **
Residuals 28 183.06 6.54
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(out)
Call:
lm(formula = mpg ~ wt + group, data = mtcars)
Residuals:
Min 1Q Median 3Q Max
-4.5890 -1.2357 -0.5159 1.3845 5.7915
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 33.9908 1.8878 18.006 < 2e-16 ***
wt -3.2056 0.7539 -4.252 0.000213 ***
group6 -4.2556 1.3861 -3.070 0.004718 **
group8 -6.0709 1.6523 -3.674 0.000999 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 2.557 on 28 degrees of freedom
Multiple R-squared: 0.8374, Adjusted R-squared: 0.82
F-statistic: 48.08 on 3 and 28 DF, p-value: 3.594e-11
dunnett=glht(out,linfct=mcp(group="Dunnett"))
summary(dunnett)
Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Dunnett Contrasts
Fit: lm(formula = mpg ~ wt + group, data = mtcars)
Linear Hypotheses:
Estimate Std. Error t value Pr(>|t|)
6 - 4 == 0 -4.256 1.386 -3.070 0.00867 **
8 - 4 == 0 -6.071 1.652 -3.674 0.00188 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Adjusted p values reported -- single-step method)
library(lattice)
Attaching package: 'lattice'
The following object is masked from 'package:moonBook':
densityplot
xyplot(mpg~wt|cyl,data=mtcars)
xyplot(mpg~wt|am,data=mtcars)
그룹간의 비교를 한번에 : moonBook 패키지의 이용
library(moonBook)
library(ztable)
Welcome to package ztable ver 0.1.6
str(acs)
'data.frame': 857 obs. of 17 variables:
$ age : int 62 78 76 89 56 73 58 62 59 71 ...
$ sex : chr "Male" "Female" "Female" "Female" ...
$ cardiogenicShock: chr "No" "No" "Yes" "No" ...
$ entry : chr "Femoral" "Femoral" "Femoral" "Femoral" ...
$ Dx : chr "STEMI" "STEMI" "STEMI" "STEMI" ...
$ EF : num 18 18.4 20 21.8 21.8 22 24.7 26.6 28.5 31.1 ...
$ height : num 168 148 NA 165 162 153 167 160 152 168 ...
$ weight : num 72 48 NA 50 64 59 78 50 67 60 ...
$ BMI : num 25.5 21.9 NA 18.4 24.4 ...
$ obesity : chr "Yes" "No" "No" "No" ...
$ TC : num 215 NA NA 121 195 184 161 136 239 169 ...
$ LDLC : int 154 NA NA 73 151 112 91 88 161 88 ...
$ HDLC : int 35 NA NA 20 36 38 34 33 34 54 ...
$ TG : int 155 166 NA 89 63 137 196 30 118 141 ...
$ DM : chr "Yes" "No" "No" "No" ...
$ HBP : chr "No" "Yes" "Yes" "No" ...
$ smoking : chr "Smoker" "Never" "Never" "Never" ...
mytable(sex~age+DM,data=acs)
Descriptive Statistics by 'sex'
______________________________________
Female Male p
(N=287) (N=570)
--------------------------------------
age 68.7 ± 10.7 60.6 ± 11.2 0.000
DM 0.077
- No 173 (60.3%) 380 (66.7%)
- Yes 114 (39.7%) 190 (33.3%)
--------------------------------------
mytable(sex~.,data=acs)
Descriptive Statistics by 'sex'
____________________________________________________
Female Male p
(N=287) (N=570)
----------------------------------------------------
age 68.7 ± 10.7 60.6 ± 11.2 0.000
cardiogenicShock 0.136
- No 275 (95.8%) 530 (93.0%)
- Yes 12 ( 4.2%) 40 ( 7.0%)
entry 0.035
- Femoral 119 (41.5%) 193 (33.9%)
- Radial 168 (58.5%) 377 (66.1%)
Dx 0.012
- NSTEMI 50 (17.4%) 103 (18.1%)
- STEMI 84 (29.3%) 220 (38.6%)
- Unstable Angina 153 (53.3%) 247 (43.3%)
EF 56.3 ± 10.1 55.6 ± 9.4 0.387
height 153.8 ± 6.2 167.9 ± 6.1 0.000
weight 57.2 ± 9.3 68.7 ± 10.3 0.000
BMI 24.2 ± 3.6 24.3 ± 3.2 0.611
obesity 0.580
- No 194 (67.6%) 373 (65.4%)
- Yes 93 (32.4%) 197 (34.6%)
TC 188.9 ± 51.1 183.3 ± 45.9 0.124
LDLC 117.8 ± 41.2 116.0 ± 41.1 0.561
HDLC 39.0 ± 11.5 37.8 ± 10.9 0.145
TG 119.9 ± 76.2 127.9 ± 97.3 0.195
DM 0.077
- No 173 (60.3%) 380 (66.7%)
- Yes 114 (39.7%) 190 (33.3%)
HBP 0.000
- No 83 (28.9%) 273 (47.9%)
- Yes 204 (71.1%) 297 (52.1%)
smoking 0.000
- Ex-smoker 49 (17.1%) 155 (27.2%)
- Never 209 (72.8%) 123 (21.6%)
- Smoker 29 (10.1%) 292 (51.2%)
----------------------------------------------------
mytable(sex~.-smoking,data=acs)
Descriptive Statistics by 'sex'
____________________________________________________
Female Male p
(N=287) (N=570)
----------------------------------------------------
age 68.7 ± 10.7 60.6 ± 11.2 0.000
cardiogenicShock 0.136
- No 275 (95.8%) 530 (93.0%)
- Yes 12 ( 4.2%) 40 ( 7.0%)
entry 0.035
- Femoral 119 (41.5%) 193 (33.9%)
- Radial 168 (58.5%) 377 (66.1%)
Dx 0.012
- NSTEMI 50 (17.4%) 103 (18.1%)
- STEMI 84 (29.3%) 220 (38.6%)
- Unstable Angina 153 (53.3%) 247 (43.3%)
EF 56.3 ± 10.1 55.6 ± 9.4 0.387
height 153.8 ± 6.2 167.9 ± 6.1 0.000
weight 57.2 ± 9.3 68.7 ± 10.3 0.000
BMI 24.2 ± 3.6 24.3 ± 3.2 0.611
obesity 0.580
- No 194 (67.6%) 373 (65.4%)
- Yes 93 (32.4%) 197 (34.6%)
TC 188.9 ± 51.1 183.3 ± 45.9 0.124
LDLC 117.8 ± 41.2 116.0 ± 41.1 0.561
HDLC 39.0 ± 11.5 37.8 ± 10.9 0.145
TG 119.9 ± 76.2 127.9 ± 97.3 0.195
DM 0.077
- No 173 (60.3%) 380 (66.7%)
- Yes 114 (39.7%) 190 (33.3%)
HBP 0.000
- No 83 (28.9%) 273 (47.9%)
- Yes 204 (71.1%) 297 (52.1%)
----------------------------------------------------
res=mytable(Dx~.,data=acs)
res
Descriptive Statistics by 'Dx'
_________________________________________________________________
NSTEMI STEMI Unstable Angina p
(N=153) (N=304) (N=400)
-----------------------------------------------------------------
age 64.3 ± 12.3 62.1 ± 12.1 63.8 ± 11.0 0.073
sex 0.012
- Female 50 (32.7%) 84 (27.6%) 153 (38.2%)
- Male 103 (67.3%) 220 (72.4%) 247 (61.8%)
cardiogenicShock 0.000
- No 149 (97.4%) 256 (84.2%) 400 (100.0%)
- Yes 4 ( 2.6%) 48 (15.8%) 0 ( 0.0%)
entry 0.001
- Femoral 58 (37.9%) 133 (43.8%) 121 (30.2%)
- Radial 95 (62.1%) 171 (56.2%) 279 (69.8%)
EF 55.0 ± 9.3 52.4 ± 9.5 59.2 ± 8.7 0.000
height 163.3 ± 8.2 165.1 ± 8.2 161.7 ± 9.7 0.000
weight 64.3 ± 10.2 65.7 ± 11.6 64.5 ± 11.6 0.361
BMI 24.1 ± 3.2 24.0 ± 3.3 24.6 ± 3.4 0.064
obesity 0.186
- No 106 (69.3%) 209 (68.8%) 252 (63.0%)
- Yes 47 (30.7%) 95 (31.2%) 148 (37.0%)
TC 193.7 ± 53.6 183.2 ± 43.4 183.5 ± 48.3 0.057
LDLC 126.1 ± 44.7 116.7 ± 39.5 112.9 ± 40.4 0.004
HDLC 38.9 ± 11.9 38.5 ± 11.0 37.8 ± 10.9 0.501
TG 130.1 ± 88.5 106.5 ± 72.0 137.4 ± 101.6 0.000
DM 0.209
- No 96 (62.7%) 208 (68.4%) 249 (62.2%)
- Yes 57 (37.3%) 96 (31.6%) 151 (37.8%)
HBP 0.002
- No 62 (40.5%) 150 (49.3%) 144 (36.0%)
- Yes 91 (59.5%) 154 (50.7%) 256 (64.0%)
smoking 0.000
- Ex-smoker 42 (27.5%) 66 (21.7%) 96 (24.0%)
- Never 50 (32.7%) 97 (31.9%) 185 (46.2%)
- Smoker 61 (39.9%) 141 (46.4%) 119 (29.8%)
-----------------------------------------------------------------
mytable(Dx+sex~.,data=acs)
Descriptive Statistics stratified by 'Dx' and 'sex'
___________________________________________________________________________________________________________________
STEMI NSTEMI Unstable Angina
------------------------------- -------------------------------- --------------------------------
Female Male p Female Male p Female Male p
(N=84) (N=220) (N=50) (N=103) (N=153) (N=247)
-------------------------------------------------------------------------------------------------------------------
age 69.1 ± 10.4 59.4 ± 11.7 0.000 70.9 ± 11.4 61.1 ± 11.6 0.000 67.7 ± 10.7 61.4 ± 10.6 0.000
cardiogenicShock 0.535 1.000
- No 73 (86.9%) 183 (83.2%) 49 (98.0%) 100 (97.1%) 153 (100.0%) 247 (100.0%)
- Yes 11 (13.1%) 37 (16.8%) 1 ( 2.0%) 3 ( 2.9%) 0 ( 0.0%) 0 ( 0.0%)
entry 0.045 0.366 0.243
- Femoral 45 (53.6%) 88 (40.0%) 22 (44.0%) 36 (35.0%) 52 (34.0%) 69 (27.9%)
- Radial 39 (46.4%) 132 (60.0%) 28 (56.0%) 67 (65.0%) 101 (66.0%) 178 (72.1%)
EF 52.3 ± 10.9 52.4 ± 8.9 0.970 54.8 ± 9.1 55.1 ± 9.4 0.891 59.4 ± 8.8 59.1 ± 8.7 0.798
height 155.7 ± 5.4 168.7 ± 6.0 0.000 154.2 ± 5.1 167.5 ± 5.7 0.000 152.6 ± 6.7 167.3 ± 6.4 0.000
weight 57.4 ± 9.0 68.8 ± 10.9 0.000 57.2 ± 10.3 67.5 ± 8.4 0.000 57.1 ± 9.1 69.0 ± 10.6 0.000
BMI 23.6 ± 3.2 24.1 ± 3.4 0.282 24.1 ± 4.3 24.1 ± 2.6 0.967 24.5 ± 3.5 24.6 ± 3.4 0.816
obesity 0.628 1.000 0.653
- No 60 (71.4%) 149 (67.7%) 35 (70.0%) 71 (68.9%) 99 (64.7%) 153 (61.9%)
- Yes 24 (28.6%) 71 (32.3%) 15 (30.0%) 32 (31.1%) 54 (35.3%) 94 (38.1%)
TC 180.7 ± 45.7 184.1 ± 42.6 0.542 196.3 ± 52.7 192.6 ± 54.3 0.697 191.1 ± 53.1 178.7 ± 44.6 0.018
LDLC 111.0 ± 40.0 118.9 ± 39.1 0.128 127.7 ± 39.5 125.4 ± 47.1 0.774 118.3 ± 41.8 109.5 ± 39.2 0.035
HDLC 39.5 ± 11.2 38.1 ± 10.9 0.338 40.1 ± 13.8 38.4 ± 10.9 0.414 38.5 ± 10.8 37.4 ± 10.9 0.338
TG 112.3 ± 87.2 104.3 ± 65.5 0.454 112.5 ± 51.1 138.0 ± 100.2 0.042 126.3 ± 76.0 144.3 ± 114.2 0.060
DM 0.412 0.036 0.875
- No 54 (64.3%) 154 (70.0%) 25 (50.0%) 71 (68.9%) 94 (61.4%) 155 (62.8%)
- Yes 30 (35.7%) 66 (30.0%) 25 (50.0%) 32 (31.1%) 59 (38.6%) 92 (37.2%)
HBP 0.001 0.789 0.000
- No 28 (33.3%) 122 (55.5%) 19 (38.0%) 43 (41.7%) 36 (23.5%) 108 (43.7%)
- Yes 56 (66.7%) 98 (44.5%) 31 (62.0%) 60 (58.3%) 117 (76.5%) 139 (56.3%)
smoking 0.000 0.000 0.000
- Ex-smoker 13 (15.5%) 53 (24.1%) 8 (16.0%) 34 (33.0%) 28 (18.3%) 68 (27.5%)
- Never 57 (67.9%) 40 (18.2%) 37 (74.0%) 13 (12.6%) 115 (75.2%) 70 (28.3%)
- Smoker 14 (16.7%) 127 (57.7%) 5 (10.0%) 56 (54.4%) 10 ( 6.5%) 109 (44.1%)
-------------------------------------------------------------------------------------------------------------------
z=ztable(res)
print(z,type="html")
| p | ||||
|---|---|---|---|---|
| (N=153) | (N=304) | (N=400) | ||
| age | 64.3 ± 12.3 | 62.1 ± 12.1 | 63.8 ± 11.0 | 0.073 |