if (!file.exists("data")) {
dir.create("data")
}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.csv?accessType=DOWNLOAD"
download.file(fileUrl, destfile = "./data/cameras.csv", method = "curl")
dateDownloaded <- date()
cameraData <- read.table("./data/cameras.csv", sep = ",", header = TRUE)
head(cameraData)
## address direction street crossStreet
## 1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
## 2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
## 3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
## 4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
## 5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
## 6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
## intersection Location.1
## 1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
## 2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
## 3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
## 4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
## 5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
## 6 Erdman & Macon St (39.3068045671, -76.5593167803)
read.csv sets sep=“,” and header=TRUE
cameraData <- read.csv("./data/cameras.csv")
head(cameraData)
## address direction street crossStreet
## 1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
## 2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
## 3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
## 4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
## 5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
## 6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
## intersection Location.1
## 1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
## 2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
## 3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
## 4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
## 5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
## 6 Erdman & Macon St (39.3068045671, -76.5593167803)
In my experience, the biggest trouble with reading flat files are quotation marks ` or " placed in data values, setting quote=“” often resolves these.
한글 encoding 은 둘 중의 하나로 되어있다.
한글데이타의 예
names=c("문건웅","박윤정","황유미")
sex=c("M","F","F")
data=data.frame(이름=names,성별=sex)
data
## 이름 성별
## 1 문건웅 M
## 2 박윤정 F
## 3 황유미 F
write.csv(data,"test.csv")
test=read.csv("test.csv",fileEncoding="euc-kr")
## Warning: invalid input found on input connection 'test.csv'
## Warning: incomplete final line found by readTableHeader on 'test.csv'
test
## [1] X X...
## <0 rows> (or 0-length row.names)
test=read.csv("test.csv",fileEncoding="utf-8")
test
## X 이름 성별
## 1 1 문건웅 M
## 2 2 박윤정 F
## 3 3 황유미 F
if(!file.exists("data")){dir.create("data")}
fileUrl <- "https://data.baltimorecity.gov/api/views/dz54-2aru/rows.xlsx?accessType=DOWNLOAD"
download.file(fileUrl,destfile="./data/cameras.xlsx",method="curl")
dateDownloaded <- date()
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
cameraData <- read.xlsx("./data/cameras.xlsx",sheetIndex=1,header=TRUE)
head(cameraData)
## address direction street crossStreet
## 1 S CATON AVE & BENSON AVE N/B Caton Ave Benson Ave
## 2 S CATON AVE & BENSON AVE S/B Caton Ave Benson Ave
## 3 WILKENS AVE & PINE HEIGHTS AVE E/B Wilkens Ave Pine Heights
## 4 THE ALAMEDA & E 33RD ST S/B The Alameda 33rd St
## 5 E 33RD ST & THE ALAMEDA E/B E 33rd The Alameda
## 6 ERDMAN AVE & N MACON ST E/B Erdman Macon St
## intersection Location.1
## 1 Caton Ave & Benson Ave (39.2693779962, -76.6688185297)
## 2 Caton Ave & Benson Ave (39.2693157898, -76.6689698176)
## 3 Wilkens Ave & Pine Heights (39.2720252302, -76.676960806)
## 4 The Alameda & 33rd St (39.3285013141, -76.5953545714)
## 5 E 33rd & The Alameda (39.3283410623, -76.5953594625)
## 6 Erdman & Macon St (39.3068045671, -76.5593167803)
colIndex <- 2:3
rowIndex <- 1:4
cameraDataSubset <- read.xlsx("./data/cameras.xlsx",sheetIndex=1,
colIndex=colIndex,rowIndex=rowIndex)
cameraDataSubset
## direction street
## 1 N/B Caton Ave
## 2 S/B Caton Ave
## 3 E/B Wilkens Ave
library(foreign)
read.spss("file_name")
data(mtcars)
head(mtcars,10)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
str(mtcars)
## 'data.frame': 32 obs. of 11 variables:
## $ mpg : num 21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
## $ cyl : num 6 6 4 6 8 6 8 4 4 6 ...
## $ disp: num 160 160 108 258 360 ...
## $ hp : num 110 110 93 110 175 105 245 62 95 123 ...
## $ drat: num 3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
## $ wt : num 2.62 2.88 2.32 3.21 3.44 ...
## $ qsec: num 16.5 17 18.6 19.4 17 ...
## $ vs : num 0 0 1 1 0 1 0 1 1 1 ...
## $ am : num 1 1 1 0 0 0 0 0 0 0 ...
## $ gear: num 4 4 4 3 3 3 3 4 4 4 ...
## $ carb: num 4 4 1 1 2 1 4 2 2 4 ...
summary(mtcars)
## mpg cyl disp hp
## Min. :10.4 Min. :4.00 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.4 1st Qu.:4.00 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.2 Median :6.00 Median :196.3 Median :123.0
## Mean :20.1 Mean :6.19 Mean :230.7 Mean :146.7
## 3rd Qu.:22.8 3rd Qu.:8.00 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.9 Max. :8.00 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.76 Min. :1.51 Min. :14.5 Min. :0.000
## 1st Qu.:3.08 1st Qu.:2.58 1st Qu.:16.9 1st Qu.:0.000
## Median :3.69 Median :3.33 Median :17.7 Median :0.000
## Mean :3.60 Mean :3.22 Mean :17.8 Mean :0.438
## 3rd Qu.:3.92 3rd Qu.:3.61 3rd Qu.:18.9 3rd Qu.:1.000
## Max. :4.93 Max. :5.42 Max. :22.9 Max. :1.000
## am gear carb
## Min. :0.000 Min. :3.00 Min. :1.00
## 1st Qu.:0.000 1st Qu.:3.00 1st Qu.:2.00
## Median :0.000 Median :4.00 Median :2.00
## Mean :0.406 Mean :3.69 Mean :2.81
## 3rd Qu.:1.000 3rd Qu.:4.00 3rd Qu.:4.00
## Max. :1.000 Max. :5.00 Max. :8.00
table(mtcars$cyl,mtcars$am)
##
## 0 1
## 4 3 8
## 6 4 3
## 8 12 2
mtcars$tm=factor(mtcars$am,labels=c("automatic","manual"))
# mtcars$tm=ifelse(mtcars$am==0,"automatic","manual")
result=table(mtcars$cyl,mtcars$tm)
result
##
## automatic manual
## 4 3 8
## 6 4 3
## 8 12 2
chisq.test(result)
## Warning: Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: result
## X-squared = 8.741, df = 2, p-value = 0.01265
fisher.test(result)
##
## Fisher's Exact Test for Count Data
##
## data: result
## p-value = 0.009105
## alternative hypothesis: two.sided
#xtabs(도수~가로+세로)
result1=xtabs(~cyl+tm,data=mtcars)
result1
## tm
## cyl automatic manual
## 4 3 8
## 6 4 3
## 8 12 2
addmargins(result1)
## tm
## cyl automatic manual Sum
## 4 3 8 11
## 6 4 3 7
## 8 12 2 14
## Sum 19 13 32
chisq.test(result1)
## Warning: Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: result1
## X-squared = 8.741, df = 2, p-value = 0.01265
fisher.test(result1)
##
## Fisher's Exact Test for Count Data
##
## data: result1
## p-value = 0.009105
## alternative hypothesis: two.sided
One-sample t-test : 알려져 있는 평균과 비교
paired t-test vs Wilcoxon Signed-rank test
두 군 간의 평균 비교 : t.test vs Wilcoxon Rank-sum test
세 군 이상 의 평균비교 : one-way ANOVA vs Kruskall-Wallis test
2006년 조사에 의하면 한국인의 1인 1일 평균 알콜섭취량이 8.1g 이다. 2008년 대통령 선거로 알코올 섭취량이 달라졌는지 조사하였다.
x=c(15.5,11.21,12.67,8.87,12.15,9.88,2.06,14.50,0,4.97)
mean(x)
## [1] 9.181
sd(x)
## [1] 5.235
검정 순서
자료가 정규 분포를 하는지 검정 : shapiro.test()
정규분포 하는 경우 : t.test()
정규분포 하지 않는 경우 : Wilcoxon Signed-rank test
shapiro.test(x)
##
## Shapiro-Wilk normality test
##
## data: x
## W = 0.9234, p-value = 0.3863
t.test(x,mu=8.1)
##
## One Sample t-test
##
## data: x
## t = 0.653, df = 9, p-value = 0.5301
## alternative hypothesis: true mean is not equal to 8.1
## 95 percent confidence interval:
## 5.436 12.926
## sample estimates:
## mean of x
## 9.181
단측검정의 경우 : 귀무가설 mu > 8.1, 유의수준 99%
t.test(x,mu=8.1,conf.level=0.99,alter="greater")
##
## One Sample t-test
##
## data: x
## t = 0.653, df = 9, p-value = 0.265
## alternative hypothesis: true mean is greater than 8.1
## 99 percent confidence interval:
## 4.51 Inf
## sample estimates:
## mean of x
## 9.181
쌍을 이룬 두 변수의 차이를 보는 검정
t-test의 가정 : 자료가 정규분포를 한다
검정 순서
자료가 정규 분포를 하는지 검정(shapiro.test())
정규분포 하는 경우 : t.test
정규분포 하지 않는 경우 : Wilcoxon Signed-rank test
data(sleep)
head(sleep)
## extra group ID
## 1 0.7 1 1
## 2 -1.6 1 2
## 3 -0.2 1 3
## 4 -1.2 1 4
## 5 -0.1 1 5
## 6 3.4 1 6
str(sleep)
## 'data.frame': 20 obs. of 3 variables:
## $ extra: num 0.7 -1.6 -0.2 -1.2 -0.1 3.4 3.7 0.8 0 2 ...
## $ group: Factor w/ 2 levels "1","2": 1 1 1 1 1 1 1 1 1 1 ...
## $ ID : Factor w/ 10 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
정규성 검정
with(sleep,
shapiro.test(extra[group == 2] - extra[group == 1]))
##
## Shapiro-Wilk normality test
##
## data: extra[group == 2] - extra[group == 1]
## W = 0.8299, p-value = 0.03334
정규분포를 따르지 않으므로 wilcoxon test실시
with(sleep,
wilcox.test(extra[group == 2] - extra[group == 1]))
## Warning: cannot compute exact p-value with ties
## Warning: cannot compute exact p-value with zeroes
##
## Wilcoxon signed rank test with continuity correction
##
## data: extra[group == 2] - extra[group == 1]
## V = 45, p-value = 0.009091
## alternative hypothesis: true location is not equal to 0
비모수방법은 자료의 순서를 이용하는데 동일한 값이 있어 순서를 정하는 데 문제가 있어 warning생김 : exact=FALSE 옵션을 추가한다
with(sleep,
wilcox.test(extra[group == 2] - extra[group == 1],exact=FALSE))
##
## Wilcoxon signed rank test with continuity correction
##
## data: extra[group == 2] - extra[group == 1]
## V = 45, p-value = 0.009091
## alternative hypothesis: true location is not equal to 0
정규분포를 따르는 경우 Student’s paired t-test 실시
with(sleep,
t.test(extra[group == 1],
extra[group == 2], paired = TRUE))
##
## Paired t-test
##
## data: extra[group == 1] and extra[group == 2]
## t = -4.062, df = 9, p-value = 0.002833
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.4599 -0.7001
## sample estimates:
## mean of the differences
## -1.58
약을 1에서 2로 바꾸었을때 수면시간이 늘어나는가? The sleep prolongations
sleep1 <- with(sleep, extra[group == 2] - extra[group == 1])
summary(sleep1)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.05 1.30 1.58 1.70 4.60
stripchart(sleep1, method = "stack", xlab = "hours",
main = "Sleep prolongation (n = 10)")
boxplot(sleep1, horizontal = TRUE, add = TRUE,
at = .6, pars = list(boxwex = 0.5, staplewex = 0.25))
분산이 같은지 검정( var.test() )
분산이 다르면 Welch의 t-test 검정(t.test())
분산이 같으면 pooled variance를 이용한 t-test (var.equal=TRUE)
data(mtcars)
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
result=aggregate(mpg~am,data=mtcars,function(x) { c(mean(x),sd(x))})
colnames(result$mpg)=c("mean","sd")
result
## am mpg.mean mpg.sd
## 1 0 17.147 3.834
## 2 1 24.392 6.167
boxplot(mpg~am,data=mtcars)
# 분산이 같은지 F 검정
var.test(mpg~am,data=mtcars)
##
## F test to compare two variances
##
## data: mpg by am
## F = 0.3866, num df = 18, denom df = 12, p-value = 0.06691
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1244 1.0703
## sample estimates:
## ratio of variances
## 0.3866
# 분산이 같으므로 var.equal=TRUE로 검정
t.test(mpg~am,data=mtcars,var.equal=TRUE)
##
## Two Sample t-test
##
## data: mpg by am
## t = -4.106, df = 30, p-value = 0.000285
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.848 -3.642
## sample estimates:
## mean in group 0 mean in group 1
## 17.15 24.39
# 분산이 다르다면 var.equal=FALSE로 검정
t.test(mpg~am,data=mtcars,var.equal=FALSE)
##
## Welch Two Sample t-test
##
## data: mpg by am
## t = -3.767, df = 18.33, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -11.28 -3.21
## sample estimates:
## mean in group 0 mean in group 1
## 17.15 24.39
# 비모수검정
wilcox.test(mpg~am,data=mtcars)
## Warning: cannot compute exact p-value with ties
##
## Wilcoxon rank sum test with continuity correction
##
## data: mpg by am
## W = 42, p-value = 0.001871
## alternative hypothesis: true location shift is not equal to 0
aggregate(mpg~cyl,data=mtcars,mean)
## cyl mpg
## 1 4 26.66
## 2 6 19.74
## 3 8 15.10
boxplot(mpg~cyl,data=mtcars)
# cyl가 numeric이므로 factor변수로 바꾸어준다
mtcars$group=factor(mtcars$cyl)
out=lm(mpg~group,data=mtcars)
out
##
## Call:
## lm(formula = mpg ~ group, data = mtcars)
##
## Coefficients:
## (Intercept) group6 group8
## 26.66 -6.92 -11.56
anova(out)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 825 412 39.7 5e-09 ***
## Residuals 29 301 10
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(out)
# 다중비교
#install.packages("multcomp")
library(multcomp)
## Warning: package 'multcomp' was built under R version 3.1.1
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: splines
## Loading required package: TH.data
tukey=glht(out,linfct=mcp(group="Tukey"))
summary(tukey)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lm(formula = mpg ~ group, data = mtcars)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 6 - 4 == 0 -6.92 1.56 -4.44 <0.001 ***
## 8 - 4 == 0 -11.56 1.30 -8.90 <0.001 ***
## 8 - 6 == 0 -4.64 1.49 -3.11 0.011 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
plot(tukey)
kruskal.test(mpg~group,data=mtcars)
##
## Kruskal-Wallis rank sum test
##
## data: mpg by group
## Kruskal-Wallis chi-squared = 25.75, df = 2, p-value = 2.566e-06
boxplot(mpg~group+am,data=mtcars)
boxplot(mpg~am+group,data=mtcars)
with(mtcars,interaction.plot(cyl,am,mpg))
with(mtcars,interaction.plot(am,cyl,mpg))
out=lm(mpg~group+am,data=mtcars)
summary(out)
##
## Call:
## lm(formula = mpg ~ group + am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.962 -1.497 -0.206 1.891 6.538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 24.80 1.32 18.75 < 2e-16 ***
## group6 -6.16 1.54 -4.01 0.00041 ***
## group8 -10.07 1.45 -6.93 1.5e-07 ***
## am 2.56 1.30 1.97 0.05846 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.07 on 28 degrees of freedom
## Multiple R-squared: 0.765, Adjusted R-squared: 0.74
## F-statistic: 30.4 on 3 and 28 DF, p-value: 5.96e-09
plot(mpg~wt,data=mtcars,col=cyl)
legend("topright",legend=c("4","6","8"),pch=1,col=mtcars$cyl)
out=lm(mpg~wt+group,data=mtcars)
anova(out)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## wt 1 848 848 129.67 5.1e-12 ***
## group 2 95 48 7.29 0.0028 **
## Residuals 28 183 7
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(out)
##
## Call:
## lm(formula = mpg ~ wt + group, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.589 -1.236 -0.516 1.384 5.792
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33.991 1.888 18.01 < 2e-16 ***
## wt -3.206 0.754 -4.25 0.00021 ***
## group6 -4.256 1.386 -3.07 0.00472 **
## group8 -6.071 1.652 -3.67 0.00100 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.56 on 28 degrees of freedom
## Multiple R-squared: 0.837, Adjusted R-squared: 0.82
## F-statistic: 48.1 on 3 and 28 DF, p-value: 3.59e-11
dunnett=glht(out,linfct=mcp(group="Dunnett"))
summary(dunnett)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Dunnett Contrasts
##
##
## Fit: lm(formula = mpg ~ wt + group, data = mtcars)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## 6 - 4 == 0 -4.26 1.39 -3.07 0.0087 **
## 8 - 4 == 0 -6.07 1.65 -3.67 0.0019 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
library(lattice)
xyplot(mpg~wt|cyl,data=mtcars)
xyplot(mpg~wt|am,data=mtcars)
그룹간의 비교를 한번에 : compareGroups 패키지의 이용
library(compareGroups)
## Loading required package: Hmisc
## Loading required package: grid
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:base':
##
## format.pval, round.POSIXt, trunc.POSIXt, units
##
## Loading required package: xtable
##
## Attaching package: 'xtable'
##
## The following objects are masked from 'package:Hmisc':
##
## label, label<-
##
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
##
## The following object is masked from 'package:Hmisc':
##
## combine
##
## The following object is masked from 'package:stats':
##
## nobs
##
## The following object is masked from 'package:utils':
##
## object.size
##
## Loading required package: SNPassoc
## Loading required package: haplo.stats
## Loading required package: parallel
data(predimed)
분석에 앞서 먼저 생존분석을 하는데 필요한 Surv클래스의 변수를 만들어야 한다. 여기서 만들어진 변수 ** tmain** 은 time-to-death 와 time-to-cardiovascular event로 각각 사용된다.
predimed$tmain <- with(predimed, Surv(toevent, event == "Yes"))
label(predimed$tmain) <- "AMI, stroke, or CV Death"
이것은 이패키의 주된 함수이다. 이 함수가 모든 계산을 수행하여 그결과를 object에 저장한다. 나중에** createTable**함수를 이 object에 적용하면 분석결과의 table 이 만들어진다. 예를 들어 predimed데이타에서 group을 반응변수로 다른 변수들을 설명변수로 하여 단변량분석을 수행하려면 다음과 같이 한다.
res=compareGroups(group ~ . - toevent - event, data = predimed)
res
##
##
## -------- Summary of results by groups of 'Intervention group'---------
##
##
## var N p.value method
## 1 Sex 6324 <0.001** categorical
## 2 Age 6324 0.003** continuous normal
## 3 Smoking 6324 0.444 categorical
## 4 Body mass index 6324 <0.001** continuous normal
## 5 Waist circumference 6324 0.045** continuous normal
## 6 Waist-to-height ratio 6324 <0.001** continuous normal
## 7 Hypertension 6324 0.249 categorical
## 8 Type-2 diabetes 6324 0.017** categorical
## 9 Dyslipidemia 6324 0.423 categorical
## 10 Family history of premature CHD 6324 0.581 categorical
## 11 Hormone-replacement therapy 5661 0.850 categorical
## 12 MeDiet Adherence score 6324 <0.001** continuous normal
## 13 AMI, stroke, or CV Death 6324 0.011** Surv [Tmax=4.79]
## selection
## 1 ALL
## 2 ALL
## 3 ALL
## 4 ALL
## 5 ALL
## 6 ALL
## 7 ALL
## 8 ALL
## 9 ALL
## 10 ALL
## 11 ALL
## 12 ALL
## 13 ALL
## -----
## Signif. codes: 0 '**' 0.05 '*' 0.1 ' ' 1
createTable(res)
##
## --------Summary descriptives table by 'Intervention group'---------
##
## ____________________________________________________________________________________
## Control MedDiet + Nuts MedDiet + VOO p.overall
## N=2042 N=2100 N=2182
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Sex: <0.001
## Male 812 (39.8%) 968 (46.1%) 899 (41.2%)
## Female 1230 (60.2%) 1132 (53.9%) 1283 (58.8%)
## Age 67.3 (6.28) 66.7 (6.02) 67.0 (6.21) 0.003
## Smoking: 0.444
## Never 1282 (62.8%) 1259 (60.0%) 1351 (61.9%)
## Current 270 (13.2%) 296 (14.1%) 292 (13.4%)
## Former 490 (24.0%) 545 (26.0%) 539 (24.7%)
## Body mass index 30.3 (3.96) 29.7 (3.77) 29.9 (3.71) <0.001
## Waist circumference 101 (10.8) 100 (10.6) 100 (10.4) 0.045
## Waist-to-height ratio 0.63 (0.07) 0.62 (0.06) 0.63 (0.06) <0.001
## Hypertension: 0.249
## No 331 (16.2%) 362 (17.2%) 396 (18.1%)
## Yes 1711 (83.8%) 1738 (82.8%) 1786 (81.9%)
## Type-2 diabetes: 0.017
## No 1072 (52.5%) 1150 (54.8%) 1100 (50.4%)
## Yes 970 (47.5%) 950 (45.2%) 1082 (49.6%)
## Dyslipidemia: 0.423
## No 563 (27.6%) 561 (26.7%) 622 (28.5%)
## Yes 1479 (72.4%) 1539 (73.3%) 1560 (71.5%)
## Family history of premature CHD: 0.581
## No 1580 (77.4%) 1640 (78.1%) 1675 (76.8%)
## Yes 462 (22.6%) 460 (21.9%) 507 (23.2%)
## Hormone-replacement therapy: 0.850
## No 1811 (98.3%) 1835 (98.4%) 1918 (98.2%)
## Yes 31 (1.68%) 30 (1.61%) 36 (1.84%)
## MeDiet Adherence score 8.44 (1.94) 8.81 (1.90) 8.77 (1.97) <0.001
## AMI, stroke, or CV Death 5.80% 3.58% 3.76% 0.011
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯