Ong S.M.
April 2017
current working directory
getwd()
[1] "F:/Dropbox/Conference.Seminar.Talks/NIH Monash R Course/03 Module 1"
changing working directory
setwd("F:/Dropbox/")
Load a csv file
Data in the working directory (WD) - the file must be in the folder reported by getwd()
read.csv("cvdata.csv")
Data in another folder
read.csv("F:/Dropbox/Conference.Seminar.Talks/NIH Monash R Course/03 Module 1/cvdata.csv")
?! Where is the dataset
?! Where is the dataset
“Assignment” with <- : store the data in a named object
dta <- read.csv("F:/Dropbox/Conference.Seminar.Talks/NIH Monash R Course/03 Module 1/cvdata.csv")
* Check the “Environment”: the object dta should now be listed there.
read.table() and read.csv() come from the utils package.
Excel file
You need a function from a library
So, let's install the xlsx library
install.packages("xlsx")
after the installation is completed, load the library
library(xlsx)
Let's have a quick look at the library documentation.
read.xlsx
# read.xlsx() must be told which worksheet to read, via sheetIndex or
# sheetName; without either the call stops with an error.
# NOTE(review): assumes the data are on the first sheet - confirm.
dta_excel <- read.xlsx("F:/Dropbox/Conference.Seminar.Talks/NIH Monash R Course/03 Module 1/cvdata.xlsx", sheetIndex = 1)
You have a “stata_file.dta” in your folder
interface, summary, str, dimension, row & col
summary(dta)
ptsex ptrace ptnationality acsstratum
Min. :1.000 Min. : 1.000 Min. : 0 Min. :1.000
1st Qu.:1.000 1st Qu.: 1.000 1st Qu.: 1 1st Qu.:1.000
Median :1.000 Median : 1.000 Median : 1 Median :2.000
Mean :1.212 Mean : 2.096 Mean :4169 Mean :1.797
3rd Qu.:1.000 3rd Qu.: 3.000 3rd Qu.:9999 3rd Qu.:3.000
Max. :2.000 Max. :18.000 Max. :9999 Max. :3.000
NA's :15
smokingstatus heartrate death30d bpsys
Min. : 1.0 Min. : 40.0 Mode :logical Min. : 60.0
1st Qu.: 1.0 1st Qu.: 70.0 FALSE:123 1st Qu.:118.0
Median : 2.0 Median : 82.0 TRUE :76 Median :137.0
Mean : 450.3 Mean : 84.3 NA's :801 Mean :139.7
3rd Qu.: 3.0 3rd Qu.: 96.0 3rd Qu.:158.0
Max. :8888.0 Max. :213.0 Max. :252.0
NA's :9 NA's :19 NA's :24
weight
Min. : 35.00
1st Qu.: 60.00
Median : 67.00
Mean : 67.18
3rd Qu.: 75.00
Max. :112.00
NA's :464
str(dta)
'data.frame': 1000 obs. of 9 variables:
$ ptsex : int 2 1 1 2 2 1 1 1 1 1 ...
$ ptrace : int 1 2 2 1 1 2 2 2 1 1 ...
$ ptnationality: int 0 1 1 9999 9999 0 1 9999 9999 0 ...
$ acsstratum : int 3 1 1 1 1 1 2 2 1 1 ...
$ smokingstatus: int 1 3 1 1 1 1 8888 1 3 3 ...
$ heartrate : int 81 107 60 74 100 90 61 63 49 91 ...
$ death30d : logi FALSE NA TRUE NA TRUE FALSE ...
$ bpsys : int 135 NA 98 122 157 111 177 135 98 127 ...
$ weight : num 52.3 58 NA 51.5 NA 69 NA NA 68 56 ...
names(dta)
[1] "ptsex" "ptrace" "ptnationality" "acsstratum"
[5] "smokingstatus" "heartrate" "death30d" "bpsys"
[9] "weight"
dim(dta)
[1] 1000 9
nrow(dta)
[1] 1000
ncol(dta)
[1] 9
class(dta)
[1] "data.frame"
Extract a variable (column)
The “$” in R
class(dta$ptsex)
[1] "integer"
or
str(dta$ptsex)
int [1:1000] 2 1 1 2 2 1 1 1 1 1 ...
Mean
mean(dta$heartrate)
[1] NA
?? Why
Because heartrate contains missing values (NA), and mean() returns NA as soon as any value is missing.
mean(dta$heartrate, na.rm = TRUE)
[1] 84.29766
?? Why
Let's read the help file
?mean
Median
median(dta$heartrate,na.rm = TRUE)
Maximum
max(dta$heartrate,na.rm = TRUE)
[1] 213
Minimum
min(dta$heartrate,na.rm = TRUE)
[1] 40
Standard Deviation & IQR
# Standard deviation of heart rate, ignoring missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
sd(dta$heartrate, na.rm = TRUE)
[1] 21.42964
# Interquartile range of heart rate, ignoring missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
IQR(dta$heartrate, na.rm = TRUE)
[1] 26
Tabulation & Proportion
table(dta$ptsex)
1 2
788 212
? missing data
788+212
[1] 1000
Tabulation & Proportion
table(dta$ptrace)
1 2 3 5 7 8 9 10 12 13 18
501 230 210 7 1 6 6 6 4 6 8
? missing data
501+230+210+7+1+6+6+6+4+6+8
[1] 985
add margin
addmargins(table(dta$ptrace))
1 2 3 5 7 8 9 10 12 13 18 Sum
501 230 210 7 1 6 6 6 4 6 8 985
?addmargins
Proportion
prop.table(table(dta$ptrace))
1 2 3 5 7 8
0.508629442 0.233502538 0.213197970 0.007106599 0.001015228 0.006091371
9 10 12 13 18
0.006091371 0.006091371 0.004060914 0.006091371 0.008121827
?prop.table
Proportion
addmargins(prop.table(table(dta$ptrace)))
1 2 3 5 7 8
0.508629442 0.233502538 0.213197970 0.007106599 0.001015228 0.006091371
9 10 12 13 18 Sum
0.006091371 0.006091371 0.004060914 0.006091371 0.008121827 1.000000000
# Interquartile range of heart rate, ignoring missing values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
IQR(dta$heartrate, na.rm = TRUE)
[1] 26
Tabulation & Proportion
table(dta$ptsex,dta$death30d)
FALSE TRUE
1 87 56
2 36 20
Tabulation & Proportion
prop.table(table(dta$ptsex,dta$death30d))
FALSE TRUE
1 0.4371859 0.2814070
2 0.1809045 0.1005025
?? What is the denominator? With no margin given, each cell is divided by the grand total.
By row
prop.table(table(dta$ptsex,dta$death30d),1)
FALSE TRUE
1 0.6083916 0.3916084
2 0.6428571 0.3571429
By col
prop.table(table(dta$ptsex,dta$death30d),2)
FALSE TRUE
1 0.7073171 0.7368421
2 0.2926829 0.2631579
prop.table(table(dta$ptsex,dta$smokingstatus,dta$death30d),1)
, , = FALSE
1 2 3 8888
1 0.148936170 0.255319149 0.177304965 0.028368794
2 0.563636364 0.054545455 0.000000000 0.036363636
, , = TRUE
1 2 3 8888
1 0.120567376 0.134751773 0.127659574 0.007092199
2 0.309090909 0.000000000 0.018181818 0.018181818
is.na(dta$death30d)
[1] FALSE TRUE FALSE TRUE FALSE FALSE FALSE TRUE TRUE FALSE TRUE
[12] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
[23] TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
[34] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
[45] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
[56] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
[67] TRUE TRUE TRUE TRUE FALSE FALSE FALSE FALSE TRUE TRUE TRUE
[78] TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE FALSE
[89] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE
[100] TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[111] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[122] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[133] TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[144] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[155] FALSE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
[166] FALSE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
[177] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[188] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
[199] TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE FALSE
[210] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[221] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE
[232] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE
[243] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE TRUE
[254] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[265] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[276] FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[287] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[298] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE FALSE TRUE TRUE
[309] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
[320] TRUE FALSE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
[331] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[342] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[353] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[364] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[375] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE
[386] FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE TRUE TRUE
[397] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
[408] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[419] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[430] TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE FALSE FALSE
[441] TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
[452] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE FALSE
[463] TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
[474] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[485] TRUE TRUE TRUE TRUE FALSE TRUE FALSE FALSE TRUE TRUE TRUE
[496] TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
[507] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE
[518] TRUE FALSE TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE
[529] TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE TRUE FALSE TRUE
[540] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[551] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[562] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
[573] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[584] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[595] TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE
[606] TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[617] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[628] TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[639] FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[650] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE
[661] FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[672] TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[683] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
[694] FALSE FALSE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE
[705] FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[716] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE FALSE TRUE FALSE
[727] TRUE FALSE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE
[738] FALSE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE
[749] TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
[760] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[771] TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
[782] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[793] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE TRUE
[804] TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[815] TRUE TRUE TRUE FALSE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
[826] TRUE TRUE FALSE FALSE FALSE TRUE TRUE TRUE TRUE FALSE TRUE
[837] TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE FALSE TRUE
[848] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE FALSE
[859] TRUE FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE TRUE
[870] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[881] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[892] TRUE TRUE TRUE TRUE FALSE FALSE TRUE TRUE TRUE TRUE FALSE
[903] FALSE TRUE TRUE TRUE TRUE TRUE FALSE TRUE FALSE TRUE FALSE
[914] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE
[925] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE FALSE
[936] TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE
[947] FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE
[958] TRUE FALSE TRUE TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE
[969] TRUE TRUE TRUE TRUE TRUE FALSE TRUE TRUE FALSE FALSE TRUE
[980] TRUE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
[991] TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE TRUE TRUE
table(is.na(dta$death30d))
FALSE TRUE
199 801
prop.table(table(is.na(dta$death30d)))
FALSE TRUE
0.199 0.801
head(complete.cases(dta))
[1] TRUE FALSE FALSE FALSE FALSE TRUE
The which() function
which(complete.cases(dta))
Use the which() function to extract the complete cases, e.g. dta[which(complete.cases(dta)), ]
prop.table(table(is.na(dta$death30d)))
FALSE TRUE
0.199 0.801
The recode() function in the car package
??recode
install.packages("car")
library(car)
summary(dta$heartrate)
recode(dta$heartrate, "0:82='below.avg' ; 83:213='above.avg'")
[1] "below.avg" "above.avg" "below.avg" "below.avg" "above.avg" "above.avg"
dta$heartrate2 <- recode(dta$heartrate, "0:82='below.avg' ; 83:213='above.avg'")
table(dta$heartrate2)
above.avg below.avg
488 493
Or, you can use the ifelse() function
dta$heartrate2 <- ifelse(dta$heartrate < 83, "below.avg", "above.avg")
table(dta$heartrate2)
above.avg below.avg
488 493
table(dta$acsstratum)
1 2 3
468 267 265
head(subset(dta,acsstratum==1))
ptsex ptrace ptnationality acsstratum smokingstatus heartrate death30d
2 1 2 1 1 3 107 NA
3 1 2 1 1 1 60 TRUE
4 2 1 9999 1 1 74 NA
5 2 1 9999 1 1 100 TRUE
6 1 2 0 1 1 90 FALSE
9 1 1 9999 1 3 49 NA
bpsys weight heartrate2
2 NA 58.0 above.avg
3 98 NA below.avg
4 122 51.5 below.avg
5 157 NA above.avg
6 111 69.0 above.avg
9 98 68.0 below.avg
UA.cohort <- subset(dta,acsstratum==1)
UA.cohort.male <- subset(dta, acsstratum==1 & ptsex==1)
The tableone package
??tableone
install.packages("tableone")
library(tableone)
library(tableone)
?tableone
CreateTableOne(data=dta)
Overall
n 1000
ptsex (mean (sd)) 1.21 (0.41)
ptrace (mean (sd)) 2.10 (2.22)
ptnationality (mean (sd)) 4168.82 (4931.12)
acsstratum (mean (sd)) 1.80 (0.83)
smokingstatus (mean (sd)) 450.29 (1945.96)
heartrate (mean (sd)) 84.30 (21.43)
death30d = TRUE (%) 76 (38.2)
bpsys (mean (sd)) 139.67 (30.40)
weight (mean (sd)) 67.18 (12.17)
heartrate2 = below.avg (%) 493 (50.3)