library(readxl)
library(readr)
library(haven)
library(writexl)
excel <- read_excel("C:/Users/hed2/Downloads/mybook2/mybook2/excel.xlsx")
excel
## # A tibble: 4 × 4
## ID sex ht_in wgt_lbs
## <chr> <chr> <dbl> <dbl>
## 1 001 Male 71 190
## 2 002 Male 69 176
## 3 003 Female 64 130
## 4 004 Female 65 154
excel <- read_excel(
path = "C:/Users/hed2/Downloads/mybook2/mybook2/excel_complex.xlsx",
sheet = "Study Phase 1",
col_names = c("id", "sex", "ht_in", "wgt_lbs", "dob", "income"),
col_types = c(
"text",
"text",
"numeric",
"numeric",
"date",
"numeric",
"skip"
),
skip = 3,
na = c("", "NA", "Missing")
)
excel
## # A tibble: 4 × 6
## id sex ht_in wgt_lbs dob income
## <chr> <chr> <dbl> <dbl> <dttm> <dbl>
## 1 001 Male 71 190 1981-05-20 00:00:00 46000
## 2 002 Male NA 176 1990-08-16 00:00:00 67000
## 3 003 Female 64 130 1980-02-21 00:00:00 49000
## 4 004 Female 65 NA 1983-04-12 00:00:00 89000
Read in from txt
single_space <- read_delim(
file = "C:/Users/hed2/Downloads/mybook2/mybook2/single_delimited.txt",
delim = " "
)
## Rows: 4 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (3): id, sex, ht_in
## dbl (1): wgt_lbs
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(single_space )
## # A tibble: 4 × 4
## id sex ht_in wgt_lbs
## <chr> <chr> <chr> <dbl>
## 1 001 Male 71 190
## 2 002 Male . 176
## 3 003 Female 64 130
## 4 004 Female 65 154
SAS file and csv
# read in from sas file and csv
sas <- read_sas("C:\\Users\\hed2\\Downloads\\mybook2\\mybook2\\class.sas7bdat")
head(sas)
## # A tibble: 6 × 5
## Name Sex Age Height Weight
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Alfred M 14 69 112.
## 2 Alice F 13 56.5 84
## 3 Barbara F 13 65.3 98
## 4 Carol F 14 62.8 102.
## 5 Henry M 14 63.5 102.
## 6 James M 12 57.3 83
csv <- read_csv("C:\\Users\\hed2\\Downloads\\mybook2\\mybook2\\nhefs_an.csv")
## Rows: 1394 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): seqn, qsmk, sex, age, income, sbp, dbp, price71, tax71, race, wt82_71
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(csv)
## # A tibble: 6 × 11
## seqn qsmk sex age income sbp dbp price71 tax71 race wt82_71
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 233 0 0 42 19 175 96 2.18 1.10 1 -10.1
## 2 235 0 0 36 18 123 80 2.35 1.36 0 2.60
## 3 244 0 1 56 15 115 75 1.57 0.551 1 9.41
## 4 245 0 0 68 15 148 78 1.51 0.525 1 4.99
## 5 252 0 0 40 18 118 77 2.35 1.36 0 4.99
## 6 257 0 1 43 11 141 83 2.21 1.15 1 4.42
# export data as a csv or xlsx format file
# write_csv(demo, "/Users/bradcannell/Desktop/demo.csv")
mtcars2 <- list(Sheet1=excel, Sheet2=excel)
writexl::write_xlsx(mtcars2, "C:/Users/hed2/Downloads/mybook2/mybook2/demo.xlsx")
require(tableone)
## Loading required package: tableone
# factor category variables
csv$qsmk=as.factor(csv$qsmk)
csv$sex=as.factor(csv$sex)
csv$race=as.factor(csv$race)
tab0 <- CreateTableOne(vars = names(csv)[! names(csv) %in% c("seqn", "qsmk" )],
data = csv,
strata = "qsmk",
test = T)
print(tab0, showAllLevels = FALSE, )
## Stratified by qsmk
## 0 1 p test
## n 1054 340
## sex = 1 (%) 559 (53.0) 160 (47.1) 0.064
## age (mean (SD)) 42.65 (11.77) 45.49 (12.30) <0.001
## income (mean (SD)) 17.92 (2.71) 18.23 (2.48) 0.060
## sbp (mean (SD)) 127.45 (18.47) 130.94 (19.09) 0.003
## dbp (mean (SD)) 77.35 (10.44) 78.53 (10.73) 0.073
## price71 (mean (SD)) 2.14 (0.23) 2.14 (0.22) 0.881
## tax71 (mean (SD)) 1.06 (0.21) 1.06 (0.21) 0.772
## race = 1 (%) 157 (14.9) 27 ( 7.9) 0.001
## wt82_71 (mean (SD)) 2.00 (7.51) 4.64 (8.72) <0.001
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
pvalue <- function(x, ...) {
# Construct vectors of data y, and groups (strata) g
y <- unlist(x)
g <- factor(rep(1:length(x), times=sapply(x, length)))
if (is.numeric(y)) {
# For numeric variables, perform a standard 2-sample t-test
p <- t.test(y ~ g)$p.value
} else {
# For categorical variables, perform a chi-squared test of independence
p <- chisq.test(table(y, g))$p.value
}
# Format the p-value, using an HTML entity for the less-than sign.
# The initial empty string places the output on the line below the variable label.
c("", sub("<", "<", format.pval(p, digits=3, eps=0.001)))
}
table1(~ factor(sex) + age + factor(race) + sbp | (qsmk),
overall=F, extra.col=list(`P-value`=pvalue), data=csv)
0 (N=1054) |
1 (N=340) |
P-value | |
---|---|---|---|
factor(sex) | |||
0 | 495 (47.0%) | 180 (52.9%) | 0.0636 |
1 | 559 (53.0%) | 160 (47.1%) | |
age | |||
Mean (SD) | 42.6 (11.8) | 45.5 (12.3) | <0.001 |
Median [Min, Max] | 42.0 [25.0, 72.0] | 45.0 [25.0, 74.0] | |
factor(race) | |||
0 | 897 (85.1%) | 313 (92.1%) | 0.00136 |
1 | 157 (14.9%) | 27 (7.9%) | |
sbp | |||
Mean (SD) | 127 (18.5) | 131 (19.1) | 0.00327 |
Median [Min, Max] | 125 [87.0, 222] | 129 [91.0, 211] |