Data read and write

library(readxl)
library(readr)
library(haven)
library(writexl)

Importing data

  • Read in from excel
excel <- read_excel("C:/Users/hed2/Downloads/mybook2/mybook2/excel.xlsx")
excel
## # A tibble: 4 × 4
##   ID    sex    ht_in wgt_lbs
##   <chr> <chr>  <dbl>   <dbl>
## 1 001   Male      71     190
## 2 002   Male      69     176
## 3 003   Female    64     130
## 4 004   Female    65     154
  • Reading from excel spreadsheet
excel <- read_excel(
  path = "C:/Users/hed2/Downloads/mybook2/mybook2/excel_complex.xlsx",
  
  sheet = "Study Phase 1",
  
  col_names = c("id", "sex", "ht_in", "wgt_lbs", "dob", "income"),
  col_types = c(
    "text",
    "text",
    "numeric",
    "numeric",
    "date",
    "numeric",
    "skip"
  ),
  skip = 3,
  na = c("", "NA", "Missing")
)

excel
## # A tibble: 4 × 6
##   id    sex    ht_in wgt_lbs dob                 income
##   <chr> <chr>  <dbl>   <dbl> <dttm>               <dbl>
## 1 001   Male      71     190 1981-05-20 00:00:00  46000
## 2 002   Male      NA     176 1990-08-16 00:00:00  67000
## 3 003   Female    64     130 1980-02-21 00:00:00  49000
## 4 004   Female    65      NA 1983-04-12 00:00:00  89000

Read in from txt

single_space <- read_delim(
  file = "C:/Users/hed2/Downloads/mybook2/mybook2/single_delimited.txt",
  
  delim = " "
)
## Rows: 4 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: " "
## chr (3): id, sex, ht_in
## dbl (1): wgt_lbs
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(single_space )
## # A tibble: 4 × 4
##   id    sex    ht_in wgt_lbs
##   <chr> <chr>  <chr>   <dbl>
## 1 001   Male   71        190
## 2 002   Male   .         176
## 3 003   Female 64        130
## 4 004   Female 65        154

SAS file and csv

# read in from sas file and csv
sas <- read_sas("C:\\Users\\hed2\\Downloads\\mybook2\\mybook2\\class.sas7bdat")
head(sas)
## # A tibble: 6 × 5
##   Name    Sex     Age Height Weight
##   <chr>   <chr> <dbl>  <dbl>  <dbl>
## 1 Alfred  M        14   69     112.
## 2 Alice   F        13   56.5    84 
## 3 Barbara F        13   65.3    98 
## 4 Carol   F        14   62.8   102.
## 5 Henry   M        14   63.5   102.
## 6 James   M        12   57.3    83
csv <- read_csv("C:\\Users\\hed2\\Downloads\\mybook2\\mybook2\\nhefs_an.csv")
## Rows: 1394 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (11): seqn, qsmk, sex, age, income, sbp, dbp, price71, tax71, race, wt82_71
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(csv)
## # A tibble: 6 × 11
##    seqn  qsmk   sex   age income   sbp   dbp price71 tax71  race wt82_71
##   <dbl> <dbl> <dbl> <dbl>  <dbl> <dbl> <dbl>   <dbl> <dbl> <dbl>   <dbl>
## 1   233     0     0    42     19   175    96    2.18 1.10      1  -10.1 
## 2   235     0     0    36     18   123    80    2.35 1.36      0    2.60
## 3   244     0     1    56     15   115    75    1.57 0.551     1    9.41
## 4   245     0     0    68     15   148    78    1.51 0.525     1    4.99
## 5   252     0     0    40     18   118    77    2.35 1.36      0    4.99
## 6   257     0     1    43     11   141    83    2.21 1.15      1    4.42

Exporting data

# export data as a csv or xlsx format file 
# write_csv(demo, "/Users/bradcannell/Desktop/demo.csv")

mtcars2 <- list(Sheet1=excel, Sheet2=excel)
writexl::write_xlsx(mtcars2, "C:/Users/hed2/Downloads/mybook2/mybook2/demo.xlsx")

Creat table one

require(tableone)
## Loading required package: tableone
# factor category variables
csv$qsmk=as.factor(csv$qsmk)
csv$sex=as.factor(csv$sex)
csv$race=as.factor(csv$race)

tab0 <- CreateTableOne(vars = names(csv)[! names(csv) %in% c("seqn", "qsmk" )],
                       data = csv, 
                       strata = "qsmk", 
                       test = T)
print(tab0, showAllLevels = FALSE, )
##                      Stratified by qsmk
##                       0              1              p      test
##   n                     1054            340                    
##   sex = 1 (%)            559 (53.0)     160 (47.1)   0.064     
##   age (mean (SD))      42.65 (11.77)  45.49 (12.30) <0.001     
##   income (mean (SD))   17.92 (2.71)   18.23 (2.48)   0.060     
##   sbp (mean (SD))     127.45 (18.47) 130.94 (19.09)  0.003     
##   dbp (mean (SD))      77.35 (10.44)  78.53 (10.73)  0.073     
##   price71 (mean (SD))   2.14 (0.23)    2.14 (0.22)   0.881     
##   tax71 (mean (SD))     1.06 (0.21)    1.06 (0.21)   0.772     
##   race = 1 (%)           157 (14.9)      27 ( 7.9)   0.001     
##   wt82_71 (mean (SD))   2.00 (7.51)    4.64 (8.72)  <0.001
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
pvalue <- function(x, ...) {
    # Construct vectors of data y, and groups (strata) g
    y <- unlist(x)
    g <- factor(rep(1:length(x), times=sapply(x, length)))
    if (is.numeric(y)) {
        # For numeric variables, perform a standard 2-sample t-test
        p <- t.test(y ~ g)$p.value
    } else {
        # For categorical variables, perform a chi-squared test of independence
        p <- chisq.test(table(y, g))$p.value
    }
    # Format the p-value, using an HTML entity for the less-than sign.
    # The initial empty string places the output on the line below the variable label.
    c("", sub("<", "&lt;", format.pval(p, digits=3, eps=0.001)))
}

 table1(~ factor(sex) + age + factor(race) + sbp | (qsmk), 
                       overall=F, extra.col=list(`P-value`=pvalue), data=csv)
0
(N=1054)
1
(N=340)
P-value
factor(sex)
0 495 (47.0%) 180 (52.9%) 0.0636
1 559 (53.0%) 160 (47.1%)
age
Mean (SD) 42.6 (11.8) 45.5 (12.3) <0.001
Median [Min, Max] 42.0 [25.0, 72.0] 45.0 [25.0, 74.0]
factor(race)
0 897 (85.1%) 313 (92.1%) 0.00136
1 157 (14.9%) 27 (7.9%)
sbp
Mean (SD) 127 (18.5) 131 (19.1) 0.00327
Median [Min, Max] 125 [87.0, 222] 129 [91.0, 211]