CSV files
## Read the text file with read.csv(); quote = "'" makes the single quote the
## only quoting character.
## NOTE(review): the 999 entries in the printed rows look like missing-value
## codes -- confirm against the data dictionary before treating them as numbers.
blood.csv <- read.csv("BLOOD.DAT.txt", quote = "'")
## Show the first six rows
head(blood.csv)
ID matchid case curpmh ageblood estradol estrone testost prolactn
1 100013 164594 0 1 46 57 65 25 11.12
2 100241 107261 0 0 65 11 26 999 2.80
3 100696 110294 0 1 66 3 999 8 38.00
4 101266 101266 1 0 57 4 18 6 8.90
5 101600 101600 1 0 66 6 18 25 6.90
6 102228 155717 0 1 57 10 999 31 13.94
Excel file using xlsx package
## Load the xlsx package, which provides read.xlsx()
library(xlsx)
## Read the first worksheet (sheetIndex = 1) of the workbook
nhefs.xls <- read.xlsx("nhefs_book.xls", sheetIndex = 1)
## Show the first six rows; the wide table is printed in several column groups
head(nhefs.xls)
seqn qsmk death yrdth sbp dbp sex age race income marital school ht wt71 wt82 wt82_71 birthplace
1 233 0 0 NA 175 96 0 42 1 19 2 7 174.1875 79.04 68.94604 -10.093960 47
2 235 0 0 NA 123 80 0 36 0 18 2 9 159.3750 58.63 61.23497 2.604970 42
3 244 0 0 NA 115 75 1 56 1 15 3 11 168.5000 56.81 66.22449 9.414486 51
4 245 0 1 85 148 78 0 68 1 15 3 5 170.1875 59.42 64.41012 4.990117 37
5 252 0 0 NA 118 77 0 40 0 18 2 11 181.8750 87.09 92.07925 4.989251 42
6 257 0 0 NA 141 83 1 43 1 11 4 9 162.1875 99.00 103.41906 4.419060 34
smokeintensity smkintensity82_71 smokeyrs asthma bronch tb hf hbp pepticulcer colitis hepatitis chroniccough
1 30 -10 29 0 0 0 0 1 1 0 0 0
2 20 -10 24 0 0 0 0 0 0 0 0 0
3 20 -14 26 0 0 0 0 0 0 0 0 0
4 3 4 53 0 0 0 0 1 0 0 0 0
5 20 0 19 0 0 0 0 0 0 0 0 0
6 10 10 21 0 0 0 0 0 0 0 0 0
hayfever diabetes polio tumor nervousbreak alcoholpy alcoholfreq alcoholtype alcoholhowmuch pica headache
1 0 1 0 0 0 1 1 3 7 0 1
2 0 0 0 0 0 1 0 1 4 0 1
3 1 0 0 1 0 1 3 4 NA 0 1
4 0 0 0 0 0 1 2 3 4 0 0
5 0 0 0 0 0 1 2 1 2 0 1
6 0 0 0 0 0 1 3 2 1 0 1
otherpain weakheart allergies nerves lackpep hbpmed boweltrouble wtloss infection active exercise birthcontrol
1 0 0 0 0 0 1 0 0 0 0 2 2
2 0 0 0 0 0 0 0 0 1 0 0 2
3 1 0 0 1 0 0 0 0 0 0 2 0
4 1 1 0 0 0 0 0 0 0 1 2 2
5 0 0 0 0 0 0 1 0 0 1 1 2
6 0 0 0 0 0 0 0 0 0 1 1 0
pregnancies cholesterol hightax82 price71 price82 tax71 tax82 price71_82 tax71_82
1 NA 197 0 2.183594 1.739990 1.1022949 0.4619751 0.44378662 0.6403809
2 NA 301 0 2.346680 1.797363 1.3649902 0.5718994 0.54931641 0.7929688
3 2 157 0 1.569580 1.513428 0.5512695 0.2309875 0.05619812 0.3202515
4 NA 174 0 1.506592 1.451904 0.5249023 0.2199707 0.05479431 0.3049927
5 NA 216 0 2.346680 1.797363 1.3649902 0.5718994 0.54931641 0.7929688
6 1 212 1 2.209961 2.025879 1.1547852 0.7479248 0.18408203 0.4069824
SAS native file using sas7bdat package
## Load the sas7bdat package, which provides read.sas7bdat()
library(sas7bdat)
## Read the native SAS data set
nhefs.sas <- read.sas7bdat("nhefs_book.sas7bdat")
## Show the first six rows; missing values print as NaN here, whereas the
## Excel import of the same data shows NA
head(nhefs.sas)
seqn qsmk death yrdth sbp dbp sex age race income marital school ht wt71 wt82 wt82_71 birthplace
1 233 0 0 NaN 175 96 0 42 1 19 2 7 174.1875 79.04 68.94604 -10.093960 47
2 235 0 0 NaN 123 80 0 36 0 18 2 9 159.3750 58.63 61.23497 2.604970 42
3 244 0 0 NaN 115 75 1 56 1 15 3 11 168.5000 56.81 66.22449 9.414486 51
4 245 0 1 85 148 78 0 68 1 15 3 5 170.1875 59.42 64.41012 4.990117 37
5 252 0 0 NaN 118 77 0 40 0 18 2 11 181.8750 87.09 92.07925 4.989251 42
6 257 0 0 NaN 141 83 1 43 1 11 4 9 162.1875 99.00 103.41906 4.419060 34
smokeintensity smkintensity82_71 smokeyrs asthma bronch tb hf hbp pepticulcer colitis hepatitis chroniccough
1 30 -10 29 0 0 0 0 1 1 0 0 0
2 20 -10 24 0 0 0 0 0 0 0 0 0
3 20 -14 26 0 0 0 0 0 0 0 0 0
4 3 4 53 0 0 0 0 1 0 0 0 0
5 20 0 19 0 0 0 0 0 0 0 0 0
6 10 10 21 0 0 0 0 0 0 0 0 0
hayfever diabetes polio tumor nervousbreak alcoholpy alcoholfreq alcoholtype alcoholhowmuch pica headache
1 0 1 0 0 0 1 1 3 7 0 1
2 0 0 0 0 0 1 0 1 4 0 1
3 1 0 0 1 0 1 3 4 NaN 0 1
4 0 0 0 0 0 1 2 3 4 0 0
5 0 0 0 0 0 1 2 1 2 0 1
6 0 0 0 0 0 1 3 2 1 0 1
otherpain weakheart allergies nerves lackpep hbpmed boweltrouble wtloss infection active exercise birthcontrol
1 0 0 0 0 0 1 0 0 0 0 2 2
2 0 0 0 0 0 0 0 0 1 0 0 2
3 1 0 0 1 0 0 0 0 0 0 2 0
4 1 1 0 0 0 0 0 0 0 1 2 2
5 0 0 0 0 0 0 1 0 0 1 1 2
6 0 0 0 0 0 0 0 0 0 1 1 0
pregnancies cholesterol hightax82 price71 price82 tax71 tax82 price71_82 tax71_82
1 NaN 197 0 2.183594 1.739990 1.1022949 0.4619751 0.44378662 0.6403809
2 NaN 301 0 2.346680 1.797363 1.3649902 0.5718994 0.54931641 0.7929688
3 2 157 0 1.569580 1.513428 0.5512695 0.2309875 0.05619812 0.3202515
4 NaN 174 0 1.506592 1.451904 0.5249023 0.2199707 0.05479431 0.3049927
5 NaN 216 0 2.346680 1.797363 1.3649902 0.5718994 0.54931641 0.7929688
6 1 212 1 2.209961 2.025879 1.1547852 0.7479248 0.18408203 0.4069824
Stata file using foreign package
## Load the foreign package, which provides read.dta()
library(foreign)
## Read the Stata-format file
blood.dta <- read.dta("BLOOD.DAT.dta")
## Show the first six rows
head(blood.dta)
id matchid case curpmh ageblood estradol estrone testost prolactn
1 100013 164594 0 1 46 57 65 25 11.12
2 100241 107261 0 0 65 11 26 999 2.80
3 100696 110294 0 1 66 3 999 8 38.00
4 101266 101266 1 0 57 4 18 6 8.90
5 101600 101600 1 0 66 6 18 25 6.90
6 102228 155717 0 1 57 10 999 31 13.94
## A numeric vector created by combining 4 numbers with c()
vec1 <- c(2013, 2, 15, -10)
## Typing an object's name prints it
vec1
[1] 2013 2 15 -10
## Integers 1 to 16, generated with the colon operator
vec2 <- 1:16
vec2
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Create a vector (single dimension)
vec3 <- 1:16
vec3
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## Give the vector a 4 x 4 two-dimensional structure by assigning to dim();
## the values fill the matrix column by column
dim(vec3) <- c(4, 4)
vec3
[,1] [,2] [,3] [,4]
[1,] 1 5 9 13
[2,] 2 6 10 14
[3,] 3 7 11 15
[4,] 4 8 12 16
## Give the same 16 values a 2 x 2 x 4 three-dimensional structure
## (printed as four 2 x 2 slices)
dim(vec3) <- c(2, 2, 4)
vec3
, , 1
[,1] [,2]
[1,] 1 3
[2,] 2 4
, , 2
[,1] [,2]
[1,] 5 7
[2,] 6 8
, , 3
[,1] [,2]
[1,] 9 11
[2,] 10 12
, , 4
[,1] [,2]
[1,] 13 15
[2,] 14 16
## Directly create a 3 x 4 x 5 array from the integers 1 to 60
arr1 <- array(1:60, dim = c(3,4,5))
arr1
, , 1
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
, , 2
[,1] [,2] [,3] [,4]
[1,] 13 16 19 22
[2,] 14 17 20 23
[3,] 15 18 21 24
, , 3
[,1] [,2] [,3] [,4]
[1,] 25 28 31 34
[2,] 26 29 32 35
[3,] 27 30 33 36
, , 4
[,1] [,2] [,3] [,4]
[1,] 37 40 43 46
[2,] 38 41 44 47
[3,] 39 42 45 48
, , 5
[,1] [,2] [,3] [,4]
[1,] 49 52 55 58
[2,] 50 53 56 59
[3,] 51 54 57 60
## List of a vector and a matrix: list elements may differ in type and size.
## matrix(letters, 13, 2) arranges the 26 lowercase letters in 13 rows and
## 2 columns.
list1 <- list(first = 1:17, second = matrix(letters, 13,2))
list1
$first
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
$second
[,1] [,2]
[1,] "a" "n"
[2,] "b" "o"
[3,] "c" "p"
[4,] "d" "q"
[5,] "e" "r"
[6,] "f" "s"
[7,] "g" "t"
[8,] "h" "u"
[9,] "i" "v"
[10,] "j" "w"
[11,] "k" "x"
[12,] "l" "y"
[13,] "m" "z"
## List of two vectors of the same length (one numeric, one character)
list2 <- list(alpha = c(1,4,5,7), beta = c("h","s","p","h"))
list2
$alpha
[1] 1 4 5 7
$beta
[1] "h" "s" "p" "h"
## Convert a list to a data frame (each list element becomes a column).
## stringsAsFactors = TRUE is spelled out because it stopped being the default
## in R 4.0.0; the factor result of df1[, "beta"] shown later ("Levels: h p s")
## depends on the character column being converted to a factor.
df1 <- data.frame(list2, stringsAsFactors = TRUE)
df1
alpha beta
1 1 h
2 4 s
3 5 p
4 7 h
## Create a list with vectors of different classes (character and integer),
## all of length 26
list3 <- list(small = letters, large = LETTERS, number = 1:26)
list3
$small
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
$large
[1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z"
$number
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## Convert to a data frame; the three equal-length vectors become its columns
df2 <- data.frame(list3)
df2
small large number
1 a A 1
2 b B 2
3 c C 3
4 d D 4
5 e E 5
6 f F 6
7 g G 7
8 h H 8
9 i I 9
10 j J 10
11 k K 11
12 l L 12
13 m M 13
14 n N 14
15 o O 15
16 p P 16
17 q Q 17
18 r R 18
19 s S 19
20 t T 20
21 u U 21
22 v V 22
23 w W 23
24 x X 24
25 y Y 25
26 z Z 26
## Directly create a data frame by passing the columns to data.frame()
df3 <- data.frame(small = letters, large = LETTERS, number = 1:26)
df3
small large number
1 a A 1
2 b B 2
3 c C 3
4 d D 4
5 e E 5
6 f F 6
7 g G 7
8 h H 8
9 i I 9
10 j J 10
11 k K 11
12 l L 12
13 m M 13
14 n N 14
15 o O 15
16 p P 16
17 q Q 17
18 r R 18
19 s S 19
20 t T 20
21 u U 21
22 v V 22
23 w W 23
24 x X 24
25 y Y 25
26 z Z 26
## Built-in character vector of the 26 lowercase letters
letters
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
letters[3] # 1-dimensional object: a single index picks the third element
[1] "c"
## Print the 3 x 4 x 5 array again before indexing into it
arr1
, , 1
[,1] [,2] [,3] [,4]
[1,] 1 4 7 10
[2,] 2 5 8 11
[3,] 3 6 9 12
, , 2
[,1] [,2] [,3] [,4]
[1,] 13 16 19 22
[2,] 14 17 20 23
[3,] 15 18 21 24
, , 3
[,1] [,2] [,3] [,4]
[1,] 25 28 31 34
[2,] 26 29 32 35
[3,] 27 30 33 36
, , 4
[,1] [,2] [,3] [,4]
[1,] 37 40 43 46
[2,] 38 41 44 47
[3,] 39 42 45 48
, , 5
[,1] [,2] [,3] [,4]
[1,] 49 52 55 58
[2,] 50 53 56 59
[3,] 51 54 57 60
arr1[1,2,3] # 3-dimensional object: one index per dimension (row 1, column 2, slice 3)
[1] 28
arr1[1, ,3] # implies 1,(all),3: an empty index selects everything along that dimension
[1] 25 28 31 34
## Print the data frame again before indexing into it
df1
alpha beta
1 1 h
2 4 s
3 5 p
4 7 h
df1[2, ] # implies 2,(all): row 2 with every column, returned as a one-row data frame
alpha beta
2 4 s
## Print the list again before extracting from it
list1
$first
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
$second
[,1] [,2]
[1,] "a" "n"
[2,] "b" "o"
[3,] "c" "p"
[4,] "d" "q"
[5,] "e" "r"
[6,] "f" "s"
[7,] "g" "t"
[8,] "h" "u"
[9,] "i" "v"
[10,] "j" "w"
[11,] "k" "x"
[12,] "l" "y"
[13,] "m" "z"
list1[[1]] # list needs [[ ]] to extract the element itself (here the first one, by position)
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
## Print the named list again before extracting from it
list3
$small
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
$large
[1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "M" "N" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X" "Y" "Z"
$number
[1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## Extract a list element by name with $
list3$small
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
## Same element extracted with [[ ]] and the name given as a string
list3[["small"]]
[1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q" "r" "s" "t" "u" "v" "w" "x" "y" "z"
## Print the data frame again before extracting columns
df1
alpha beta
1 1 h
2 4 s
3 5 p
4 7 h
## Extract a data frame column by name with $, as with a list
df1$alpha
[1] 1 4 5 7
## Select a column by name with matrix-style indexing; the result is returned
## as a vector (printed here as a factor with levels) rather than a data frame
df1[, "beta"]
[1] h s p h
Levels: h p s