require(ggplot2)
## Loading required package: ggplot2
require(reshape2)
## Loading required package: reshape2
# Download the table of per-year counts (one column per software package,
# presumably R / SAS / SPSS publication counts -- confirm against the file).
Data_R_SAS_SPSS_Pubs <- read.csv(
  "https://umich.instructure.com/files/2361245/download?download_frd=1",
  header = TRUE
)
# read.csv() already returns a data frame, so no extra data.frame() wrap
df <- Data_R_SAS_SPSS_Pubs
# convert to long format (http://www.cookbook-r.com/Manipulating_data/Converting_data_between_wide_and_long_format/)
df <- melt(df, id.vars = "Year", variable.name = "Software")
# One thick line per software package. A single geom_line() layer is enough:
# a default-width layer underneath would be completely overdrawn.
# (`size` is kept for compatibility with older ggplot2; ggplot2 >= 3.4.0
# prefers `linewidth` for lines.)
ggplot(
  data = df,
  aes(x = Year, y = value, color = Software, group = Software)
) +
  geom_line(size = 4) +
  labs(x = "Year", y = "Citations")
rawdata_wide <- read.table(header=TRUE, text='
CaseID Gender Age Condition1 Condition2
1 M 5 13 10.5
2 F 6 16 11.2
3 F 8 10 18.3
4 M 9 9.5 18.1
5 M 10 12.1 19
')
# Add a factor column 'subject' derived from CaseID (CaseID itself is left unchanged)
rawdata_wide$subject <- factor(rawdata_wide$CaseID)
rawdata_wide
## CaseID Gender Age Condition1 Condition2 subject
## 1 1 M 5 13.0 10.5 1
## 2 2 F 6 16.0 11.2 2
## 3 3 F 8 10.0 18.3 3
## 4 4 M 9 9.5 18.1 4
## 5 5 M 10 12.1 19.0 5
library(reshape2)
# Specify id.vars: the variables to keep (don't split apart on!)
melt(rawdata_wide, id.vars=c("CaseID", "Gender"))
## Warning: attributes are not identical across measure variables; they will
## be dropped
## CaseID Gender variable value
## 1 1 M Age 5
## 2 2 F Age 6
## 3 3 F Age 8
## 4 4 M Age 9
## 5 5 M Age 10
## 6 1 M Condition1 13
## 7 2 F Condition1 16
## 8 3 F Condition1 10
## 9 4 M Condition1 9.5
## 10 5 M Condition1 12.1
## 11 1 M Condition2 10.5
## 12 2 F Condition2 11.2
## 13 3 F Condition2 18.3
## 14 4 M Condition2 18.1
## 15 5 M Condition2 19
## 16 1 M subject 1
## 17 2 F subject 2
## 18 3 F subject 3
## 19 4 M subject 4
## 20 5 M subject 5
data_long <- melt(rawdata_wide,
# ID variables - all the variables to keep but not split apart on
id.vars=c("CaseID", "Gender"),
# The source columns
measure.vars=c("Age", "Condition1", "Condition2" ),
# Name of the destination column that will identify the original
# column that the measurement came from
variable.name="Feature",
value.name="Measurement"
)
data_long
## CaseID Gender Feature Measurement
## 1 1 M Age 5.0
## 2 2 F Age 6.0
## 3 3 F Age 8.0
## 4 4 M Age 9.0
## 5 5 M Age 10.0
## 6 1 M Condition1 13.0
## 7 2 F Condition1 16.0
## 8 3 F Condition1 10.0
## 9 4 M Condition1 9.5
## 10 5 M Condition1 12.1
## 11 1 M Condition2 10.5
## 12 2 F Condition2 11.2
## 13 3 F Condition2 18.3
## 14 4 M Condition2 18.1
## 15 5 M Condition2 19.0
Popular data generation functions are c(), seq(), rep(), and data.frame(). Sometimes, we may also use list() and array() to generate data.
a<-c(1, 2, 3, 5, 6, 7, 10, 1, 4)
a
## [1] 1 2 3 5 6 7 10 1 4
c(list(A = c(Z = 1, Y = 2), B = c(X = 7), C = c(W = 7, V=3, U=-1.9)), recursive = TRUE)
## A.Z A.Y B.X C.W C.V C.U
## 1.0 2.0 7.0 7.0 3.0 -1.9
seq(1, 20, by=0.5)
## [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5
## [15] 8.0 8.5 9.0 9.5 10.0 10.5 11.0 11.5 12.0 12.5 13.0 13.5 14.0 14.5
## [29] 15.0 15.5 16.0 16.5 17.0 17.5 18.0 18.5 19.0 19.5 20.0
seq(1, 20, length=9)
## [1] 1.000 3.375 5.750 8.125 10.500 12.875 15.250 17.625 20.000
seq(along=c(5, 4, 5, 6))
## [1] 1 2 3 4
rep(c(1, 2, 3), 4)
## [1] 1 2 3 1 2 3 1 2 3 1 2 3
rep(c(1, 2, 3), each=4)
## [1] 1 1 1 1 2 2 2 2 3 3 3 3
# Index vector 1, 2, 3 for a length-3 input
X <- seq_along(c(1, 2, 3))
# Evaluate X + 1 four times; the results are bound as columns of a matrix
replicate(4, X + 1)
## [,1] [,2] [,3] [,4]
## [1,] 2 2 2 2
## [2,] 3 3 3 3
## [3,] 4 4 4 4
data.frame(v=1:4, ch=c("a", "B", "C", "d"), n=c(10, 11))
## v ch n
## 1 1 a 10
## 2 2 B 11
## 3 3 C 10
## 4 4 d 11
l<-list(a=c(1, 2), b="hi", c=-3+3i)
l
## $a
## [1] 1 2
##
## $b
## [1] "hi"
##
## $c
## [1] -3+3i
l$a[[2]]
## [1] 2
l$b
## [1] "hi"
ar <- array(1:24, dim=c(3, 4, 2)); ar
## , , 1
##
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
##
## , , 2
##
## [,1] [,2] [,3] [,4]
## [1,] 13 16 19 22
## [2,] 14 17 20 23
## [3,] 15 18 21 24
ar[2, 3, 1]
## [1] 8
ar[2, ,1]
## [1] 2 5 8 11
x <- seq(1, 10, by=0.5)
y <- list(a = 1, b = TRUE, c = "oops")
save(x, y, file="xy.RData")
load("xy.RData")
data("iris")
summary(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# 01a_data.txt: read the table with a header row, keeping text columns as
# character vectors (as.is = TRUE)
data.txt <- read.table(
  "https://umich.instructure.com/files/1628628/download?download_frd=1",
  header = TRUE,
  as.is = TRUE
)
summary(data.txt)
## Name Team Position Height
## Length:1034 Length:1034 Length:1034 Min. :67.0
## Class :character Class :character Class :character 1st Qu.:72.0
## Mode :character Mode :character Mode :character Median :74.0
## Mean :73.7
## 3rd Qu.:75.0
## Max. :83.0
## Weight Age
## Min. :150.0 Min. :20.90
## 1st Qu.:187.0 1st Qu.:25.44
## Median :200.0 Median :27.93
## Mean :201.7 Mean :28.74
## 3rd Qu.:215.0 3rd Qu.:31.23
## Max. :290.0 Max. :48.52
# 01_hdp.csv: comma-separated file with a header row
data.csv <- read.csv(
  "https://umich.instructure.com/files/1628650/download?download_frd=1",
  header = TRUE
)
summary(data.csv)
## tumorsize co2 pain wound
## Min. : 33.97 Min. :1.222 Min. :1.000 Min. :1.000
## 1st Qu.: 62.49 1st Qu.:1.519 1st Qu.:4.000 1st Qu.:5.000
## Median : 70.07 Median :1.601 Median :5.000 Median :6.000
## Mean : 70.88 Mean :1.605 Mean :5.473 Mean :5.732
## 3rd Qu.: 79.02 3rd Qu.:1.687 3rd Qu.:6.000 3rd Qu.:7.000
## Max. :116.46 Max. :2.128 Max. :9.000 Max. :9.000
## mobility ntumors nmorphine remission
## Min. :1.00 Min. :0.000 Min. : 0.000 Min. :0.0000
## 1st Qu.:5.00 1st Qu.:1.000 1st Qu.: 2.000 1st Qu.:0.0000
## Median :6.00 Median :3.000 Median : 3.000 Median :0.0000
## Mean :6.08 Mean :3.066 Mean : 3.624 Mean :0.2957
## 3rd Qu.:7.00 3rd Qu.:5.000 3rd Qu.: 5.000 3rd Qu.:1.0000
## Max. :9.00 Max. :9.000 Max. :18.000 Max. :1.0000
## lungcapacity Age Married FamilyHx SmokingHx
## Min. :0.01612 Min. :26.32 Min. :0.0 no :6820 current:1705
## 1st Qu.:0.67647 1st Qu.:46.69 1st Qu.:0.0 yes:1705 former :1705
## Median :0.81560 Median :50.93 Median :1.0 never :5115
## Mean :0.77409 Mean :50.97 Mean :0.6
## 3rd Qu.:0.91150 3rd Qu.:55.27 3rd Qu.:1.0
## Max. :0.99980 Max. :74.48 Max. :1.0
## Sex CancerStage LengthofStay WBC RBC
## female:5115 I :2558 Min. : 1.000 Min. :2131 Min. :3.919
## male :3410 II :3409 1st Qu.: 5.000 1st Qu.:5323 1st Qu.:4.802
## III:1705 Median : 5.000 Median :6007 Median :4.994
## IV : 853 Mean : 5.492 Mean :5998 Mean :4.995
## 3rd Qu.: 6.000 3rd Qu.:6663 3rd Qu.:5.190
## Max. :10.000 Max. :9776 Max. :6.065
## BMI IL6 CRP DID
## Min. :18.38 Min. : 0.03521 Min. : 0.0451 Min. : 1.0
## 1st Qu.:24.20 1st Qu.: 1.93039 1st Qu.: 2.6968 1st Qu.:100.0
## Median :27.73 Median : 3.34400 Median : 4.3330 Median :199.0
## Mean :29.07 Mean : 4.01698 Mean : 4.9730 Mean :203.3
## 3rd Qu.:32.54 3rd Qu.: 5.40551 3rd Qu.: 6.5952 3rd Qu.:309.0
## Max. :58.00 Max. :23.72777 Max. :28.7421 Max. :407.0
## Experience School Lawsuits HID
## Min. : 7.00 average:6405 Min. :0.000 Min. : 1.00
## 1st Qu.:15.00 top :2120 1st Qu.:1.000 1st Qu.: 9.00
## Median :18.00 Median :2.000 Median :17.00
## Mean :17.64 Mean :1.866 Mean :17.76
## 3rd Qu.:21.00 3rd Qu.:3.000 3rd Qu.:27.00
## Max. :29.00 Max. :9.000 Max. :35.00
## Medicaid
## Min. :0.1416
## 1st Qu.:0.3369
## Median :0.5215
## Mean :0.5125
## 3rd Qu.:0.7083
## Max. :0.8187
match(c(1, 2, 4, 5), c(1, 4, 4, 5, 6, 7))
## [1] 1 NA 2 4
length(x) gives us the number of elements in x.
x<-c(1, 3, 10, 23, 1, 3)
length(x)
## [1] 6
dim(x) retrieves or sets the dimension of an object.
x<-1:12
dim(x)<-c(3, 4)
x
## [,1] [,2] [,3] [,4]
## [1,] 1 4 7 10
## [2,] 2 5 8 11
## [3,] 3 6 9 12
dimnames(x) retrieves or sets the dimension names of an object. For higher-dimensional objects such as matrices or arrays, we supply the names as a list with one character vector per dimension.
dimnames(x)<-list(c("R1", "R2", "R3"), c("C1", "C2", "C3", "C4")); x
## C1 C2 C3 C4
## R1 1 4 7 10
## R2 2 5 8 11
## R3 3 6 9 12
nrow(x) number of rows; ncol(x) number of columns.
nrow(x)
## [1] 3
ncol(x)
## [1] 4
class(x) gets or sets the class of x. Note that we can use unclass(x) to remove the class attribute of x.
class(x)
## [1] "matrix"
class(x)<-"myclass"
x<-unclass(x)
x
## C1 C2 C3 C4
## R1 1 4 7 10
## R2 2 5 8 11
## R3 3 6 9 12
attr(x, "class")
## NULL
attr(x, "dim")<-c(2, 6)
x
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] 1 3 5 7 9 11
## [2,] 2 4 6 8 10 12
attributes(x) <- list(mycomment = "really special", dim = 3:4,
dimnames = list(LETTERS[1:3],
letters[1:4]),
names = paste(1:12))
x
## a b c d
## A 1 4 7 10
## B 2 5 8 11
## C 3 6 9 12
## attr(,"mycomment")
## [1] "really special"
## attr(,"names")
## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"
x<-c(1, 5, 2, 1, 10, 40, 3)
which.max(x)
## [1] 6
which.min(x)
## [1] 1
rev(x)
## [1] 3 40 10 1 2 5 1
sort(x)
## [1] 1 1 2 3 5 10 40
rev(sort(x))
## [1] 40 10 5 3 2 1 1
cut(x, breaks) divides the range of x into intervals and codes each value of x by the interval it falls into, returning a factor. breaks is either the number of equal-width intervals or a vector of cut points.
x
## [1] 1 5 2 1 10 40 3
cut(x, 3)
## [1] (0.961,14] (0.961,14] (0.961,14] (0.961,14] (0.961,14] (27,40]
## [7] (0.961,14]
## Levels: (0.961,14] (14,27] (27,40]
cut(x, c(0, 5, 20, 30))
## [1] (0,5] (0,5] (0,5] (0,5] (5,20] <NA> (0,5]
## Levels: (0,5] (5,20] (20,30]
which(x == a) returns a vector of the indices of x for which the comparison is TRUE.
x
## [1] 1 5 2 1 10 40 3
which(x==2)
## [1] 3
na.omit(x) suppresses the observations with missing data (NA). It suppresses the corresponding line if x is a matrix or a data frame.
df<-data.frame(a=1:5, b=c(1, 3, NA, 9, 8)); df
## a b
## 1 1 1
## 2 2 3
## 3 3 NA
## 4 4 9
## 5 5 8
na.omit(df)
## a b
## 1 1 1
## 2 2 3
## 4 4 9
## 5 5 8
unique(x) If x is a vector or a data frame, it returns a similar object but with the duplicate elements suppressed.
df1<-data.frame(a=c(1, 1, 7, 6, 8), b=c(1, 1, NA, 9, 8))
df1
## a b
## 1 1 1
## 2 1 1
## 3 7 NA
## 4 6 9
## 5 8 8
unique(df1)
## a b
## 1 1 1
## 3 7 NA
## 4 6 9
## 5 8 8
table(x) returns a table with the different values of x and their frequencies (typically for integers or factors).
v<-c(1, 2, 4, 2, 2, 5, 6, 4, 7, 8, 8)
table(v)
## v
## 1 2 4 5 6 7 8
## 1 3 2 1 1 1 2
subset(x, …) returns a selection of x with respect to criteria
sub<-subset(df1, df1$a>5); sub
## a b
## 3 7 NA
## 4 6 9
## 5 8 8
sub<-subset(df1, select=-a)
sub
## b
## 1 1
## 2 1
## 3 NA
## 4 9
## 5 8
sample(x, size) randomly draws size elements from the vector x without replacement; the option replace = TRUE samples with replacement instead.
v
## [1] 1 2 4 2 2 5 6 4 7 8 8
sample(df1$a, 20, replace = T)
## [1] 7 7 7 8 7 1 1 1 8 6 7 7 8 1 1 1 8 8 8 1
prop.table(x, margin=) expresses table entries as fractions of the marginal table.
prop.table(table(v))
## v
## 1 2 4 5 6 7
## 0.09090909 0.27272727 0.18181818 0.09090909 0.09090909 0.09090909
## 8
## 0.18181818
# 2 x 2 matrix, filled column by column
mat1 <- matrix(c(1, -1/5, -1/3, 1), nrow = 2)
# Invert it, then multiply the inverse by the original matrix;
# the product should be the identity matrix
mat1.inv <- solve(mat1)
mat1.identity <- mat1.inv %*% mat1
mat1.identity
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
b <- c(1, 2)
x <- solve (mat1, b)
x
## [1] 1.785714 2.357143
apply(X, INDEX, FUN=) returns a vector, array, or list of values obtained by applying a function FUN to the margins (INDEX = 1 means rows, INDEX = 2 means columns) of X.
df1
## a b
## 1 1 1
## 2 1 1
## 3 7 NA
## 4 6 9
## 5 8 8
apply(df1, 2, mean, na.rm=T)
## a b
## 4.60 4.75
lapply(X, FUN) apply FUN to each member of the list X. If X is a data frame, it will apply the FUN to each column and return a list.
lapply(df1, mean, na.rm=T)
## $a
## [1] 4.6
##
## $b
## [1] 4.75
lapply(list(a=c(1, 23, 5, 6, 1), b=c(9, 90, 999)), median)
## $a
## [1] 5
##
## $b
## [1] 90
tapply(X, INDEX, FUN=) applies FUN to each cell of a ragged array given by X, with indexes equal to INDEX. Note that X is an atomic object, typically a vector.
v
## [1] 1 2 4 2 2 5 6 4 7 8 8
# Grouping factor of length 11 that cycles through the levels 1, 2, 3
fac <- factor(rep_len(1:3, 11), levels = 1:3)
table(fac)
## fac
## 1 2 3
## 4 4 3
tapply(v, fac, sum)
## 1 2 3
## 17 16 16
by(data, INDEX, FUN) apply FUN to data frame data subsetted by INDEX.
by(df1, df1[, 1], sum)
## df1[, 1]: 1
## [1] 4
## --------------------------------------------------------
## df1[, 1]: 6
## [1] 15
## --------------------------------------------------------
## df1[, 1]: 7
## [1] NA
## --------------------------------------------------------
## df1[, 1]: 8
## [1] 16
merge(a, b) merge two data frames by common columns or row names. We can use option by = to specify the index column.
df2<-data.frame(a=c(1, 1, 7, 6, 8), c=1:5)
df2
## a c
## 1 1 1
## 2 1 2
## 3 7 3
## 4 6 4
## 5 8 5
df3<-merge(df1, df2, by="a")
df3
## a b c
## 1 1 1 1
## 2 1 1 2
## 3 1 1 1
## 4 1 1 2
## 5 6 9 4
## 6 7 NA 3
## 7 8 8 5
xtabs(a ~ b, data = x) creates a contingency table from cross-classifying factors.
DF <- as.data.frame(UCBAdmissions)
## 'DF' is a data frame with a grid of the factors and the counts in variable 'Freq'.
DF
## Admit Gender Dept Freq
## 1 Admitted Male A 512
## 2 Rejected Male A 313
## 3 Admitted Female A 89
## 4 Rejected Female A 19
## 5 Admitted Male B 353
## 6 Rejected Male B 207
## 7 Admitted Female B 17
## 8 Rejected Female B 8
## 9 Admitted Male C 120
## 10 Rejected Male C 205
## 11 Admitted Female C 202
## 12 Rejected Female C 391
## 13 Admitted Male D 138
## 14 Rejected Male D 279
## 15 Admitted Female D 131
## 16 Rejected Female D 244
## 17 Admitted Male E 53
## 18 Rejected Male E 138
## 19 Admitted Female E 94
## 20 Rejected Female E 299
## 21 Admitted Male F 22
## 22 Rejected Male F 351
## 23 Admitted Female F 24
## 24 Rejected Female F 317
## Nice for taking margins ...
xtabs(Freq ~ Gender + Admit, DF)
## Admit
## Gender Admitted Rejected
## Male 1198 1493
## Female 557 1278
## And for testing independence ...
summary(xtabs(Freq ~ ., DF))
## Call: xtabs(formula = Freq ~ ., data = DF)
## Number of cases in table: 4526
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 2000.3, df = 16, p-value = 0
aggregate(x, by, FUN) splits the data frame x into subsets, computes summary statistics for each, and returns the result in a convenient form. by is a list of grouping elements, that each have the same length as the variables in x.
list(rep(1:3, length=7))
## [[1]]
## [1] 1 2 3 1 2 3 1
aggregate(df3, by=list(rep(1:3, length=7)), sum)
## Group.1 a b c
## 1 1 10 10 8
## 2 2 7 10 6
## 3 3 8 NA 4
stack(x, …) transform data, stored as separate columns in a data frame or a list, into a single column and unstack(x, …) is the inverse of stack().
stack(df3)
## values ind
## 1 1 a
## 2 1 a
## 3 1 a
## 4 1 a
## 5 6 a
## 6 7 a
## 7 8 a
## 8 1 b
## 9 1 b
## 10 1 b
## 11 1 b
## 12 9 b
## 13 NA b
## 14 8 b
## 15 1 c
## 16 2 c
## 17 1 c
## 18 2 c
## 19 4 c
## 20 3 c
## 21 5 c
unstack(stack(df3))
## a b c
## 1 1 1 1
## 2 1 1 2
## 3 1 1 1
## 4 1 1 2
## 5 6 9 4
## 6 7 NA 3
## 7 8 8 5
reshape(x, …) reshapes a data frame between "wide" format, with repeated measurements in separate columns of the same record, and "long" format, with the repeated measurements in separate records. Use direction = "wide" or direction = "long".
df4 <- data.frame(school = rep(1:3, each = 4), class = rep(9:10, 6),
time = rep(c(1, 1, 2, 2), 3),
score = rnorm(12))
wide <- reshape(df4,
idvar = c("school", "class"),
direction = "wide")
wide
## school class score.1 score.2
## 1 1 9 -0.3709484 0.8752090
## 2 1 10 -0.7630987 1.2038424
## 5 2 9 1.1932622 -1.7511004
## 6 2 10 0.3346709 0.1487124
## 9 3 9 -0.8803714 1.5405514
## 10 3 10 1.9414497 1.4827459
long <- reshape(wide, idvar = c("school", "class"), direction = "long")
long
## school class time score.1
## 1.9.1 1 9 1 -0.3709484
## 1.10.1 1 10 1 -0.7630987
## 2.9.1 2 9 1 1.1932622
## 2.10.1 2 10 1 0.3346709
## 3.9.1 3 9 1 -0.8803714
## 3.10.1 3 10 1 1.9414497
## 1.9.2 1 9 2 0.8752090
## 1.10.2 1 10 2 1.2038424
## 2.9.2 2 9 2 -1.7511004
## 2.10.2 2 10 2 0.1487124
## 3.9.2 3 9 2 1.5405514
## 3.10.2 3 10 2 1.4827459
paste(…) concatenates vectors after converting to character. It has a few options: sep = is the string used to separate the terms (a single space is the default), and collapse = is an optional string used to separate the "collapsed" results.
a<-"today"
b<-"is a good day"
paste(a, b)
## [1] "today is a good day"
paste(a, b, sep=", ")
## [1] "today, is a good day"
substr(x, start, stop) substrings in a character vector. It can also assign values (with the same length) to part of a string, as substr(x, start, stop) <- value.
a<-"When the going gets tough, the tough get going!"
substr(a, 10, 40)
## [1] "going gets tough, the tough get"
substr(a, 1, 9)<-"........."
a
## [1] ".........going gets tough, the tough get going!"
strsplit(x, split) splits x according to the substring split. Use fixed = TRUE for non-regular expressions.
strsplit("a.b.c", ".", fixed = TRUE)
## [[1]]
## [1] "a" "b" "c"
grep(pattern, x) searches for matches to pattern within x. It will return a vector of the indices of the elements of x that yielded a match.
letters
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
grep("[a-z]", letters)
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26
gsub(pattern, replacement, x) replacement of matches determined by regular expression matching. sub() is the same but only replaces the first occurrence.
a<-c("e", 0, "kj", 10, ";")
gsub("[a-z]", "letters", a)
## [1] "letters" "0" "lettersletters" "10"
## [5] ";"
sub("[a-z]", "letters", a)
## [1] "letters" "0" "lettersj" "10" ";"
match(x, table) returns a vector of the positions of the first matches for the elements of x among table, with a shorthand x %in% table, which returns a logical vector. pmatch(x, table) returns partial matches for the elements of x among table.
x<-c(1, 2, 10, 19, 29)
match(x, c(1, 10))
## [1] 1 NA 2 NA NA
x %in% c(1, 10)
## [1] TRUE FALSE TRUE FALSE FALSE
pmatch("m", c("mean", "median", "mode")) # returns NA
## [1] NA
pmatch("med", c("mean", "median", "mode")) # returns 2
## [1] 2
#QQ Normal Probability Plot
X <- rnorm(1000)
Y <- rcauchy(1500)
# compare X to StdNormal distribution
qqnorm(X,
main="Normal Q-Q Plot of the data",
xlab="Theoretical Quantiles of the Normal",
ylab="Sample Quantiles of the X (Normal) Data")
qqline(X)
qqplot(X, Y)
# Y against StdNormal
qqnorm(Y,
main="Normal Q-Q Plot of the data",
xlab="Theoretical Quantiles of the Normal",
ylab="Sample Quantiles of the Y (Cauchy) Data", ylim= range(-4, 4))
# Why is the y-range specified here?
qqline(Y)
# Q-Q plot data (X) vs. simulation(Y)
# Q-Q plot of two samples on a fixed, symmetric axis window.
#
# Arguments:
#   x, y  numeric samples whose quantiles are compared
#   ...   further arguments forwarded to qqplot() (e.g. main, xlab, ylab,
#         plot.it); do not pass xlim/ylim, which are fixed below
# Value: whatever qqplot() returns (invisibly, a list with the sorted
#   x and y coordinates of the plotted points).
myQQ <- function(x, y, ...) {
  # Both axes are pinned to [-4, 4] so that extreme values do not stretch
  # the plot. To span the data instead, use: range(x, y, na.rm = TRUE)
  axis_limits <- c(-4, 4)
  # Forward `...` so caller-supplied options are honoured rather than dropped
  qqplot(x, y, xlim = axis_limits, ylim = axis_limits, ...)
}
myQQ(X, Y) # where the Y is the newly simulated data for X
qqline(X)
# Subsampling
x <- matrix(rnorm(100), ncol = 5)
y <- c(1, seq(19))
z <- cbind(x, y)
z.df <- data.frame(z)
z.df
## V1 V2 V3 V4 V5 y
## 1 -1.22509468 0.82955659 -1.092971055 0.5878067 1.86982070 1
## 2 0.99245033 0.32499812 -0.436002389 1.5217486 0.98448510 1
## 3 0.97150642 0.80425693 3.062870216 0.4735737 -0.68743995 2
## 4 -0.25781948 -0.32469366 1.469630084 -0.2721595 -0.69735728 3
## 5 -0.49775853 0.04524268 -0.878116986 0.1905110 -0.48990186 4
## 6 1.61538522 0.27183953 0.072936409 0.6678561 0.58405016 5
## 7 2.29011285 0.08029258 -0.956966761 0.6911037 -0.56486222 6
## 8 -1.32503061 -0.63084685 -0.192148196 -1.1622565 -0.58877876 7
## 9 0.60489299 -0.69430928 1.069026019 -0.7955919 0.44487870 8
## 10 -0.57948663 0.18461281 0.001370878 0.3544221 0.56358930 9
## 11 1.18262147 -0.67733420 0.029742922 -1.1426595 0.40858927 10
## 12 -0.05163258 1.78901696 -0.250644104 1.2256983 0.22124809 11
## 13 2.21563405 0.61208738 0.406798982 -0.6885104 -0.82273583 12
## 14 -1.73642202 -0.95839720 -0.427841126 -0.6460477 0.60175773 13
## 15 -0.37743926 0.15708628 2.320528256 0.8858242 -0.38251903 14
## 16 0.41046832 -0.18634933 1.206240937 -1.0122277 -0.07589161 15
## 17 -2.55631033 0.20541021 0.502399456 -0.4503726 0.05881044 16
## 18 0.03435792 1.36978932 0.330742552 0.6657285 0.67296838 17
## 19 0.67374652 0.62210218 -0.843005080 1.2102319 -0.34578502 18
## 20 -0.17829250 -0.28242693 1.550986760 1.6681200 -1.32492210 19
names(z.df)
## [1] "V1" "V2" "V3" "V4" "V5" "y"
# subsetting rows
z.sub <- subset(z.df, y > 2 & (y<10 | V1>0))
z.sub
## V1 V2 V3 V4 V5 y
## 4 -0.25781948 -0.32469366 1.469630084 -0.2721595 -0.69735728 3
## 5 -0.49775853 0.04524268 -0.878116986 0.1905110 -0.48990186 4
## 6 1.61538522 0.27183953 0.072936409 0.6678561 0.58405016 5
## 7 2.29011285 0.08029258 -0.956966761 0.6911037 -0.56486222 6
## 8 -1.32503061 -0.63084685 -0.192148196 -1.1622565 -0.58877876 7
## 9 0.60489299 -0.69430928 1.069026019 -0.7955919 0.44487870 8
## 10 -0.57948663 0.18461281 0.001370878 0.3544221 0.56358930 9
## 11 1.18262147 -0.67733420 0.029742922 -1.1426595 0.40858927 10
## 13 2.21563405 0.61208738 0.406798982 -0.6885104 -0.82273583 12
## 16 0.41046832 -0.18634933 1.206240937 -1.0122277 -0.07589161 15
## 18 0.03435792 1.36978932 0.330742552 0.6657285 0.67296838 17
## 19 0.67374652 0.62210218 -0.843005080 1.2102319 -0.34578502 18
z.sub1 <- z.df[z.df$y == 1, ]
z.sub1
## V1 V2 V3 V4 V5 y
## 1 -1.2250947 0.8295566 -1.0929711 0.5878067 1.8698207 1
## 2 0.9924503 0.3249981 -0.4360024 1.5217486 0.9844851 1
z.sub2 <- z.df[z.df$y %in% c(1, 4), ]
z.sub2
## V1 V2 V3 V4 V5 y
## 1 -1.2250947 0.82955659 -1.0929711 0.5878067 1.8698207 1
## 2 0.9924503 0.32499812 -0.4360024 1.5217486 0.9844851 1
## 5 -0.4977585 0.04524268 -0.8781170 0.1905110 -0.4899019 4
#subsetting columns
z.sub6 <- z.df[, 1:2]
z.sub6
## V1 V2
## 1 -1.22509468 0.82955659
## 2 0.99245033 0.32499812
## 3 0.97150642 0.80425693
## 4 -0.25781948 -0.32469366
## 5 -0.49775853 0.04524268
## 6 1.61538522 0.27183953
## 7 2.29011285 0.08029258
## 8 -1.32503061 -0.63084685
## 9 0.60489299 -0.69430928
## 10 -0.57948663 0.18461281
## 11 1.18262147 -0.67733420
## 12 -0.05163258 1.78901696
## 13 2.21563405 0.61208738
## 14 -1.73642202 -0.95839720
## 15 -0.37743926 0.15708628
## 16 0.41046832 -0.18634933
## 17 -2.55631033 0.20541021
## 18 0.03435792 1.36978932
## 19 0.67374652 0.62210218
## 20 -0.17829250 -0.28242693
The standard setting for defining new functions is: function.name<-function(x) { expr(an expression) return(value) }, where x is the parameter in the expression.
# Return the sum of x and y; each argument defaults to 0 when omitted.
adding <- function(x = 0, y = 0) {
  total <- x + y
  total
}
adding(x=5, y=10)
## [1] 15
conditions setting: if(cond) {expr} or if(cond) cons.expr else alt.expr
x <- 10
# if/else is an expression in R, so its value can be assigned directly:
# z is "T" when x exceeds 10 and "F" otherwise
z <- if (x > 10) "T" else "F"
z
## [1] "F"
Alternatively, ifelse represents a vectorized and extremely efficient conditional mechanism that provides one of the main advantages of R.
For loop: for(var in seq) expr
# Fill x with 1, 2, ..., 10 using a for loop.
# The vector is allocated at its final length up front so the loop only
# assigns elements instead of growing (and re-copying) x on every iteration.
x <- integer(10)
for (i in seq_len(10)) {
  x[i] <- i
}
x
## [1] 1 2 3 4 5 6 7 8 9 10
Data simulation
# The URL must be one unbroken string: a line break inside the quotes would
# be embedded in the address as a newline character and break the download.
data_1 <- read.csv(
  "https://umich.instructure.com/files/1628625/download?download_frd=1",
  as.is = TRUE,
  header = TRUE
)
# data_1 = read.csv(file.choose( ))
attach(data_1)
# to ensure all variables are accessible within R, e.g., using "age" instead of data_1$age
# i2 maximum number of drinks (standard units) consumed per day (in the past 30 days range 0-184) see also i1
# treat randomization group (0=usual care, 1=HELP clinic)
# pcs SF-36 Physical Component Score (range 14-75)
# mcs SF-36 Mental Component Score(range 7-62)
# cesd Center for Epidemiologic Studies Depression scale (range 0-60)
# indtot Inventory of Drug Use Consequences (InDUC) total score (range 4-45)
# pss_fr perceived social supports (friends, range 0-14) see also dayslink
# drugrisk Risk-Assessment Battery(RAB) drug risk score (range0-21)
# satreat any BSAS substance abuse treatment at baseline (0=no, 1=yes)
summary(data_1)
## ID i2 age treat
## Min. : 1.00 Min. : 0.00 Min. : 3.00 Min. :0.0000
## 1st Qu.: 24.25 1st Qu.: 1.00 1st Qu.:27.00 1st Qu.:0.0000
## Median : 50.50 Median : 15.50 Median :34.00 Median :0.0000
## Mean : 50.29 Mean : 27.08 Mean :34.31 Mean :0.1222
## 3rd Qu.: 74.75 3rd Qu.: 39.00 3rd Qu.:43.00 3rd Qu.:0.0000
## Max. :100.00 Max. :137.00 Max. :65.00 Max. :2.0000
## homeless pcs mcs cesd
## Min. :0.0000 Min. : 6.00 Min. : 0.00 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.:41.25 1st Qu.:20.25 1st Qu.:17.25
## Median :0.0000 Median :48.50 Median :29.00 Median :30.00
## Mean :0.1444 Mean :47.61 Mean :30.49 Mean :30.21
## 3rd Qu.:0.0000 3rd Qu.:57.00 3rd Qu.:39.75 3rd Qu.:43.00
## Max. :1.0000 Max. :76.00 Max. :93.00 Max. :68.00
## indtot pss_fr drugrisk sexrisk
## Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. : 0.000
## 1st Qu.:31.25 1st Qu.: 2.000 1st Qu.: 0.000 1st Qu.: 1.250
## Median :36.00 Median : 6.000 Median : 0.000 Median : 5.000
## Mean :37.03 Mean : 6.533 Mean : 2.578 Mean : 4.922
## 3rd Qu.:45.00 3rd Qu.:10.000 3rd Qu.: 3.000 3rd Qu.: 7.750
## Max. :60.00 Max. :20.000 Max. :23.000 Max. :13.000
## satreat female substance racegrp
## Min. :0.00000 Min. :0.00000 Length:90 Length:90
## 1st Qu.:0.00000 1st Qu.:0.00000 Class :character Class :character
## Median :0.00000 Median :0.00000 Mode :character Mode :character
## Mean :0.07778 Mean :0.05556
## 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000
# Simulate 200 draws from a normal distribution with mean 10 and standard
# deviation 20, then show their histogram
x.norm <- rnorm(200, mean = 10, sd = 20)
hist(x.norm, main = "N(10, 20) Histogram")
mean(data_1$age)
## [1] 34.31111
sd(data_1$age)
## [1] 11.68947
Next, we will simulate new synthetic data to match the properties/characteristics of the observed data (using Uniform, Normal, and Poisson distributions).
# i2 [0: 184]
# age m=34, sd=12
# treat {0, 1}
# homeless {0, 1}
# pcs 14-75
# mcs 7-62
# cesd 0-60
# indtot 4-45
# pss_fr 0-14
# drugrisk 0-21
# sexrisk
# satreat (0=no, 1=yes)
# female (0=no, 1=yes)
# racegrp (black, white, other)
# Demographics variables
# Define number of subjects
NumSubj <- 282
NumTime <- 4
# Define data elements
# Cases
Cases <- c(2, 3, 6, 7, 8, 10, 11, 12, 13, 14, 17, 18, 20, 21, 22, 23, 24,
25, 26, 28, 29, 30, 31, 32, 33, 34, 35, 37, 41, 42, 43, 44, 45, 53, 55, 58,
60, 62, 67, 69, 71, 72, 74, 79, 80, 85, 87, 90, 95, 97, 99, 100, 101, 106,
107, 109, 112, 120, 123, 125, 128, 129, 132, 134, 136, 139, 142, 147, 149,
153, 158, 160, 162, 163, 167, 172, 174, 178, 179, 180, 182, 192, 195, 201,
208, 211, 215, 217, 223, 227, 228, 233, 235, 236, 240, 245, 248, 250, 251,
254, 257, 259, 261, 264, 268, 269, 272, 273, 275, 279, 288, 289, 291, 296,
298, 303, 305, 309, 314, 318, 324, 325, 326, 328, 331, 332, 333, 334, 336,
338, 339, 341, 344, 346, 347, 350, 353, 354, 359, 361, 363, 364, 366, 367,
368, 369, 370, 371, 372, 374, 375, 376, 377, 378, 381, 382, 384, 385, 386,
387, 389, 390, 393, 395, 398, 400, 410, 421, 423, 428, 433, 435, 443, 447,
449, 450, 451, 453, 454, 455, 456, 457, 458, 459, 460, 461, 465, 466, 467,
470, 471, 472, 476, 477, 478, 479, 480, 481, 483, 484, 485, 486, 487, 488,
489, 492, 493, 494, 496, 498, 501, 504, 507, 510, 513, 515, 528, 530, 533,
537, 538, 542, 545, 546, 549, 555, 557, 559, 560, 566, 572, 573, 576, 582,
586, 590, 592, 597, 603, 604, 611, 619, 621, 623, 624, 625, 631, 633, 634,
635, 637, 640, 641, 643, 644, 645, 646, 647, 648, 649, 650, 652, 654, 656,
658, 660, 664, 665, 670, 673, 677, 678, 679, 680, 682, 683, 686, 687, 688,
689, 690, 692)
# Imaging Biomarkers
L_caudate_ComputeArea <- rpois(NumSubj, 600)
L_caudate_Volume <- rpois(NumSubj, 800)
R_caudate_ComputeArea <- rpois(NumSubj, 893)
R_caudate_Volume <- rpois(NumSubj, 1000)
L_putamen_ComputeArea <- rpois(NumSubj, 900)
L_putamen_Volume <- rpois(NumSubj, 1400)
R_putamen_ComputeArea <- rpois(NumSubj, 1300)
R_putamen_Volume <- rpois(NumSubj, 3000)
L_hippocampus_ComputeArea <- rpois(NumSubj, 1300)
L_hippocampus_Volume <- rpois(NumSubj, 3200)
R_hippocampus_ComputeArea <- rpois(NumSubj, 1500)
R_hippocampus_Volume <- rpois(NumSubj, 3800)
cerebellum_ComputeArea <- rpois(NumSubj, 16700)
cerebellum_Volume <- rpois(NumSubj, 14000)
L_lingual_gyrus_ComputeArea <- rpois(NumSubj, 3300)
L_lingual_gyrus_Volume <- rpois(NumSubj, 11000)
R_lingual_gyrus_ComputeArea <- rpois(NumSubj, 3300)
R_lingual_gyrus_Volume <- rpois(NumSubj, 12000)
L_fusiform_gyrus_ComputeArea <- rpois(NumSubj, 3600)
L_fusiform_gyrus_Volume <- rpois(NumSubj, 11000)
R_fusiform_gyrus_ComputeArea <- rpois(NumSubj, 3300)
R_fusiform_gyrus_Volume <- rpois(NumSubj, 10000)
Sex <- ifelse(runif(NumSubj)<.5, 0, 1)
Weight <- as.integer(rnorm(NumSubj, 80, 10))
Age <- as.integer(rnorm(NumSubj, 62, 10))
# Diagnostic labels (DX):
Dx <- c(rep("PD", 100), rep("HC", 100), rep("SWEDD", 82))
# Genetics traits
chr12_rs34637584_GT <- c(ifelse(runif(100)<.3, 0, 1), ifelse(runif(100)<.6,
0, 1), ifelse(runif(82)<.4, 0, 1)) # NumSubj Bernoulli trials
chr17_rs11868035_GT <- c(ifelse(runif(100)<.7, 0, 1), ifelse(runif(100)<.4,
0, 1), ifelse(runif(82)<.5, 0, 1)) # NumSubj Bernoulli trials
# Clinical # rpois(NumSubj, 15) + rpois(NumSubj, 6)
UPDRS_part_I <- c(ifelse(runif(100)<.7,0,1) + ifelse(runif(100) < .7, 0, 1),
ifelse(runif(100)<.6, 0, 1)+ ifelse(runif(100)<.6, 0, 1),
ifelse(runif(82)<.4, 0, 1)+ ifelse(runif(82)<.4, 0, 1) )
UPDRS_part_II <- c(sample.int(20, 100, replace=T), sample.int(14, 100,
replace=T),
sample.int(18, 82, replace=T) )
UPDRS_part_III <- c(sample.int(30, 100, replace=T), sample.int(20, 100,
replace=T), sample.int(25, 82, replace=T) )
# Time: VisitTime - done automatically below in aggregator
# Data (putting all components together)
# Assemble the long-format simulated dataset. Each subject-level vector is
# repeated NumTime times (one row per visit), and the visit times
# 0, 6, 12, 18 cycle once per subject. Naming the cbind() arguments assigns
# the column names in the same step.
# NOTE: Dx is a character vector, so cbind() produces a character matrix;
# every column, including the numeric ones, is stored as text.
sim_PD_Data <- cbind(
  Cases                 = rep(Cases, each = NumTime),                 # Cases
  L_caudate_ComputeArea = rep(L_caudate_ComputeArea, each = NumTime), # Imaging
  Sex                   = rep(Sex, each = NumTime),                   # Demographics
  Weight                = rep(Weight, each = NumTime),
  Age                   = rep(Age, each = NumTime),
  Dx                    = rep(Dx, each = NumTime),                    # Dx
  chr12_rs34637584_GT   = rep(chr12_rs34637584_GT, each = NumTime),   # Genetics
  chr17_rs11868035_GT   = rep(chr17_rs11868035_GT, each = NumTime),
  UPDRS_part_I          = rep(UPDRS_part_I, each = NumTime),          # Clinical
  UPDRS_part_II         = rep(UPDRS_part_II, each = NumTime),
  UPDRS_part_III        = rep(UPDRS_part_III, each = NumTime),
  Time                  = rep(c(0, 6, 12, 18), NumSubj)               # Time
)
# some QC
summary(sim_PD_Data)
## Cases L_caudate_ComputeArea Sex Weight Age
## 10 : 4 604 : 40 0:620 80 : 64 61 : 68
## 100 : 4 592 : 32 1:508 81 : 64 59 : 56
## 101 : 4 594 : 28 84 : 56 60 : 56
## 106 : 4 597 : 28 72 : 52 63 : 56
## 107 : 4 575 : 24 76 : 52 56 : 52
## 109 : 4 580 : 24 70 : 48 58 : 52
## (Other):1104 (Other):952 (Other):792 (Other):788
## Dx chr12_rs34637584_GT chr17_rs11868035_GT UPDRS_part_I
## HC :400 0:468 0:616 0:380
## PD :400 1:660 1:512 1:548
## SWEDD:328 2:200
##
##
##
##
## UPDRS_part_II UPDRS_part_III Time
## 10 : 80 12 : 84 0 :282
## 3 : 80 17 : 64 12:282
## 13 : 76 10 : 60 18:282
## 1 : 72 1 : 56 6 :282
## 12 : 72 16 : 52
## 14 : 72 8 : 52
## (Other):676 (Other):760
dim(sim_PD_Data)
## [1] 1128 12
head(sim_PD_Data)
## Cases L_caudate_ComputeArea Sex Weight Age Dx chr12_rs34637584_GT
## [1,] "2" "592" "0" "94" "62" "PD" "1"
## [2,] "2" "592" "0" "94" "62" "PD" "1"
## [3,] "2" "592" "0" "94" "62" "PD" "1"
## [4,] "2" "592" "0" "94" "62" "PD" "1"
## [5,] "3" "628" "0" "73" "43" "PD" "1"
## [6,] "3" "628" "0" "73" "43" "PD" "1"
## chr17_rs11868035_GT UPDRS_part_I UPDRS_part_II UPDRS_part_III Time
## [1,] "0" "1" "14" "3" "0"
## [2,] "0" "1" "14" "3" "6"
## [3,] "0" "1" "14" "3" "12"
## [4,] "0" "1" "14" "3" "18"
## [5,] "0" "0" "18" "17" "0"
## [6,] "0" "0" "18" "17" "6"
hist(data_1$age, freq=FALSE, right=FALSE, ylim = c(0,0.05))
lines(density(as.numeric(as.data.frame(sim_PD_Data)$Age)),lwd=2, col="blue")
legend("topright", c("Raw Data", "Simulated Data"), fill=c("black", "blue"))
# Save Results
# Write out (save) the result to a file that can be shared
# Use a bare "," as the field separator (not ", "): an extra blank after the
# comma would become a leading space in every field after the first when the
# file is read back as CSV.
write.table(
  sim_PD_Data,
  "output_data.csv",
  sep = ",",
  row.names = FALSE,
  col.names = TRUE
)
SOCR Datasets can automatically be downloaded into the R environment using the following protocol.
library(rvest)
## Loading required package: xml2
wiki_url <- read_html("http://wiki.socr.umich.edu/index.php/SOCR_Data_PD_BiomedBigMetadata")
html_nodes(wiki_url, "#content")
## {xml_nodeset (1)}
## [1] <div id="content" class="mw-body-primary" role="main">\n\t<a id="top ...
pd_data <- html_table(html_nodes(wiki_url, "table")[[1]])
head(pd_data); summary(pd_data)
## Cases L_caudate_ComputeArea L_caudate_Volume R_caudate_ComputeArea
## 1 2 597 767 855
## 2 2 597 767 855
## 3 2 597 767 855
## 4 2 597 767 855
## 5 3 604 873 935
## 6 3 604 873 935
## R_caudate_Volume L_putamen_ComputeArea L_putamen_Volume
## 1 968 842 1357
## 2 968 842 1357
## 3 968 842 1357
## 4 968 842 1357
## 5 1043 892 1366
## 6 1043 892 1366
## R_putamen_ComputeArea R_putamen_Volume L_hippocampus_ComputeArea
## 1 1285 3052 1306
## 2 1285 3052 1306
## 3 1285 3052 1306
## 4 1285 3052 1306
## 5 1305 2920 1292
## 6 1305 2920 1292
## L_hippocampus_Volume R_hippocampus_ComputeArea R_hippocampus_Volume
## 1 3238 1513 3759
## 2 3238 1513 3759
## 3 3238 1513 3759
## 4 3238 1513 3759
## 5 3079 1516 3827
## 6 3079 1516 3827
## cerebellum_ComputeArea cerebellum_Volume L_lingual_gyrus_ComputeArea
## 1 16845 13949 3268
## 2 16845 13949 3268
## 3 16845 13949 3268
## 4 16845 13949 3268
## 5 16698 14076 3243
## 6 16698 14076 3243
## L_lingual_gyrus_Volume R_lingual_gyrus_ComputeArea
## 1 11130 3294
## 2 11130 3294
## 3 11130 3294
## 4 11130 3294
## 5 11033 3190
## 6 11033 3190
## R_lingual_gyrus_Volume L_fusiform_gyrus_ComputeArea
## 1 12221 3625
## 2 12221 3625
## 3 12221 3625
## 4 12221 3625
## 5 12187 3631
## 6 12187 3631
## L_fusiform_gyrus_Volume R_fusiform_gyrus_ComputeArea
## 1 11087 3232
## 2 11087 3232
## 3 11087 3232
## 4 11087 3232
## 5 11116 3302
## 6 11116 3302
## R_fusiform_gyrus_Volume Sex Weight Age Dx chr12_rs34637584_GT
## 1 10122 1 84 67 PD 1
## 2 10122 1 84 67 PD 1
## 3 10122 1 84 67 PD 1
## 4 10122 1 84 67 PD 1
## 5 10162 0 97 39 PD 1
## 6 10162 0 97 39 PD 1
## chr17_rs11868035_GT UPDRS_part_I UPDRS_part_II UPDRS_part_III Time
## 1 0 1 12 1 0
## 2 0 1 12 1 6
## 3 0 1 12 1 12
## 4 0 1 12 1 18
## 5 1 0 19 22 0
## 6 1 0 19 22 6
## Cases L_caudate_ComputeArea L_caudate_Volume
## Min. : 2.0 Min. :525.0 Min. :719.0
## 1st Qu.:158.0 1st Qu.:582.0 1st Qu.:784.0
## Median :363.5 Median :600.0 Median :800.0
## Mean :346.1 Mean :600.4 Mean :800.3
## 3rd Qu.:504.0 3rd Qu.:619.0 3rd Qu.:819.0
## Max. :692.0 Max. :667.0 Max. :890.0
## R_caudate_ComputeArea R_caudate_Volume L_putamen_ComputeArea
## Min. :795.0 Min. : 916 Min. : 815.0
## 1st Qu.:875.0 1st Qu.: 979 1st Qu.: 879.0
## Median :897.0 Median : 998 Median : 897.5
## Mean :894.5 Mean :1001 Mean : 898.9
## 3rd Qu.:916.0 3rd Qu.:1022 3rd Qu.: 919.0
## Max. :977.0 Max. :1094 Max. :1003.0
## L_putamen_Volume R_putamen_ComputeArea R_putamen_Volume
## Min. :1298 Min. :1198 Min. :2846
## 1st Qu.:1376 1st Qu.:1276 1st Qu.:2959
## Median :1400 Median :1302 Median :3000
## Mean :1400 Mean :1300 Mean :3000
## 3rd Qu.:1427 3rd Qu.:1321 3rd Qu.:3039
## Max. :1507 Max. :1392 Max. :3148
## L_hippocampus_ComputeArea L_hippocampus_Volume R_hippocampus_ComputeArea
## Min. :1203 Min. :3036 Min. :1414
## 1st Qu.:1277 1st Qu.:3165 1st Qu.:1479
## Median :1300 Median :3200 Median :1504
## Mean :1302 Mean :3198 Mean :1504
## 3rd Qu.:1325 3rd Qu.:3228 3rd Qu.:1529
## Max. :1422 Max. :3381 Max. :1602
## R_hippocampus_Volume cerebellum_ComputeArea cerebellum_Volume
## Min. :3634 Min. :16378 Min. :13680
## 1st Qu.:3761 1st Qu.:16617 1st Qu.:13933
## Median :3802 Median :16699 Median :13996
## Mean :3799 Mean :16700 Mean :14002
## 3rd Qu.:3833 3rd Qu.:16784 3rd Qu.:14077
## Max. :4013 Max. :17096 Max. :14370
## L_lingual_gyrus_ComputeArea L_lingual_gyrus_Volume
## Min. :3136 Min. :10709
## 1st Qu.:3262 1st Qu.:10943
## Median :3299 Median :11007
## Mean :3300 Mean :11010
## 3rd Qu.:3333 3rd Qu.:11080
## Max. :3469 Max. :11488
## R_lingual_gyrus_ComputeArea R_lingual_gyrus_Volume
## Min. :3135 Min. :11679
## 1st Qu.:3258 1st Qu.:11935
## Median :3294 Median :12001
## Mean :3296 Mean :12008
## 3rd Qu.:3338 3rd Qu.:12079
## Max. :3490 Max. :12324
## L_fusiform_gyrus_ComputeArea L_fusiform_gyrus_Volume
## Min. :3446 Min. :10682
## 1st Qu.:3554 1st Qu.:10947
## Median :3594 Median :11016
## Mean :3598 Mean :11011
## 3rd Qu.:3637 3rd Qu.:11087
## Max. :3763 Max. :11394
## R_fusiform_gyrus_ComputeArea R_fusiform_gyrus_Volume Sex
## Min. :3094 Min. : 9736 Min. :0.0000
## 1st Qu.:3260 1st Qu.: 9928 1st Qu.:0.0000
## Median :3296 Median : 9994 Median :1.0000
## Mean :3299 Mean : 9996 Mean :0.5851
## 3rd Qu.:3332 3rd Qu.:10058 3rd Qu.:1.0000
## Max. :3443 Max. :10235 Max. :1.0000
## Weight Age Dx chr12_rs34637584_GT
## Min. : 51.00 Min. :31.00 Length:1128 Min. :0.000
## 1st Qu.: 71.00 1st Qu.:54.00 Class :character 1st Qu.:0.000
## Median : 78.50 Median :61.00 Mode :character Median :1.000
## Mean : 78.45 Mean :60.64 Mean :0.539
## 3rd Qu.: 84.00 3rd Qu.:68.00 3rd Qu.:1.000
## Max. :109.00 Max. :87.00 Max. :1.000
## chr17_rs11868035_GT UPDRS_part_I UPDRS_part_II UPDRS_part_III
## Min. :0.0000 Min. :0.000 Min. : 1.000 Min. : 1.00
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.: 5.000 1st Qu.: 6.00
## Median :0.0000 Median :1.000 Median : 9.000 Median :13.00
## Mean :0.4184 Mean :0.773 Mean : 8.879 Mean :13.02
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:13.000 3rd Qu.:18.00
## Max. :1.0000 Max. :2.000 Max. :20.000 Max. :30.00
## Time
## Min. : 0.0
## 1st Qu.: 4.5
## Median : 9.0
## Mean : 9.0
## 3rd Qu.:13.5
## Max. :18.0