library(reshape)
## Loading required package: plyr
##
## Attaching package: 'reshape'
##
## The following objects are masked from 'package:plyr':
##
## rename, round_any
library(ggplot2)
# Load the clinical annotation table (tab-separated, first row holds the
# column names). stringsAsFactors is set explicitly because its default
# changed in R 4.0.0; the recorded output below shows the text columns
# (Patient, filename, hospital, ...) as factors.
clindat <- read.table(
  "~/scripts/day2/GSE7390.clindat.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = TRUE
)
# Keep only the rows that have no missing value in any column.
# NOTE(review): every later count and plot is therefore based on the complete
# cases only (80 obs. in the recorded str() output), not on the full table
# (the factor columns still carry 198 levels) -- confirm this is intended.
clindat <- clindat[complete.cases(clindat), ]
# Show the type and the first values of every column.
str(clindat)
## 'data.frame': 80 obs. of 28 variables:
## $ Patient : Factor w/ 198 levels "GSM177885","GSM177886",..: 1 2 3 4 5 6 7 8 10 11 ...
## $ AOL_os_10y : num 62.7 69 66.2 84.9 80.2 83.1 80.6 82.1 81.5 77.8 ...
## $ Angioinv : int 1 1 0 1 1 1 1 1 2 1 ...
## $ Histtype : int 1 2 1 1 1 2 1 2 2 1 ...
## $ Lymp_infil : int 2 3 2 3 2 2 3 2 3 2 ...
## $ NPI : num 4.6 4.6 4.5 4.36 3.6 3.4 4.4 2.5 3.5 4.4 ...
## $ Surgery_type: int 0 0 1 0 0 0 1 1 0 1 ...
## $ age : int 57 57 48 42 46 58 44 58 38 59 ...
## $ e.dmfs : int 1 0 1 1 1 0 0 1 1 1 ...
## $ e.os : int 1 0 1 1 1 0 0 1 1 1 ...
## $ e.rfs : int 1 1 1 1 1 0 1 1 1 1 ...
## $ e.tdm : int 1 0 1 0 1 0 0 0 1 1 ...
## $ er : int 0 1 0 1 1 1 0 1 1 1 ...
## $ filename : Factor w/ 198 levels "1-4105_GUYA.CEL.gz",..: 2 140 169 191 116 68 59 135 160 99 ...
## $ grade : int 3 3 3 3 2 2 3 1 2 3 ...
## $ hospital : Factor w/ 5 levels "GUY","IGR","JRH",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ id : Factor w/ 197 levels "1000","1006",..: 142 143 144 145 146 147 148 149 151 152 ...
## $ node : int 0 0 0 0 0 0 0 0 0 0 ...
## $ risk_AOL : int 1 1 1 1 1 1 1 1 1 1 ...
## $ risknpi : int 2 2 2 2 2 2 2 1 2 2 ...
## $ risksg : int 2 2 2 2 2 2 2 2 2 2 ...
## $ samplename : Factor w/ 198 levels "VDXGUYU_4002",..: 1 2 3 4 5 6 7 8 10 11 ...
## $ size : num 3 3 2.5 1.8 3 2 2 2.5 2.5 2 ...
## $ t.dmfs : int 723 6591 524 6255 3822 6507 5947 5816 1233 1136 ...
## $ t.os : int 937 6591 922 6255 4133 6507 5947 5816 1484 1911 ...
## $ t.rfs : int 723 183 524 2192 3822 6507 709 5816 422 1136 ...
## $ t.tdm : int 723 6591 524 6255 3822 6507 5947 5816 1233 1136 ...
## $ veridex_risk: Factor w/ 2 levels "Good","Poor": 2 2 2 2 2 1 2 1 2 2 ...
# Print per-column summary statistics (quantiles for numeric columns, level
# counts for factor columns) of the clindat data frame.
summary(clindat)
## Patient AOL_os_10y Angioinv Histtype Lymp_infil
## GSM177885: 1 Min. :62.1 Min. :0.00 Min. :1.00 Min. :1.0
## GSM177886: 1 1st Qu.:74.6 1st Qu.:1.00 1st Qu.:1.00 1st Qu.:1.0
## GSM177887: 1 Median :81.3 Median :1.00 Median :1.00 Median :2.0
## GSM177888: 1 Mean :80.2 Mean :1.24 Mean :1.48 Mean :1.9
## GSM177889: 1 3rd Qu.:87.2 3rd Qu.:1.00 3rd Qu.:1.00 3rd Qu.:2.0
## GSM177890: 1 Max. :94.3 Max. :3.00 Max. :9.00 Max. :3.0
## (Other) :74
## NPI Surgery_type age e.dmfs
## Min. :2.16 Min. :0.000 Min. :30.0 Min. :0.000
## 1st Qu.:3.30 1st Qu.:0.000 1st Qu.:42.0 1st Qu.:0.000
## Median :3.51 Median :0.000 Median :46.0 Median :0.000
## Mean :3.67 Mean :0.263 Mean :46.2 Mean :0.338
## 3rd Qu.:4.40 3rd Qu.:1.000 3rd Qu.:50.2 3rd Qu.:1.000
## Max. :4.90 Max. :1.000 Max. :59.0 Max. :1.000
##
## e.os e.rfs e.tdm er
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :1.000
## Mean :0.338 Mean :0.487 Mean :0.287 Mean :0.688
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
##
## filename grade hospital id node
## 1-4105_GUYA.CEL.gz : 1 Min. :1.00 GUY:34 166154 : 1 Min. :0
## 10-4002_GUYA.CEL.gz: 1 1st Qu.:2.00 IGR:45 171150 : 1 1st Qu.:0
## 11-4076_GUYA.CEL.gz: 1 Median :2.00 JRH: 0 171260 : 1 Median :0
## 12-4073_GUYA.CEL.gz: 1 Mean :2.21 KAR: 0 171558 : 1 Mean :0
## 14-4088_GUYA.CEL.gz: 1 3rd Qu.:3.00 RH : 1 171620 : 1 3rd Qu.:0
## 15-4074_GUYA.CEL.gz: 1 Max. :3.00 172086 : 1 Max. :0
## (Other) :74 (Other):74
## risk_AOL risknpi risksg samplename
## Min. :0.000 Min. :1.00 Min. :1.00 VDXGUYU_4002: 1
## 1st Qu.:1.000 1st Qu.:1.00 1st Qu.:2.00 VDXGUYU_4008: 1
## Median :1.000 Median :2.00 Median :2.00 VDXGUYU_4011: 1
## Mean :0.812 Mean :1.73 Mean :1.91 VDXGUYU_4014: 1
## 3rd Qu.:1.000 3rd Qu.:2.00 3rd Qu.:2.00 VDXGUYU_4022: 1
## Max. :1.000 Max. :2.00 Max. :2.00 VDXGUYU_4033: 1
## (Other) :74
## size t.dmfs t.os t.rfs
## Min. :0.60 Min. : 421 Min. : 667 Min. : 121
## 1st Qu.:1.90 1st Qu.:1679 1st Qu.:2123 1st Qu.:1007
## Median :2.20 Median :4146 Median :4422 Median :3064
## Mean :2.31 Mean :3705 Mean :3924 Mean :3039
## 3rd Qu.:2.62 3rd Qu.:5454 3rd Qu.:5542 3rd Qu.:4969
## Max. :4.50 Max. :7057 Max. :7057 Max. :6507
##
## t.tdm veridex_risk
## Min. : 421 Good:25
## 1st Qu.:1679 Poor:55
## Median :4146
## Mean :3705
## 3rd Qu.:5454
## Max. :7057
##
198 rows
28 variables
60
How old is he/she? Which one is the youngest? How old is he/she? 24
11
### Which patients are older than 59 years? Please show the sample names, which are given in the column samplename. Hint: function which()
# Sample names of all patients whose age is above 59; which() turns the
# logical mask into row positions and skips missing ages.
clindat[["samplename"]][which(clindat[["age"]] > 59)]
## factor(0)
## 198 Levels: VDXGUYU_4002 VDXGUYU_4008 VDXGUYU_4011 ... VDXRHU_535
How many patients are from hospital IGR?
# Count the rows whose hospital is "IGR"; missing values are not counted.
sum(clindat$hospital == "IGR", na.rm = TRUE)
## [1] 45
Which different grades occur in our patients? Hint: function unique()
# Distinct values of the grade column, in order of first appearance.
unique(clindat[["grade"]])
## [1] 3 2 1
The column e.tdm means “to distal metastasis”. It contains the binary values 0 and 1, denoting non-metastasis and metastasis samples, respectively.
# Number of rows with e.tdm equal to 1; missing values are not counted.
sum(clindat$e.tdm == 1, na.rm = TRUE)
## [1] 23
# Number of rows with e.tdm equal to 0; missing values are not counted.
sum(clindat$e.tdm == 0, na.rm = TRUE)
## [1] 57
# Scatter plot of age (y axis) against size (x axis).
plot(age ~ size, data = clindat)
# Scatter-plot matrix of the continuous columns. The columns are selected by
# name rather than by position (2, 6, 8, 23:27) so that the plot does not
# silently change if the column order of the input file changes.
pairs(clindat[, c(
  "AOL_os_10y", "NPI", "age", "size",
  "t.dmfs", "t.os", "t.rfs", "t.tdm"
)])
# Pairwise Spearman rank correlation between four numeric columns.
clindat.mx <- cor(
  clindat[, c("age", "AOL_os_10y", "NPI", "size")],
  method = "spearman"
)
# Shrink the outer plotting region for the heatmap only: par() returns the
# previous settings, which are restored afterwards so that later base-graphics
# plots are not affected.
old_par <- par(omd = c(0.1, 0.9, 0.1, 0.9))
heatmap(clindat.mx)
par(old_par)
variables.
# Build a small data frame with the hospital and five numeric columns.
df2 <- with(clindat, data.frame(
  Surgery_type = Surgery_type,
  hospital = hospital,
  age = age,
  NPI = NPI,
  AOL_os_10y = AOL_os_10y,
  grade = grade
))
# Long format: one row per (hospital, variable, value). The argument is spelled
# out as id.vars instead of relying on partial matching of `id`.
df2.melt <- melt(df2, id.vars = "hospital")
# Box plot per variable, split by hospital, on a log10 y axis.
# NOTE(review): values of 0 (Surgery_type contains zeros) are not finite on a
# log10 scale and are dropped by stat_boxplot -- presumably the source of the
# recorded "Removed 59 rows" warning; confirm the log scale is wanted here.
ggplot(df2.melt, aes(x = variable, y = value, fill = hospital)) +
  geom_boxplot() +
  scale_y_log10()
## Warning: Removed 59 rows containing non-finite values (stat_boxplot).
# Base-graphics box plot of age per hospital.
boxplot(age ~ hospital, data = df2)
# Melt the whole table into long format. No id variables are given, so melt()
# chooses them itself and reports its choice (see the message below).
meltData <- melt(clindat)
## Using Patient, filename, hospital, id, samplename, veridex_risk as id variables
# All measured variables side by side on one shared y axis.
boxplot(value ~ variable, data = meltData)
# Same data with ggplot2, one panel per variable, each with its own axes.
# The facet_wrap() argument is spelled out as `scales`; the original `scale`
# only worked through partial argument matching.
p <- ggplot(meltData, aes(factor(variable), value))
p + geom_boxplot() + facet_wrap(~variable, scales = "free")