library(e1071)
# Switch the session to the folder that holds the data file.
# NOTE(review): this is an absolute, machine-specific path, so the script
# only runs as-is where this directory exists.
setwd("C:/Users/Maria Elena Morinigo/Desktop/MSDA/DA 6813 - Data Analytics Applications/Week 5")

# Load the data set. read.csv() already defaults to header = TRUE and
# sep = ",", so those arguments are left implicit.
df <- read.csv("SVR_SpineD2.csv")

# Print the column types and the leading values of every variable.
str(df)
## 'data.frame': 69 obs. of 19 variables:
## $ Tum.cross.section.Area : num 81.7 100.4 60.5 57 63.2 ...
## $ Tumor.Volume..cc. : num 1765 1865 1490 1432 1606 ...
## $ Tumor.Spread..cm. : num 22 24.4 20.7 25.2 21 ...
## $ Tumor.Surface.area : num 865 969 1227 844 989 ...
## $ Cord.Tum.Min.Dist : num 0.6 0.7 0.825 0.539 0.849 ...
## $ Cord.Tum.Dist : num 3.92 5.14 4.03 6.48 5.81 ...
## $ Cord.cross.section.area: num 0.809 1.121 1.749 0.712 1.061 ...
## $ Cord.Volume..cc. : num 24.5 21.6 25.5 17.3 25.3 ...
## $ Cord.Spread..cm. : num 19.8 21.9 16.2 21.3 18 ...
## $ Cord.Surface.area : num 123 133 127 106 112 ...
## $ Tum.D2 : num 81.2 76.4 61.5 76.9 77 69.4 72.2 79.5 76.3 76.5 ...
## $ Tum.D20 : num 77.7 73.9 59.3 74.2 73.7 64.1 69.3 76.8 73.7 74.3 ...
## $ Tum.D40 : num 76 72.5 58.6 73 70.5 58.2 68 70.7 70.7 73.3 ...
## $ Tum.D60 : num 69.6 68.5 57.8 69.7 64.4 57 65.1 59.7 59.8 69.5 ...
## $ Tum.D80 : num 56 60.9 56.6 60.1 56.8 55.2 55.8 48.2 50.6 58.9 ...
## $ Tum.D98 : num 43.9 36.7 51.1 47.1 49.3 46.8 48.9 41.6 47.5 47.1 ...
## $ Concurrent.Chemotherapy: int 1 1 1 1 1 0 1 1 1 1 ...
## $ Surgery : int 0 1 0 1 1 0 0 0 1 0 ...
## $ D2 : num 57.1 48.3 47.9 52.7 45.7 42.3 47.1 45.5 38.9 42.1 ...
# Recode the two 0/1 integer indicator columns as factors so that the models
# below treat them as categorical predictors rather than numeric ones.
indicator_cols <- c("Concurrent.Chemotherapy", "Surgery")
df[indicator_cols] <- lapply(df[indicator_cols], as.factor)
# For now, we compute only D2 for the spine and brainstem. The 69 patients are split into 50 for training and 19 for testing.
# Hold-out split: draw `n_train` rows at random for training and keep the
# remaining rows for testing. The seed makes the split reproducible.
set.seed(123)
n_train <- 50L

# Fail early if there are not enough rows to leave a non-empty test set.
stopifnot(nrow(df) > n_train)

# seq_len() is safe for any row count (1:nrow(df) yields c(1, 0) when empty).
train <- sample(seq_len(nrow(df)), n_train)

# Row-subsetting a data frame already returns a data frame.
train.data <- df[train, ]
test.data <- df[-train, ]
# Tune and fit a support vector regression of D2 on all other columns.
#
# Step 1: grid-search gamma (0.01 to 0.1) and cost (0.1 to 1) with
# tune.svm(), using its default resampling scheme. The seed is reset so the
# resampling folds are reproducible.
set.seed(123)
tuned1 <- tune.svm(
  D2 ~ .,
  data = train.data,
  gamma = seq(0.01, 0.1, by = 0.01),
  cost = seq(0.1, 1, by = 0.1),
  # Must be passed by name to reach svm(); scale = TRUE is also svm()'s
  # default, so predictors are standardised before fitting.
  scale = TRUE
)

# Step 2: refit on the full training set with the best (gamma, cost) pair
# found by the grid search.
resultsSVM <- svm(
  formula = D2 ~ .,
  data = train.data,
  gamma = tuned1$best.parameters$gamma,
  cost = tuned1$best.parameters$cost
)
summary(resultsSVM)
##
## Call:
## svm(formula = D2 ~ ., data = train.data, gamma = tuned1$best.parameters$gamma,
## cost = tuned1$best.parameters$cost)
##
##
## Parameters:
## SVM-Type: eps-regression
## SVM-Kernel: radial
## cost: 0.4
## gamma: 0.04
## epsilon: 0.1
##
##
## Number of Support Vectors: 47
# Predict D2 for the held-out patients with the tuned SVM. predict() for svm
# objects has no `type` argument, so none is passed.
predSVM <- predict(resultsSVM, newdata = test.data)

# Test-set mean squared error of the SVM predictions.
mean((predSVM - test.data$D2)^2)
## [1] 29.27505
# Baseline for comparison: ordinary least squares regression of D2 on all
# other columns, fitted on the same training set as the SVM.
resultsLM <- lm(D2 ~ ., data = train.data)
summary(resultsLM)
##
## Call:
## lm(formula = D2 ~ ., data = train.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.3839 -1.6608 0.4359 2.0896 4.1640
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 48.920658 16.708663 2.928 0.00634 **
## Tum.cross.section.Area -0.062196 0.042167 -1.475 0.15030
## Tumor.Volume..cc. 0.006067 0.002834 2.141 0.04028 *
## Tumor.Spread..cm. -0.310167 0.395982 -0.783 0.43940
## Tumor.Surface.area -0.007883 0.003369 -2.340 0.02591 *
## Cord.Tum.Min.Dist -12.530541 3.983982 -3.145 0.00365 **
## Cord.Tum.Dist -0.431544 0.541978 -0.796 0.43195
## Cord.cross.section.area -2.274603 3.052377 -0.745 0.46177
## Cord.Volume..cc. -0.136425 0.171806 -0.794 0.43319
## Cord.Spread..cm. -0.110274 0.360600 -0.306 0.76180
## Cord.Surface.area 0.015219 0.026823 0.567 0.57453
## Tum.D2 0.425388 0.400806 1.061 0.29674
## Tum.D20 0.024194 0.634144 0.038 0.96981
## Tum.D40 -0.074288 0.503149 -0.148 0.88358
## Tum.D60 0.352077 0.382553 0.920 0.36451
## Tum.D80 -0.381967 0.206100 -1.853 0.07338 .
## Tum.D98 -0.088720 0.086507 -1.026 0.31303
## Concurrent.Chemotherapy1 -3.256423 3.095977 -1.052 0.30101
## Surgery1 1.063002 1.217746 0.873 0.38942
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.452 on 31 degrees of freedom
## Multiple R-squared: 0.5342, Adjusted R-squared: 0.2637
## F-statistic: 1.975 on 18 and 31 DF, p-value: 0.04664
# Predict D2 for the held-out patients with the linear model.
predLM <- predict(resultsLM, newdata = test.data)

# Test-set mean squared error of the linear-model predictions.
mean((predLM - test.data$D2)^2)
## [1] 25.25422