Univariate Data Analysis

# Per-column summary of the Cancer data: numeric columns report
# min / quartiles / mean / max, while the character column `outcome`
# reports only its length, class and mode.
summary(Cancer)
##        X                id             radius         texture     
##  Min.   :  1.00   Min.   :  1.00   Min.   : 9.00   Min.   :11.00  
##  1st Qu.: 25.75   1st Qu.: 25.75   1st Qu.:12.00   1st Qu.:14.00  
##  Median : 50.50   Median : 50.50   Median :17.00   Median :17.50  
##  Mean   : 50.50   Mean   : 50.50   Mean   :16.85   Mean   :18.23  
##  3rd Qu.: 75.25   3rd Qu.: 75.25   3rd Qu.:21.00   3rd Qu.:22.25  
##  Max.   :100.00   Max.   :100.00   Max.   :25.00   Max.   :27.00  
##    perimeter           area          smoothness      compactness    
##  Min.   : 52.00   Min.   : 202.0   Min.   :0.0700   Min.   :0.0380  
##  1st Qu.: 82.50   1st Qu.: 476.8   1st Qu.:0.0935   1st Qu.:0.0805  
##  Median : 94.00   Median : 644.0   Median :0.1020   Median :0.1185  
##  Mean   : 96.78   Mean   : 702.9   Mean   :0.1027   Mean   :0.1267  
##  3rd Qu.:114.25   3rd Qu.: 917.0   3rd Qu.:0.1120   3rd Qu.:0.1570  
##  Max.   :172.00   Max.   :1878.0   Max.   :0.1430   Max.   :0.3450  
##     symmetry      fractional.dimension   outcome            diagnosis   
##  Min.   :0.1350   Min.   :0.05300      Length:100         Min.   :0.00  
##  1st Qu.:0.1720   1st Qu.:0.05900      Class :character   1st Qu.:0.00  
##  Median :0.1900   Median :0.06300      Mode  :character   Median :1.00  
##  Mean   :0.1932   Mean   :0.06469                         Mean   :0.62  
##  3rd Qu.:0.2090   3rd Qu.:0.06900                         3rd Qu.:1.00  
##  Max.   :0.3040   Max.   :0.09700                         Max.   :1.00
# NOTE(review): attach() puts Cancer's columns on the search path so bare
# names such as `radius` resolve without `Cancer$`. It is discouraged because
# attached copies can silently mask, or be masked by, other objects. Prefer
# passing `data = Cancer` to each model call and drop this line once nothing
# relies on it.
attach(Cancer)
# Univariate plots: a histogram of every measurement column, followed by an
# index plot of every measurement column. The vector fixes the drawing order.
feature_names <- c(
  "radius", "area", "symmetry", "compactness",
  "smoothness", "fractional.dimension", "perimeter", "texture"
)

# hist() derives its default title and x-axis label from the call expression,
# so the labels are spelled out to keep the "Cancer$<column>" text.
for (feature in feature_names) {
  label <- paste0("Cancer$", feature)
  hist(Cancer[[feature]], main = paste("Histogram of", label), xlab = label)
}

# plot() on a single vector draws value against position ("Index"); only the
# y-axis label comes from the expression, so that is the only one set here.
for (feature in feature_names) {
  plot(Cancer[[feature]], ylab = paste0("Cancer$", feature))
}

Bivariate Data Analysis

# ANOVA of radius on diagnosis. diagnosis is a numeric 0/1 column, so it
# enters the model as a single 1-df term. `data = Cancer` makes the column
# lookup explicit instead of depending on attach()ed names.
res <- aov(radius ~ diagnosis, data = Cancer)
summary(res)
##             Df Sum Sq Mean Sq F value Pr(>F)  
## diagnosis    1   73.8   73.81   3.168 0.0782 .
## Residuals   98 2282.9   23.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of texture on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res1 <- aov(texture ~ diagnosis, data = Cancer)
summary(res1)
##             Df Sum Sq Mean Sq F value Pr(>F)
## diagnosis    1   13.4   13.36   0.493  0.484
## Residuals   98 2656.4   27.11
# ANOVA of smoothness on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res2 <- aov(smoothness ~ diagnosis, data = Cancer)
summary(res2)
##             Df   Sum Sq   Mean Sq F value Pr(>F)  
## diagnosis    1 0.000829 0.0008288   3.983 0.0487 *
## Residuals   98 0.020395 0.0002081                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of perimeter on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res3 <- aov(perimeter ~ diagnosis, data = Cancer)
summary(res3)
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## diagnosis    1  20481   20481   57.32 2.06e-11 ***
## Residuals   98  35014     357                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of area on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res4 <- aov(area ~ diagnosis, data = Cancer)
summary(res4)
##             Df  Sum Sq Mean Sq F value   Pr(>F)    
## diagnosis    1 3201167 3201167   45.35 1.13e-09 ***
## Residuals   98 6918123   70593                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of compactness on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res5 <- aov(compactness ~ diagnosis, data = Cancer)
summary(res5)
##             Df  Sum Sq Mean Sq F value  Pr(>F)    
## diagnosis    1 0.09711 0.09711   34.86 5.1e-08 ***
## Residuals   98 0.27300 0.00279                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of symmetry on diagnosis; columns are resolved through
# `data = Cancer` rather than through attach()ed names.
res6 <- aov(symmetry ~ diagnosis, data = Cancer)
summary(res6)
##             Df  Sum Sq  Mean Sq F value Pr(>F)  
## diagnosis    1 0.00509 0.005095   5.627 0.0196 *
## Residuals   98 0.08873 0.000905                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ANOVA of fractional.dimension on diagnosis. Both sides of the formula use
# bare column names resolved through `data = Cancer`, so the response and the
# predictor are guaranteed to come from the same data frame.
res7 <- aov(fractional.dimension ~ diagnosis, data = Cancer)
summary(res7)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## diagnosis    1 0.000000 4.400e-07   0.007  0.936
## Residuals   98 0.006577 6.711e-05

Multivariate Analysis

# Logistic regression (binomial family, default logit link) of diagnosis on
# the eight measurement columns. Predictors are written as bare column names
# and resolved through `data = Cancer`; `Cancer$...` terms would bypass `data`,
# so predict(model, newdata = ...) would ignore the new observations. Terms
# are therefore labelled `radius`, `texture`, ... in the fitted model.
model <- glm(
  diagnosis ~ radius + texture + perimeter + area + smoothness +
    compactness + symmetry + fractional.dimension,
  family = binomial,
  data = Cancer
)
summary(model)
## 
## Call:
## glm(formula = diagnosis ~ Cancer$radius + Cancer$texture + Cancer$perimeter + 
##     Cancer$area + Cancer$smoothness + Cancer$compactness + Cancer$symmetry + 
##     Cancer$fractional.dimension, family = binomial, data = Cancer)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.4909  -0.3416   0.1959   0.4555   1.5749  
## 
## Coefficients:
##                               Estimate Std. Error z value Pr(>|z|)  
## (Intercept)                  8.783e-01  1.470e+01   0.060   0.9524  
## Cancer$radius               -2.006e-02  6.969e-02  -0.288   0.7735  
## Cancer$texture               7.915e-02  6.970e-02   1.136   0.2561  
## Cancer$perimeter             9.481e-02  2.057e-01   0.461   0.6448  
## Cancer$area                 -3.468e-03  1.324e-02  -0.262   0.7934  
## Cancer$smoothness           -2.014e+01  2.915e+01  -0.691   0.4897  
## Cancer$compactness           4.622e+01  2.324e+01   1.989   0.0467 *
## Cancer$symmetry             -4.738e+00  1.911e+01  -0.248   0.8042  
## Cancer$fractional.dimension -1.615e+02  1.271e+02  -1.270   0.2040  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 132.81  on 99  degrees of freedom
## Residual deviance:  66.24  on 91  degrees of freedom
## AIC: 84.24
## 
## Number of Fisher Scoring iterations: 6

END