library(MASS)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
## 
##     select
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the Melanoma data set from MASS and preview its first six rows.
data("Melanoma", package = "MASS")
head(Melanoma, n = 6L)
##   time status sex age year thickness ulcer
## 1   10      3   1  76 1972      6.76     1
## 2   30      3   1  56 1968      0.65     0
## 3   35      2   1  41 1977      1.34     0
## 4   99      3   0  71 1968      2.90     0
## 5  185      1   1  52 1965     12.08     1
## 6  204      1   1  28 1971      4.84     1
# Compact column-by-column overview (type and leading values) of the data.
dplyr::glimpse(Melanoma)
## Observations: 205
## Variables: 7
## $ time      <int> 10, 30, 35, 99, 185, 204, 210, 232, 232, 279, 295, 3...
## $ status    <int> 3, 3, 2, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1...
## $ sex       <int> 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1...
## $ age       <int> 76, 56, 41, 71, 52, 28, 77, 60, 49, 68, 53, 64, 68, ...
## $ year      <int> 1972, 1968, 1977, 1968, 1965, 1971, 1972, 1974, 1968...
## $ thickness <dbl> 6.76, 0.65, 1.34, 2.90, 12.08, 4.84, 5.16, 3.22, 12....
## $ ulcer     <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...

Exercise 1

# Recode sex (1 -> "MALE", anything else -> "FEMALE") and convert both sex and
# ulcer to factors in a single step.
Melanoma1 <- Melanoma %>%
  mutate(
    sex = factor(ifelse(sex == 1L, "MALE", "FEMALE")),
    ulcer = factor(ulcer)
  )
str(Melanoma1$sex)
##  Factor w/ 2 levels "FEMALE","MALE": 2 2 2 1 2 2 2 1 2 1 ...
str(Melanoma1$ulcer)
##  Factor w/ 2 levels "0","1": 2 1 1 1 2 2 2 2 2 2 ...

Exercise 2

# Add a two-level factor `died`: "1" where status <= 1, "0" otherwise.
# NOTE(review): presumably status 1 is the "died from melanoma" code in
# MASS::Melanoma -- confirm against the data set documentation.
Melanoma1 <- Melanoma1 %>%
  mutate(died = factor(as.integer(status <= 1)))
str(Melanoma1$died)
##  Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 2 ...

Exercise 3

# For patients older than 65, summarise by ulcer status: group size, mean age,
# mean thickness, and the proportion of patients coded died == "1".
Melanoma1 %>%
  # filter() expects logical conditions only; the bare `age` column that used
  # to be passed as a first condition is not a predicate and is rejected by
  # current dplyr.
  filter(age > 65) %>%
  group_by(ulcer) %>%
  summarise(
    n = n(),
    age = mean(age),
    thickness = mean(thickness),
    # `died` is a factor, and mean() of a factor is NA (with a warning).
    # Averaging the logical indicator for level "1" gives the proportion.
    died = mean(died == "1")
  ) %>%
  arrange(desc(n))
## # A tibble: 2 x 5
##    ulcer     n      age thickness  died
##   <fctr> <int>    <dbl>     <dbl> <dbl>
## 1      1    25 74.60000   5.22720    NA
## 2      0    23 71.47826   3.06913    NA

Exercise 4 In this model we can see that having an ulcer is positively associated with death. The log odds of death go up by about 0.41 if you are male rather than female, yet the p-value is very high (0.24), so we cannot say for sure that this effect is real. Age is not worth using as a predictor because its coefficient is close to zero (0.004) and its p-value is very high (0.70). Ulcer is probably the best variable to use in this model to help predict death.

# Keep only the response (died) and the four candidate predictors.
# select() is namespace-qualified because MASS also exports a select().
Melanoma2 <- Melanoma1 %>%
  dplyr::select(sex, age, thickness, ulcer, died)

# Logistic regression of died on sex, age, thickness and ulcer.
Melanoma2GLM <- glm(
  died ~ sex + age + thickness + ulcer,
  family = binomial,
  data = Melanoma2
)
summary(Melanoma2GLM)
## 
## Call:
## glm(formula = died ~ sex + age + thickness + ulcer, family = binomial, 
##     data = Melanoma2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6320  -0.7841  -0.4995   1.0105   2.1021  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -2.39860    0.60347  -3.975 7.05e-05 ***
## sexMALE      0.40767    0.34669   1.176 0.239634    
## age          0.00402    0.01033   0.389 0.697060    
## thickness    0.11253    0.05900   1.907 0.056477 .  
## ulcer1       1.31314    0.36899   3.559 0.000373 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 242.35  on 204  degrees of freedom
## Residual deviance: 210.31  on 200  degrees of freedom
## AIC: 220.31
## 
## Number of Fisher Scoring iterations: 4