library(MASS)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the Melanoma data set (made available by library(MASS) above) and
# preview its first six rows.
data("Melanoma")
head(Melanoma, n = 6L)
## time status sex age year thickness ulcer
## 1 10 3 1 76 1972 6.76 1
## 2 30 3 1 56 1968 0.65 0
## 3 35 2 1 41 1977 1.34 0
## 4 99 3 0 71 1968 2.90 0
## 5 185 1 1 52 1965 12.08 1
## 6 204 1 1 28 1971 4.84 1
# Compact per-column overview: type and leading values of every variable.
dplyr::glimpse(Melanoma)
## Observations: 205
## Variables: 7
## $ time <int> 10, 30, 35, 99, 185, 204, 210, 232, 232, 279, 295, 3...
## $ status <int> 3, 3, 2, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1, 1, 3, 1, 1...
## $ sex <int> 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1...
## $ age <int> 76, 56, 41, 71, 52, 28, 77, 60, 49, 68, 53, 64, 68, ...
## $ year <int> 1972, 1968, 1977, 1968, 1965, 1971, 1972, 1974, 1968...
## $ thickness <dbl> 6.76, 0.65, 1.34, 2.90, 12.08, 4.84, 5.16, 3.22, 12....
## $ ulcer <int> 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
Exercise 1
# Exercise 1: turn the 0/1 codes in `sex` into FEMALE/MALE labels and store
# both `sex` and `ulcer` as factors.
Melanoma1 <- mutate(
  Melanoma,
  # `sex` is an integer column, so compare with 1L rather than the string "1"
  # (the string comparison only worked through implicit coercion).
  sex = as.factor(ifelse(sex == 1L, "MALE", "FEMALE")),
  ulcer = as.factor(ulcer)
)
str(Melanoma1$sex)
## Factor w/ 2 levels "FEMALE","MALE": 2 2 2 1 2 2 2 1 2 1 ...
str(Melanoma1$ulcer)
## Factor w/ 2 levels "0","1": 2 1 1 1 2 2 2 2 2 2 ...
Exercise 2
# Exercise 2: add `died`, a two-level factor that is "1" when `status` is at
# most 1 and "0" otherwise.
# NOTE(review): the MASS::Melanoma help page lists status 1 as "died from
# melanoma" and 3 as "died from other causes" -- confirm that coding the
# latter as 0 is intended.
Melanoma1 <- Melanoma1 %>%
  mutate(died = as.factor(ifelse(status <= 1, 1, 0)))
str(Melanoma1$died)
## Factor w/ 2 levels "0","1": 1 1 1 1 2 2 2 1 2 2 ...
Exercise 3
# Exercise 3: for patients older than 65, summarise each ulcer group by its
# size, mean age, mean thickness and the proportion with died == "1",
# largest group first.
Melanoma1 %>%
  group_by(ulcer) %>%
  # filter() expects logical conditions only; the stray bare `age` argument
  # that used to precede this condition is an integer column and is rejected
  # by current dplyr.
  filter(age > 65) %>%
  summarise(
    n = n(),
    age = mean(age),
    thickness = mean(thickness),
    # `died` is a factor, and mean() of a factor is NA (with a warning), so
    # average the logical "level is 1" indicator to get a proportion.
    died = mean(died == "1")
  ) %>%
  arrange(desc(n))
## # A tibble: 2 x 5
## ulcer n age thickness died
## <fctr> <int> <dbl> <dbl> <dbl>
## 1 1 25 74.60000 5.22720 NA
## 2 0 23 71.47826 3.06913 NA
Exercise 4 In this model we can see that having an ulcer is positively associated with death (coefficient 1.31, p < 0.001). The log odds of death go up by about 0.41 if you are male rather than female, yet the p-value is high (0.24), so we cannot say for sure that this effect is real. Age is not worth using as a predictor because its coefficient is very small (0.004) and far from significant (p = 0.70). Ulcer is probably the best variable to use in this model to help predict death.
# Exercise 4: keep only the modelling columns, then fit a binomial GLM
# (logistic regression) of `died` on sex, age, thickness and ulcer.
# select() is namespace-qualified because MASS also exports a select().
Melanoma2 <- Melanoma1 %>%
  dplyr::select(sex, age, thickness, ulcer, died)
Melanoma2GLM <- glm(
  formula = died ~ sex + age + thickness + ulcer,
  family = binomial,
  data = Melanoma2
)
summary(Melanoma2GLM)
##
## Call:
## glm(formula = died ~ sex + age + thickness + ulcer, family = binomial,
## data = Melanoma2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6320 -0.7841 -0.4995 1.0105 2.1021
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.39860 0.60347 -3.975 7.05e-05 ***
## sexMALE 0.40767 0.34669 1.176 0.239634
## age 0.00402 0.01033 0.389 0.697060
## thickness 0.11253 0.05900 1.907 0.056477 .
## ulcer1 1.31314 0.36899 3.559 0.000373 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 242.35 on 204 degrees of freedom
## Residual deviance: 210.31 on 200 degrees of freedom
## AIC: 220.31
##
## Number of Fisher Scoring iterations: 4