Zelig 5

Redoing the in-class code with Zelig 5 syntax

library(magrittr)
library(radiant.data)
## Loading required package: ggplot2
## Loading required package: lubridate
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
## Loading required package: tidyr
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:magrittr':
## 
##     extract
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'radiant.data'
## The following objects are masked from 'package:lubridate':
## 
##     month, wday
## The following object is masked from 'package:ggplot2':
## 
##     diamonds
library(readr)
library(dplyr)
library(Zelig)
## Loading required package: survival
## 
## Attaching package: 'Zelig'
## The following object is masked from 'package:ggplot2':
## 
##     stat
titanic <- read_csv("/Users/Jessica/Desktop/titanic.csv")
## Parsed with column specification:
## cols(
##   PassengerId = col_double(),
##   Survived = col_double(),
##   Pclass = col_double(),
##   Name = col_character(),
##   Sex = col_character(),
##   Age = col_double(),
##   SibSp = col_double(),
##   Parch = col_double(),
##   Ticket = col_character(),
##   Fare = col_double(),
##   Cabin = col_character(),
##   Embarked = col_character()
## )
head(titanic)
## # A tibble: 6 x 12
##   PassengerId Survived Pclass Name  Sex     Age SibSp Parch Ticket  Fare
##         <dbl>    <dbl>  <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr>  <dbl>
## 1           1        0      3 Brau… male     22     1     0 A/5 2…  7.25
## 2           2        1      1 Cumi… fema…    38     1     0 PC 17… 71.3 
## 3           3        1      3 Heik… fema…    26     0     0 STON/…  7.92
## 4           4        1      1 Futr… fema…    35     1     0 113803 53.1 
## 5           5        0      3 Alle… male     35     0     0 373450  8.05
## 6           6        0      3 Mora… male     NA     0     0 330877  8.46
## # … with 2 more variables: Cabin <chr>, Embarked <chr>
titanic=radiant.data::titanic
titanic <- titanic %>% 
  mutate(surv = as.integer(survived),survival = sjmisc::rec(surv, rec = "2=0; 1=1"))%>%
  select(pclass, survived, survival, everything())
head(titanic)
## # A tibble: 6 x 12
##   pclass survived survival sex      age sibsp parch  fare name  cabin
##   <fct>  <fct>       <dbl> <fct>  <dbl> <int> <int> <dbl> <chr> <chr>
## 1 1st    Yes             1 fema… 29         0     0 211.  Alle… B5   
## 2 1st    Yes             1 male   0.917     1     2 152.  Alli… C22 …
## 3 1st    No              0 fema…  2         1     2 152.  Alli… C22 …
## 4 1st    No              0 male  30         1     2 152.  Alli… C22 …
## 5 1st    No              0 fema… 25         1     2 152.  Alli… C22 …
## 6 1st    Yes             1 male  48         0     0  26.5 Ande… E12  
## # … with 2 more variables: embarked <fct>, surv <int>
z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)

Age Effect

##Mode(titanic$sex)
a.range = min(titanic$age):max(titanic$age)
z5$setrange(age=a.range)
z5$sim()
z5$graph()

The expected value of survival in the titanic decreases as age increases with all other features set to their default values.

Fare Effect

f.range = min(titanic$fare):max(titanic$fare)
z5$setrange(fare=f.range)
z5$sim()
z5$graph()

The expected value of surviving as fare increases seems to remain almost constant. There is a small linear downward sloping trend, but it is very minor. Maybe from fare=0 to fare=500 the expected value of survival decreases by .075.

z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5$setx(sex="male")
z5$setx1(sex="female")
z5$sim()
z5$graph()

Looking at the expected value and predicted value of survival between male and female, we can tell that there is a greater chance of survival for females in both categories. Females have about an expected value that is greater by .25 as E(Y|Male) is about .13 and E(Y|Female) is about .38.

#1st class
z5=zlogit$new()
z5$zelig(survival ~ age + sex * pclass + fare, data = titanic)
z5$setx(sex="male",pclass="1st")
z5$setx1(sex="female",pclass="1st")
z5$sim()
z5$graph()

For 1st class, the difference in expected value of survival between female and male is about .53.

#2nd class
z5$setx(sex="male",pclass="2nd")
z5$setx1(sex="female",pclass="2nd")
z5$sim()
z5$graph()

For 2nd class, we see the male survival rate decrease to about .13 and the woman survival rate decrease to about .9. The first difference increases to about .76 or so which is huge!

#3rd class
z5$setx(sex="male",pclass="3rd")
z5$setx1(sex="female",pclass="3rd")
z5$sim()
z5$graph()

For 3rd class, the difference between female and male survival rate drops to about .25 as women’s expected survival decreases as the class ranks moving down.

z_tit <- zelig(survival ~ age + sex*pclass + fare, model = "logit", data = titanic, cite = F)
c1x <- setx(z_tit, sex = "male", pclass = "1st")
c1x1 <- setx(z_tit, sex = "female", pclass = "1st")
c1s <- sim(z_tit, x = c1x, x1 = c1x1)
c2x <- setx(z_tit, sex = "male", pclass = "2nd")
c2x1 <- setx(z_tit, sex = "female", pclass = "2nd")
c2s <- sim(z_tit, x = c2x, x1 = c2x1)
c3x <- setx(z_tit, sex = "male", pclass = "3rd")
c3x1 <- setx(z_tit, sex = "female", pclass = "3rd")
c3s <- sim(z_tit, x = c3x, x1 = c3x1)
d1 <- c1s$get_qi(xvalue="x1", qi="fd")
d2 <- c2s$get_qi(xvalue="x1", qi="fd")
d3 <- c3s$get_qi(xvalue="x1", qi="fd")

dfd <- as.data.frame(cbind(d1, d2, d3))
tidd <- dfd %>% 
  gather(class, simv, 1:3)
tidd %>% 
  group_by(class) %>% 
  summarise(mean = mean(simv), sd = sd(simv))
## # A tibble: 3 x 3
##   class  mean     sd
##   <chr> <dbl>  <dbl>
## 1 V1    0.521 0.0502
## 2 V2    0.749 0.0421
## 3 V3    0.256 0.0443
ggplot(tidd, aes(simv)) + geom_histogram() + facet_grid(~class)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Here we can see the expected 1st difference between female and male passengers across all 3 classes put side by side. We can see how the first difference is the lowest in 3rd class, where male and female passengers have closer expected survival rates.

Hurricane Survival

While facing these natural hazard, we always want to have a better chance of survival. In this homework, I am trying to if what the relationship is between the chance of survival and the driving experience.

library(magrittr)
library(readr)
library(dplyr)
library(Zelig)
safe <- read_csv("/Users/Jessica/Desktop/safe.csv")
## Parsed with column specification:
## cols(
##   ID = col_double(),
##   target = col_double(),
##   Gender = col_character(),
##   EngineHP = col_double(),
##   credit_history = col_double(),
##   Years_Experience = col_double(),
##   annual_claims = col_double(),
##   Marital_Status = col_character(),
##   Vehical_type = col_character(),
##   Miles_driven_annually = col_double(),
##   size_of_family = col_double(),
##   Age_bucket = col_character(),
##   EngineHP_bucket = col_character(),
##   Years_Experience_bucket = col_character(),
##   Miles_driven_annually_bucket = col_character(),
##   credit_history_bucket = col_character(),
##   State = col_character()
## )
safe<-sjlabelled::remove_all_labels(safe) 
head(safe)
##   ID target Gender EngineHP credit_history Years_Experience annual_claims
## 1  1      1      F      522            656                1             0
## 2  2      1      F      691            704               16             0
## 3  3      1      M      133            691               15             0
## 4  4      1      M      146            720                9             0
## 5  5      1      M      128            771               33             1
## 6  6      1      F      144            722               18             1
##   Marital_Status Vehical_type Miles_driven_annually size_of_family
## 1        Married          Car                 14749              5
## 2        Married          Car                 15389              6
## 3        Married          Van                  9956              3
## 4        Married          Van                 77323              3
## 5        Married          Van                 14183              4
## 6        Married        Truck                 12208              8
##   Age_bucket EngineHP_bucket Years_Experience_bucket
## 1        <18            >350                      <3
## 2      28-34            >350                   15-30
## 3        >40          90-160                   15-30
## 4      18-27          90-160                   9-14'
## 5        >40          90-160                     >30
## 6        >40          90-160                   15-30
##   Miles_driven_annually_bucket credit_history_bucket State
## 1                         <15k                  Fair    IL
## 2                      15k-25k                  Good    NJ
## 3                         <15k                  Good    CT
## 4                         >25k                  Good    CT
## 5                         <15k             Very Good    WY
## 6                         <15k                  Good    DE
safe1<-rename(safe)%>%
 mutate(marital=
          recode(Marital_Status, 'Married'=1,'Singel'=2),
        claim=
          recode(annual_claims, '0'=0, '1'=0, '2'=1, '3'=1, '4'=1),
        credit=
          recode(credit_history_bucket,'Very Poor'=1, 'Poor'=2, 'Fair'=3, 'Good'=4, 'Very Good'=5, 'Exceptional'=6),
        age=
          recode(Age_bucket, '<18'=1,'18-27'=2, '28-34'=3, '34-40'=4, '>40'=5),
         sex=recode(Gender, 'M'=0, 'F'=1)
        )%>%
select(marital,claim,sex,size_of_family,age,target,credit_history,credit,Years_Experience,Miles_driven_annually)
head(safe1)
##   marital claim sex size_of_family age target credit_history credit
## 1       1     0   1              5   1      1            656      3
## 2       1     0   1              6   3      1            704      4
## 3       1     0   0              3   5      1            691      4
## 4       1     0   0              3   2      1            720      4
## 5       1     0   0              4   5      1            771      5
## 6       1     0   1              8   5      1            722      4
##   Years_Experience Miles_driven_annually
## 1                1                 14749
## 2               16                 15389
## 3               15                  9956
## 4                9                 77323
## 5               33                 14183
## 6               18                 12208
z5=zlogit$new()
z5$zelig(claim ~ age + sex * marital + Years_Experience, model="logit", data = safe1)
## Argument model is only valid for the Zelig wrapper, but not the Zelig method, and will be ignored.

Driving Experience Effect

As we can see, the expected value of hurricane survival follows an almost logarithmic function curve as decrease trend, it will drastically decrease the expected value in hurricane surviving with the driving experience increasing.

As the experience increasing, the uncertainty of surviving from hurricane decreased. With 20 years of experience or more, the uncentainty getting close to 0.

##Mode(safe1$Experience)
z5=zlogit$new()
z5$zelig(claim ~ age + sex * Years_Experience, model="logit", data = safe1)
## Argument model is only valid for the Zelig wrapper, but not the Zelig method, and will be ignored.
y.range = min(safe1$Years_Experience):max(safe1$Years_Experience)
z5$setrange(Years_Experience=y.range)
z5$sim()
z5$graph()
## Years_Experience chosen as the x-axis variable. Use the var argument to specify a different variable.

Looking at the first difference comparison between male and female, we can see that the expected value of both male and female are almost the same. However, the predicted value for female is greater than male.

z5=zlogit$new()
z5$zelig(claim ~ age + sex * Years_Experience, data = safe1)
z5$setx(sex=0)
z5$setx1(sex=1)
z5$sim()
z5$graph()