require(UsingR)
require(ggplot2)
require(tidyr)
require(dplyr)
require(plotly)
require(coefplot)

# 1 ロジスティック回帰 (Logistic regression) ----

# Read the acs_ny.csv data set from its remote location. The file is
# comma-separated with a header row; character columns are kept as strings.
acs <- read.table(
  file = "http://jaredlander.com/data/acs_ny.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

# Preview the first rows; the recorded console output follows.
head(acs)
##   Acres FamilyIncome  FamilyType NumBedrooms NumChildren NumPeople
## 1  1-10          150     Married           4           1         3
## 2  1-10          180 Female Head           3           2         4
## 3  1-10          280 Female Head           4           0         2
## 4  1-10          330 Female Head           2           1         2
## 5  1-10          330   Male Head           3           1         2
## 6  1-10          480   Male Head           0           3         4
##   NumRooms        NumUnits NumVehicles NumWorkers  OwnRent   YearBuilt
## 1        9 Single detached           1          0 Mortgage   1950-1959
## 2        6 Single detached           2          0   Rented Before 1939
## 3        8 Single detached           3          1 Mortgage   2000-2004
## 4        4 Single detached           1          0   Rented   1950-1959
## 5        5 Single attached           1          0 Mortgage Before 1939
## 6        1 Single detached           0          0   Rented Before 1939
##   HouseCosts ElectricBill FoodStamp HeatingFuel Insurance       Language
## 1       1800           90        No         Gas      2500        English
## 2        850           90        No         Oil         0        English
## 3       2600          260        No         Oil      6600 Other European
## 4       1800          140        No         Oil         0        English
## 5        860          150        No         Gas       660        Spanish
## 6        700          140        No         Gas         0        English
# Density of FamilyIncome, with a vertical reference line at 150000.
ggplot(data = acs, mapping = aes(x = FamilyIncome)) +
  geom_density(colour = "grey", fill = "grey") +
  geom_vline(xintercept = 150000)

# Binary response: TRUE when the family income is at least 150000.
acs$income <- acs[["FamilyIncome"]] >= 150000

# Logistic regression (binomial family, logit link) of the high-income
# indicator on housing costs, number of workers, ownership status, number of
# bedrooms and family type.
income1 <- glm(
  formula = income ~ HouseCosts + NumWorkers + OwnRent +
    NumBedrooms + FamilyType,
  family = binomial(link = "logit"),
  data = acs
)

# Model summary; the recorded console output follows.
summary(income1)
## 
## Call:
## glm(formula = income ~ HouseCosts + NumWorkers + OwnRent + NumBedrooms + 
##     FamilyType, family = binomial(link = "logit"), data = acs)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.8452  -0.6246  -0.4231  -0.1743   2.9503  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -5.738e+00  1.185e-01 -48.421   <2e-16 ***
## HouseCosts           7.398e-04  1.724e-05  42.908   <2e-16 ***
## NumWorkers           5.611e-01  2.588e-02  21.684   <2e-16 ***
## OwnRentOutright      1.772e+00  2.075e-01   8.541   <2e-16 ***
## OwnRentRented       -8.886e-01  1.002e-01  -8.872   <2e-16 ***
## NumBedrooms          2.339e-01  1.683e-02  13.895   <2e-16 ***
## FamilyTypeMale Head  3.336e-01  1.472e-01   2.266   0.0235 *  
## FamilyTypeMarried    1.405e+00  8.704e-02  16.143   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 22808  on 22744  degrees of freedom
## Residual deviance: 18073  on 22737  degrees of freedom
## AIC: 18089
## 
## Number of Fisher Scoring iterations: 6

# 2 ポアソン回帰 (Poisson regression) ----

# Histogram of NumChildren using bins of width 1.
ggplot(data = acs, mapping = aes(x = NumChildren)) +
  geom_histogram(binwidth = 1)

# Poisson regression (log link) of the number of children on family income,
# family type and ownership status.
# The family's `link` argument is spelled out in full: the previous
# `lin = "log"` only worked through R's partial argument matching.
children1 <- glm(
  formula = NumChildren ~ FamilyIncome + FamilyType + OwnRent,
  family = poisson(link = "log"),
  data = acs
)

# Model summary; the recorded console output follows (the echoed call reflects
# the fully spelled-out `link` argument).
summary(children1)
## 
## Call:
## glm(formula = NumChildren ~ FamilyIncome + FamilyType + OwnRent, 
##     family = poisson(link = "log"), data = acs)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9950  -1.3235  -1.2045   0.9464   6.3781  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         -3.257e-01  2.103e-02 -15.491  < 2e-16 ***
## FamilyIncome         5.420e-07  6.572e-08   8.247  < 2e-16 ***
## FamilyTypeMale Head -6.298e-02  3.847e-02  -1.637    0.102    
## FamilyTypeMarried    1.440e-01  2.147e-02   6.707 1.98e-11 ***
## OwnRentOutright     -1.974e+00  2.292e-01  -8.611  < 2e-16 ***
## OwnRentRented        4.086e-01  2.067e-02  19.773  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 35240  on 22744  degrees of freedom
## Residual deviance: 34643  on 22739  degrees of freedom
## AIC: 61370
## 
## Number of Fisher Scoring iterations: 5

# Coefficient plot for the fitted Poisson model.
coefplot(children1)

# 生存時間分析