Abstract
みんなのR、16章のお勉強。
require(UsingR)
require(ggplot2)
require(tidyr)
require(dplyr)
require(plotly)
require(coefplot)
# Read the comma-separated acs_ny.csv file (first row is the header) into a
# data frame; text columns are kept as character vectors, not factors.
acs <- read.table(
  file = "http://jaredlander.com/data/acs_ny.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Preview the first rows to check how the columns were parsed.
head(acs)
## Acres FamilyIncome FamilyType NumBedrooms NumChildren NumPeople
## 1 1-10 150 Married 4 1 3
## 2 1-10 180 Female Head 3 2 4
## 3 1-10 280 Female Head 4 0 2
## 4 1-10 330 Female Head 2 1 2
## 5 1-10 330 Male Head 3 1 2
## 6 1-10 480 Male Head 0 3 4
## NumRooms NumUnits NumVehicles NumWorkers OwnRent YearBuilt
## 1 9 Single detached 1 0 Mortgage 1950-1959
## 2 6 Single detached 2 0 Rented Before 1939
## 3 8 Single detached 3 1 Mortgage 2000-2004
## 4 4 Single detached 1 0 Rented 1950-1959
## 5 5 Single attached 1 0 Mortgage Before 1939
## 6 1 Single detached 0 0 Rented Before 1939
## HouseCosts ElectricBill FoodStamp HeatingFuel Insurance Language
## 1 1800 90 No Gas 2500 English
## 2 850 90 No Oil 0 English
## 3 2600 260 No Oil 6600 Other European
## 4 1800 140 No Oil 0 English
## 5 860 150 No Gas 660 Spanish
## 6 700 140 No Gas 0 English
# Density of FamilyIncome, with a vertical reference line at 150000 (the
# threshold used for the binary income indicator).
ggplot(data = acs, mapping = aes(x = FamilyIncome)) +
  geom_density(color = "grey", fill = "grey") +
  geom_vline(xintercept = 150000)
# Logical indicator: TRUE when FamilyIncome is at least 150000.
acs$income <- acs[["FamilyIncome"]] >= 150000

# Logistic regression (binomial family, logit link) of the indicator on
# housing costs, number of workers, ownership status, bedrooms and family type.
income1 <- glm(
  formula = income ~ HouseCosts + NumWorkers + OwnRent + NumBedrooms +
    FamilyType,
  family = binomial(link = "logit"),
  data = acs
)
summary(income1)
##
## Call:
## glm(formula = income ~ HouseCosts + NumWorkers + OwnRent + NumBedrooms +
## FamilyType, family = binomial(link = "logit"), data = acs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.8452 -0.6246 -0.4231 -0.1743 2.9503
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.738e+00 1.185e-01 -48.421 <2e-16 ***
## HouseCosts 7.398e-04 1.724e-05 42.908 <2e-16 ***
## NumWorkers 5.611e-01 2.588e-02 21.684 <2e-16 ***
## OwnRentOutright 1.772e+00 2.075e-01 8.541 <2e-16 ***
## OwnRentRented -8.886e-01 1.002e-01 -8.872 <2e-16 ***
## NumBedrooms 2.339e-01 1.683e-02 13.895 <2e-16 ***
## FamilyTypeMale Head 3.336e-01 1.472e-01 2.266 0.0235 *
## FamilyTypeMarried 1.405e+00 8.704e-02 16.143 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 22808 on 22744 degrees of freedom
## Residual deviance: 18073 on 22737 degrees of freedom
## AIC: 18089
##
## Number of Fisher Scoring iterations: 6
# Histogram of NumChildren using a bin width of 1.
ggplot(acs, aes(x = NumChildren)) +
  geom_histogram(binwidth = 1)

# Poisson regression (log link) of the number of children on family income,
# family type and ownership status. The `link` argument is spelled out in
# full: abbreviated argument names only work through R's partial matching,
# which is fragile and flagged by linters.
children1 <- glm(
  NumChildren ~ FamilyIncome + FamilyType + OwnRent,
  data = acs,
  family = poisson(link = "log")
)
summary(children1)
##
## Call:
## glm(formula = NumChildren ~ FamilyIncome + FamilyType + OwnRent,
## family = poisson(link = "log"), data = acs)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9950 -1.3235 -1.2045 0.9464 6.3781
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.257e-01 2.103e-02 -15.491 < 2e-16 ***
## FamilyIncome 5.420e-07 6.572e-08 8.247 < 2e-16 ***
## FamilyTypeMale Head -6.298e-02 3.847e-02 -1.637 0.102
## FamilyTypeMarried 1.440e-01 2.147e-02 6.707 1.98e-11 ***
## OwnRentOutright -1.974e+00 2.292e-01 -8.611 < 2e-16 ***
## OwnRentRented 4.086e-01 2.067e-02 19.773 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 35240 on 22744 degrees of freedom
## Residual deviance: 34643 on 22739 degrees of freedom
## AIC: 61370
##
## Number of Fisher Scoring iterations: 5
# Coefficient plot for the Poisson model stored in children1.
coefplot(model = children1)
# 生存時間分析