install.packages("tidyverse", repos = "http://cloud.r-project.org/", dependencies = TRUE)
#install.packages("readxl", repos = "http://cloud.r-project.org/", dependencies = TRUE)
install.packages("devtools", repos = "http://cloud.r-project.org/", dependencies = TRUE)
devtools::install_github("tidyverse/readxl")
install.packages("SDMTools", repos = "http://cloud.r-project.org/", dependencies = TRUE)
#install.packages("RColorBrewer", repos = "http://cloud.r-project.org/", dependencies = TRUE)
install.packages("foreign", repos = "http://cloud.r-project.org/", dependencies = TRUE)
library(tidyverse)
library(readxl)
library(SDMTools)
#library(RColorBrewer)
library(foreign)
choice00 <- read_excel("data/Model03_LCCMtable.xlsx", sheet = "choice")
choice01 <- choice00 %>%
select("Model for Choices", X__1, X__4, X__7) %>%
filter(!is.na("Model for Choices") == TRUE, !is.na(X__7) == TRUE, X__1 != "Class1")
choice02 <- tibble(
var = choice01[["Model for Choices"]][1:18],
coef.choice.1 = as.numeric(choice01$X__1[1:18]),
coef.choice.2 = as.numeric(choice01$X__4[1:18]),
coef.choice.3 = as.numeric(choice01$X__7[1:18])
)
# https://stat.ethz.ch/pipermail/r-help/2003-December/043695.html
alt00 <- as.tibble(
read.spss("data/alt_07082018.sav",
to.data.frame = TRUE,
use.value.labesl = FALSE)
)
alt01 <- alt00 %>%
mutate(
id = as.character(alt %/% 100),
altid = as.character(alt %% 100),
Central = ifelse(CA_Regions == 1, 1, 0),
MTC = ifelse(CA_Regions == 2, 1, 0),
NorCal = ifelse(CA_Regions == 3, 1, 0),
SACOG = ifelse(CA_Regions == 4, 1, 0),
SANDAG = ifelse(CA_Regions == 5, 1, 0),
SCAG = ifelse(CA_Regions == 6, 1, 0)
) %>%
select(id, altid,
pctnhw_fornhw, pctasn_forasn, pcthis_forhis, pctoldmill, # 4
zlnincome, home_ratio, rent_ratio, schoolrate, # +4
lndist, Amenities, LUmix, Density, # +4
Central, MTC, NorCal, SACOG, SANDAG, SCAG, # +6
CA_Regions, ccity, urban, suburb, rural_in_urban, rural)
alt01$util1 <- exp(data.matrix(as.data.frame(alt01)[, 3:20]) %*% choice02$coef.choice.1)
alt01$util2 <- exp(data.matrix(as.data.frame(alt01)[, 3:20]) %*% choice02$coef.choice.2)
alt01$util3 <- exp(data.matrix(as.data.frame(alt01)[, 3:20]) %*% choice02$coef.choice.3)
alt02 <- alt01 %>%
group_by(id) %>%
summarize(
util1_sum = as.numeric(sum(util1)),
util2_sum = as.numeric(sum(util2)),
util3_sum = as.numeric(sum(util3)), # strange behavior
n = n()
)
# check observations with alternatives fewer than ten.
alt02 %>%
filter(n < 10) %>%
ggplot() +
geom_histogram(aes(x = n)) # 8 + 7 + 1 = 16, 7290-16 = 7274
alt03 <- alt01 %>%
left_join(alt02) %>%
select(id, altid, util1, util2, util3,
util1_sum, util2_sum, util3_sum, # from alt02
ccity, urban, suburb, rural_in_urban, rural) %>%
mutate(
prob1 = as.numeric(util1/util1_sum*100),
prob2 = as.numeric(util2/util2_sum*100),
prob3 = as.numeric(util3/util3_sum*100)
) %>%
select(id, altid, prob1, prob2, prob3,
ccity, urban, suburb, rural_in_urban, rural)
# https://readxl.tidyverse.org/
# The below excel file is copied from M drive. No additional changes are made.
df00 <- read_excel("data/Model03_barcharts.xlsx", sheet = "resp_07102018_3round_data03")
df01 <- df00 %>%
select(pid, case_weight, work_school, nhwhite, education, ownkidsunder6, ownkidsunder12, ownkidsunder17,
income, own_house, carlevel, Zpro_suburban_18F, Zcar_as_atool_18F, Zpro_env_policies_18F) %>%
mutate(
nhwhite_no = ifelse(nhwhite == 1, 0, 1),
edu1 = ifelse(education == 1, 1, 0),
edu2 = ifelse(education == 2, 1, 0),
edu3 = ifelse(education == 3, 1, 0),
edu4 = ifelse(education == 4, 1, 0),
inc1 = ifelse(income == 1, 1, 0),
inc2 = ifelse(income == 2, 1, 0),
inc3 = ifelse(income == 3, 1, 0),
house_own = own_house,
house_not = ifelse(house_own ==1, 0, 1),
car0 = ifelse(carlevel == 0, 1, 0),
car1 = ifelse(carlevel == 1, 1, 0),
car2 = ifelse(carlevel == 2, 1, 0),
car3 = ifelse(carlevel == 3, 1, 0),
pro_sub = Zpro_suburban_18F,
car_tool = Zcar_as_atool_18F,
pro_env = Zpro_env_policies_18F
) %>%
select(pid, case_weight, work_school, nhwhite_no, nhwhite, edu1, edu2, edu3, edu4,
ownkidsunder6, ownkidsunder12, ownkidsunder17, inc1, inc2, inc3, house_not, house_own,
car0, car1, car2, car3,
pro_sub, car_tool, pro_env)
# following several lines are for the calcuation of standardized coefficients
temp <- map_dbl(df01, function(x) wt.sd(x, wt=df01$case_weight))
cov_wt_sd <- tibble(
cov = names(temp)[3:length(temp)],
sd = temp[3:length(temp)]
)
#write_csv(cov_wt_sd, "data/cov_wt_sd.csv")
cov_wt_sd2 <- c(1, cov_wt_sd$sd[1:21])
# end.
df02 <- cbind(
constant = rep(1, n = 729),
df01[c(4:24)]
)
# The below excel file is copied from M drive.
# Then, two additional sheets are made based on weighted sd values.
coef00 <- read_excel("data/Model03_LCCMtable.xlsx", sheet = "member")
coef01 <- coef00 %>%
filter(!is.na(X__1)== TRUE) %>%
select(X__1:X__7)
colnames(coef01) <- c("cov", "coef.1", "z.1", "sig.1", "coef.2", "z.2", "sig.2", "coef.3", "z.3", "sig.3")
coef02 <- coef01 %>%
select(cov, coef.1, coef.2, coef.3) %>%
filter(!is.na(coef.1) == TRUE) #, cov != "No")
coef02$cov[2:22] <- colnames(df01)[3:23]
coef02$coef.1 <- as.numeric(coef02$coef.1)
coef02$coef.2 <- as.numeric(coef02$coef.2)
coef02$coef.3 <- as.numeric(coef02$coef.3)
coef02$std.coef.1 <- coef02$coef.1*cov_wt_sd2
coef02$std.coef.2 <- coef02$coef.2*cov_wt_sd2
coef02$std.coef.3 <- coef02$coef.3*cov_wt_sd2
coef02$cov <- factor(coef02$cov, levels = c(coef02$cov))
coef03 <- coef02 %>%
gather(coef.1:std.coef.3, key = coef_type, value = estimate)
df03 <- df01 %>%
select(pid, case_weight, work_school) %>%
mutate(
class1 = exp(as.matrix(df02) %*% coef02[[2]]),
class2 = exp(as.matrix(df02) %*% coef02[[3]]),
class3 = exp(as.matrix(df02) %*% coef02[[4]])
)
df03$NHtype.work <- factor(df03$work_school, levels = c(1, 2, 3, 4, 5),
labels = c("Central city", "Urban", "Suburban",
"Exurban", "Rural"))
df04 <- df03 %>%
gather(class1:class3, key = class, value = utility) %>%
group_by(pid) %>%
summarise(
util_sum = sum(utility)
)
df05 <- df03 %>%
left_join(df04) %>%
mutate(
id = seq.int(nrow(.)),
prob.class1 = class1/util_sum, # . placeholder
prob.class2 = class2/util_sum,
prob.class3 = class3/util_sum
) %>%
select(pid, id, case_weight, NHtype.work, prob.class1, prob.class2, prob.class3)