Data Prep and preliminary weighted index
ch2 <- ch %>%
mutate(
# target: edu_ter â numeric â log (zero if missing or â¤0)
edu_ter_num = parse_number(edu_ter),
edu_ter_num = if_else(is.na(edu_ter_num) | edu_ter_num <= 0, 0, edu_ter_num),
ln_price = if_else(edu_ter_num > 0, log(edu_ter_num), 0),
# weather / climate logs
ln_temp = log(t2hk_avg),
ln_rain = log(dphk_sum),
ln_wspd = log(wsp_max),
wdir_num = parse_number(dom_dirhk_mode),
wdir_num = if_else(is.na(wdir_num) | wdir_num <= 0, 0, wdir_num),
ln_wdir = if_else(wdir_num > 0, log(wdir_num), 0),
# property age log
age_num = parse_number(wp),
age_num = if_else(is.na(age_num) | age_num <= 0, 0, age_num),
ln_age = if_else(age_num > 0, log(age_num), 0),
# ensure Date class
date = as.Date(date)
)
# 1b) fit the pooled FE model
model <- feols(
ln_price ~
ln_temp
+ ln_temp:ln_age
+ ln_wspd
+ ln_wdir
+ relelev120_x
+ ln_rain * hk_ndvi_x
+ slope_x + rough_x + elevation_x + waterdist_x
| year + Code,
data = ch2
)
# 1c) show clustered SE summary
summary(model, cluster = "Code")
## OLS estimation, Dep. Var.: ln_price
## Observations: 242,713
## Fixed-effects: year: 25, Code: 759
## Standard-errors: Clustered (Code)
## Estimate Std. Error t value Pr(>|t|)
## ln_temp -0.941121 0.046274 -20.338204 < 2.2e-16 ***
## ln_wspd -0.034087 0.010213 -3.337648 8.8613e-04 ***
## ln_wdir -0.002673 0.004784 -0.558734 5.7651e-01
## ln_rain -0.032753 0.004128 -7.934928 7.5671e-15 ***
## ln_temp:ln_age 0.709981 0.012912 54.984350 < 2.2e-16 ***
## ln_rain:hk_ndvi_x 0.188240 0.057647 3.265387 1.1423e-03 **
## ... 6 variables were removed because of collinearity (relelev120_x, hk_ndvi_x and 4 others [full set in $collin.var])
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.15627 Adj. R2: 0.933708
## Within R2: 0.706831
# a neat Markdown table of coefficients & SEs
modelsummary(model, output = "markdown")
|
(1) |
ln_temp |
-0.941 |
|
(0.101) |
ln_wspd |
-0.034 |
|
(0.036) |
ln_wdir |
-0.003 |
|
(0.010) |
ln_rain |
-0.033 |
|
(0.008) |
ln_temp Ă ln_age |
0.710 |
|
(0.007) |
ln_rain Ă hk_ndvi_x |
0.188 |
|
(0.025) |
Num.Obs. |
242713 |
R2 |
0.934 |
R2 Adj. |
0.934 |
R2 Within |
0.707 |
R2 Within Adj. |
0.707 |
AIC |
760850.1 |
BIC |
769055.4 |
RMSE |
1.16 |
Std.Errors |
by: year |
FE: year |
X |
FE: Code |
X |
# 3a) remove any infinite logs
ch2_clean <- ch2 %>%
mutate(across(
c(ln_temp, ln_rain, ln_wspd, ln_wdir),
~ if_else(is.finite(.), ., NA_real_)
))
# 3b) compute daily means
daily_avgs <- ch2_clean %>%
group_by(date) %>%
summarise(
avg_ln_temp = mean(ln_temp, na.rm = TRUE),
avg_ln_wspd = mean(ln_wspd, na.rm = TRUE),
avg_ln_wdir = mean(ln_wdir, na.rm = TRUE),
avg_ln_rain = mean(ln_rain, na.rm = TRUE)
)
# 3c) drop monthâend rows
daily_avgs_clean <- daily_avgs %>%
filter(day(date) != days_in_month(date))
# 3d) extract coefficients
coefs <- coef(model)
w_temp <- coefs["ln_temp"]
w_wspd <- coefs["ln_wspd"]
w_wdir <- coefs["ln_wdir"]
w_rain <- coefs["ln_rain"] # adjust if interaction
# 3e) build the weighted index
daily_avgs_clean <- daily_avgs_clean %>%
mutate(
weighted_index =
w_temp * avg_ln_temp +
w_wspd * avg_ln_wspd +
w_wdir * avg_ln_wdir +
w_rain * avg_ln_rain
)
p1 <- ggplot(daily_avgs_clean, aes(date, avg_ln_temp)) +
geom_line() + labs(title="Avg. ln(temp)", y="avg_ln_temp")
p2 <- ggplot(daily_avgs_clean, aes(date, avg_ln_wspd)) +
geom_line() + labs(title="Avg. ln(wspd)", y="avg_ln_wspd")
p3 <- ggplot(daily_avgs_clean, aes(date, avg_ln_wdir)) +
geom_line() + labs(title="Avg. ln(wdir)", y="avg_ln_wdir")
p4 <- ggplot(daily_avgs_clean, aes(date, avg_ln_rain)) +
geom_line() + labs(title="Avg. ln(rain)", y="avg_ln_rain")
# 2Ă2 layout
(p1 | p2) / (p3 | p4)

ggplot(daily_avgs_clean, aes(date, weighted_index)) +
geom_line() +
labs(
title = "CoefficientâWeighted Weather Index",
x = "Date",
y = "Weighted Index"
)
