Data Prep and preliminary weighted index

ch2 <- ch %>%
  mutate(
    # target: edu_ter → numeric → log (zero if missing or ≤0)
    edu_ter_num = parse_number(edu_ter),
    edu_ter_num = if_else(is.na(edu_ter_num) | edu_ter_num <= 0, 0, edu_ter_num),
    ln_price    = if_else(edu_ter_num > 0, log(edu_ter_num), 0),
    # weather / climate logs
    ln_temp  = log(t2hk_avg),
    ln_rain  = log(dphk_sum),
    ln_wspd  = log(wsp_max),
    wdir_num = parse_number(dom_dirhk_mode),
    wdir_num = if_else(is.na(wdir_num) | wdir_num <= 0, 0, wdir_num),
    ln_wdir  = if_else(wdir_num > 0, log(wdir_num), 0),
    # property age log
    age_num  = parse_number(wp),
    age_num  = if_else(is.na(age_num) | age_num <= 0, 0, age_num),
    ln_age   = if_else(age_num > 0, log(age_num), 0),
    # ensure Date class
    date     = as.Date(date)
  )

# 1b) fit the pooled FE model
model <- feols(
  ln_price ~ 
    ln_temp
  + ln_temp:ln_age
  + ln_wspd
  + ln_wdir
  + relelev120_x 
  + ln_rain * hk_ndvi_x
  + slope_x + rough_x + elevation_x + waterdist_x 
  | year + Code,
  data = ch2
)

# 1c) show clustered SE summary
summary(model, cluster = "Code")
## OLS estimation, Dep. Var.: ln_price
## Observations: 242,713
## Fixed-effects: year: 25,  Code: 759
## Standard-errors: Clustered (Code) 
##                    Estimate Std. Error    t value   Pr(>|t|)    
## ln_temp           -0.941121   0.046274 -20.338204  < 2.2e-16 ***
## ln_wspd           -0.034087   0.010213  -3.337648 8.8613e-04 ***
## ln_wdir           -0.002673   0.004784  -0.558734 5.7651e-01    
## ln_rain           -0.032753   0.004128  -7.934928 7.5671e-15 ***
## ln_temp:ln_age     0.709981   0.012912  54.984350  < 2.2e-16 ***
## ln_rain:hk_ndvi_x  0.188240   0.057647   3.265387 1.1423e-03 ** 
## ... 6 variables were removed because of collinearity (relelev120_x, hk_ndvi_x and 4 others [full set in $collin.var])
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.15627     Adj. R2: 0.933708
##                 Within R2: 0.706831
# a neat Markdown table of coefficients & SEs
modelsummary(model, output = "markdown")
(1)
ln_temp -0.941
(0.101)
ln_wspd -0.034
(0.036)
ln_wdir -0.003
(0.010)
ln_rain -0.033
(0.008)
ln_temp × ln_age 0.710
(0.007)
ln_rain × hk_ndvi_x 0.188
(0.025)
Num.Obs. 242713
R2 0.934
R2 Adj. 0.934
R2 Within 0.707
R2 Within Adj. 0.707
AIC 760850.1
BIC 769055.4
RMSE 1.16
Std.Errors by: year
FE: year X
FE: Code X
# 3a) remove any infinite logs
ch2_clean <- ch2 %>%
  mutate(across(
    c(ln_temp, ln_rain, ln_wspd, ln_wdir),
    ~ if_else(is.finite(.), ., NA_real_)
  ))

# 3b) compute daily means
daily_avgs <- ch2_clean %>%
  group_by(date) %>%
  summarise(
    avg_ln_temp = mean(ln_temp, na.rm = TRUE),
    avg_ln_wspd = mean(ln_wspd, na.rm = TRUE),
    avg_ln_wdir = mean(ln_wdir, na.rm = TRUE),
    avg_ln_rain = mean(ln_rain, na.rm = TRUE)
  )

# 3c) drop month‐end rows
daily_avgs_clean <- daily_avgs %>%
  filter(day(date) != days_in_month(date))

# 3d) extract coefficients
coefs   <- coef(model)
w_temp  <- coefs["ln_temp"]
w_wspd  <- coefs["ln_wspd"]
w_wdir  <- coefs["ln_wdir"]
w_rain  <- coefs["ln_rain"]  # adjust if interaction

# 3e) build the weighted index
daily_avgs_clean <- daily_avgs_clean %>%
  mutate(
    weighted_index =
      w_temp * avg_ln_temp +
      w_wspd * avg_ln_wspd +
      w_wdir * avg_ln_wdir +
      w_rain * avg_ln_rain
  )

p1 <- ggplot(daily_avgs_clean, aes(date, avg_ln_temp)) +
  geom_line() + labs(title="Avg. ln(temp)", y="avg_ln_temp")

p2 <- ggplot(daily_avgs_clean, aes(date, avg_ln_wspd)) +
  geom_line() + labs(title="Avg. ln(wspd)", y="avg_ln_wspd")

p3 <- ggplot(daily_avgs_clean, aes(date, avg_ln_wdir)) +
  geom_line() + labs(title="Avg. ln(wdir)", y="avg_ln_wdir")

p4 <- ggplot(daily_avgs_clean, aes(date, avg_ln_rain)) +
  geom_line() + labs(title="Avg. ln(rain)", y="avg_ln_rain")

# 2×2 layout
(p1 | p2) / (p3 | p4)

ggplot(daily_avgs_clean, aes(date, weighted_index)) +
  geom_line() +
  labs(
    title = "Coefficient‐Weighted Weather Index", 
    x     = "Date", 
    y     = "Weighted Index"
  )