library(tidyverse)
df <- datasets::quakes
df_dich <- df %>% mutate(station_bin = cut(stations, breaks=c(0, 10, 20, 30, 40, 50)))
head(df_dich)
## lat long depth mag stations station_bin
## 1 -20.42 181.62 562 4.8 41 (40,50]
## 2 -20.62 181.03 650 4.2 15 (10,20]
## 3 -26.00 184.10 42 5.4 43 (40,50]
## 4 -17.97 181.66 626 4.1 19 (10,20]
## 5 -20.42 181.96 649 4.0 11 (10,20]
## 6 -19.68 184.31 195 4.0 12 (10,20]
df.lm <- lm(mag ~ depth + depth*stations + station_bin + I(depth^2), data=df_dich)
summary(df.lm)
##
## Call:
## lm(formula = mag ~ depth + depth * stations + station_bin + I(depth^2),
## data = df_dich)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.5827 -0.1344 0.0016 0.1155 0.8677
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.273e+00 6.030e-02 70.870 < 2e-16 ***
## depth -1.455e-03 1.831e-04 -7.944 6.45e-15 ***
## stations 2.019e-02 2.565e-03 7.872 1.10e-14 ***
## station_bin(10,20] -5.148e-02 4.605e-02 -1.118 0.2639
## station_bin(20,30] -7.422e-02 5.677e-02 -1.307 0.1915
## station_bin(30,40] -1.239e-01 7.423e-02 -1.670 0.0954 .
## station_bin(40,50] -1.553e-01 9.539e-02 -1.628 0.1039
## I(depth^2) 1.566e-06 2.385e-07 6.568 9.08e-11 ***
## depth:stations 3.229e-06 2.903e-06 1.112 0.2664
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1912 on 818 degrees of freedom
## (173 observations deleted due to missingness)
## Multiple R-squared: 0.5544, Adjusted R-squared: 0.55
## F-statistic: 127.2 on 8 and 818 DF, p-value: < 2.2e-16
It doesn’t care for the binning. Depth, number of stations, and depth^2 are most significant.