# Attach the packages used for the interactive table and the pairs plot.
library(DT)
library(psych)

# Download the real-estate price data set into the data frame `d`.
d <- read.csv("https://stats.dip.jp/01_ds/data/real_estate_price.csv")

# Show the data as an interactive table. Values are rounded to one decimal
# for display only; `d` itself is left unchanged.
d |>
  round(digits = 1) |>
  datatable()

# Pairwise overview (psych::pairs.panels) of every column in `d`.
pairs.panels(d)

library(ggcorrplot)
##  要求されたパッケージ ggplot2 をロード中です
## 
##  次のパッケージを付け加えます: 'ggplot2'
##  以下のオブジェクトは 'package:psych' からマスクされています:
## 
##     %+%, alpha
library(plotly)
## 
##  次のパッケージを付け加えます: 'plotly'
##  以下のオブジェクトは 'package:ggplot2' からマスクされています:
## 
##     last_plot
##  以下のオブジェクトは 'package:stats' からマスクされています:
## 
##     filter
##  以下のオブジェクトは 'package:graphics' からマスクされています:
## 
##     layout
# Interactive correlation heat map of every column in `d`.
#   * `lab` prints the coefficient in each cell and `hc.order` reorders the
#     variables by hierarchical clustering.
#   * `p.mat` passes the matrix of correlation p-values to ggcorrplot.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned. `layout` is namespace-qualified so the call does not depend on
# plotly masking graphics::layout.
cor(d) |>
  ggcorrplot(
    lab = TRUE,
    hc.order = TRUE,
    outline.color = "white",
    p.mat = cor_pmat(d)
  ) |>
  ggplotly() |>
  plotly::layout(
    font = list(size = 11, color = "blue", family = "UD Digi Kyokasho NK-R"),
    title = "新台湾市の住宅価格",
    xaxis = list(title = "x軸"),
    yaxis = list(title = "y軸")
  )
# Ordinary least squares regression of `price` on the other columns, with an
# intercept.
# NOTE(review): `id` looks like a row identifier rather than a property
# attribute -- confirm it is meant to be a predictor.
fit <- lm(
  price ~ id + yr + yrs_old + m_sta + nstores + lat + lon,
  data = d
)

# Print the coefficient table, residual summary and fit statistics.
summary(fit)
## 
## Call:
## lm(formula = price ~ id + yr + yrs_old + m_sta + nstores + lat + 
##     lon, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -35.942  -5.004  -1.122   4.214  75.542 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -9.556e+03  6.352e+03  -1.504  0.13326    
## id          -4.248e-03  3.654e-03  -1.162  0.24575    
## yr           2.950e+00  9.499e-01   3.106  0.00203 ** 
## yrs_old     -2.761e-01  3.864e-02  -7.146 4.18e-12 ***
## m_sta       -4.414e-03  7.174e-04  -6.152 1.84e-09 ***
## nstores      1.157e+00  1.882e-01   6.145 1.91e-09 ***
## lat          2.333e+02  4.449e+01   5.244 2.53e-07 ***
## lon         -1.780e+01  4.873e+01  -0.365  0.71501    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.868 on 406 degrees of freedom
## Multiple R-squared:  0.5824, Adjusted R-squared:  0.5752 
## F-statistic: 80.88 on 7 and 406 DF,  p-value: < 2.2e-16
library(sjPlot)
# Regression table for `fit`, with the test statistic column and the AIC
# added. `TRUE` replaces `T`, which is a reassignable variable.
tab_model(fit, show.stat = TRUE, show.aic = TRUE)
##                price
## Predictors     Estimates  CI                    Statistic  p
## (Intercept)    -9555.85   -22042.65 – 2930.94   -1.50      0.133
## id             -0.00      -0.01 – 0.00          -1.16      0.246
## yr             2.95       1.08 – 4.82           3.11       0.002
## yrs old        -0.28      -0.35 – -0.20         -7.15      <0.001
## m sta          -0.00      -0.01 – -0.00         -6.15      <0.001
## nstores        1.16       0.79 – 1.53           6.14       <0.001
## lat            233.32     145.86 – 320.77       5.24       <0.001
## lon            -17.80     -113.59 – 77.99       -0.37      0.715
## Observations   414
## R² / R² adjusted  0.582 / 0.575
## AIC            2991.907
# Coefficient plot for `fit`, with the estimates printed next to the points
# and the intercept included. `TRUE` replaces the reassignable `T`.
plot_model(fit, show.values = TRUE, show.intercept = TRUE, width = 0.1)

# Base-R regression diagnostic plots for `fit`.
plot(fit)

# Same predictors as `fit`, but `- 1` drops the intercept, forcing the
# regression through the origin.
# For a no-intercept model, summary() computes R-squared against zero rather
# than against the mean of `price`, so the value it reports is not comparable
# with the R-squared of `fit`.
fit2 <- lm(
  price ~ id + yr + yrs_old + m_sta + nstores + lat + lon - 1,
  data = d
)

# Print the coefficient table, residual summary and fit statistics.
summary(fit2)
## 
## Call:
## lm(formula = price ~ id + yr + yrs_old + m_sta + nstores + lat + 
##     lon - 1, data = d)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -36.198  -4.914  -1.267   4.271  74.877 
## 
## Coefficients:
##           Estimate Std. Error t value Pr(>|t|)    
## id      -4.474e-03  3.657e-03  -1.223  0.22188    
## yr       2.593e+00  9.213e-01   2.815  0.00511 ** 
## yrs_old -2.765e-01  3.870e-02  -7.144 4.20e-12 ***
## m_sta   -5.210e-03  4.851e-04 -10.739  < 2e-16 ***
## nstores  1.147e+00  1.884e-01   6.088 2.64e-09 ***
## lat      2.144e+02  4.274e+01   5.016 7.88e-07 ***
## lon     -8.663e+01  1.681e+01  -5.154 3.98e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.882 on 407 degrees of freedom
## Multiple R-squared:  0.9523, Adjusted R-squared:  0.9515 
## F-statistic:  1162 on 7 and 407 DF,  p-value: < 2.2e-16
library(sjPlot)
# Regression table for the no-intercept model `fit2`, with the test statistic
# column and the AIC added. `TRUE` replaces `T`, which is reassignable.
tab_model(fit2, show.stat = TRUE, show.aic = TRUE)
##                price
## Predictors     Estimates  CI                    Statistic  p
## id             -0.00      -0.01 – 0.00          -1.22      0.222
## yr             2.59       0.78 – 4.40           2.82       0.005
## yrs old        -0.28      -0.35 – -0.20         -7.14      <0.001
## m sta          -0.01      -0.01 – -0.00         -10.74     <0.001
## nstores        1.15       0.78 – 1.52           6.09       <0.001
## lat            214.39     130.38 – 298.41       5.02       <0.001
## lon            -86.63     -119.66 – -53.59      -5.15      <0.001
## Observations   414
## R² / R² adjusted  0.952 / 0.952
## AIC            2992.209
# Coefficient plot for `fit2`, with the estimates printed next to the points.
# `TRUE` replaces the reassignable `T`.
# NOTE(review): `fit2` is fitted with `- 1` (no intercept), so
# `show.intercept = TRUE` presumably has nothing to display here; it is kept
# so the call mirrors the one used for `fit` -- confirm.
plot_model(fit2, show.values = TRUE, show.intercept = TRUE, width = 0.1)

# Base-R regression diagnostic plots for `fit2`.
plot(fit2)