# Packages for the interactive data table (DT) and the scatter-plot matrix
# (psych).
library(DT)
library(psych)

# Read the real-estate price data directly from the remote CSV file.
d <- read.csv(file = 'https://stats.dip.jp/01_ds/data/real_estate_price.csv')

# Show the data as an interactive table; values are rounded to one decimal
# place for display only (d itself is left unrounded).
d |>
  round(digits = 1) |>
  datatable()

# Pairwise overview of every column in d.
pairs.panels(d)

library(ggcorrplot)
## 要求されたパッケージ ggplot2 をロード中です
##
## 次のパッケージを付け加えます: 'ggplot2'
## 以下のオブジェクトは 'package:psych' からマスクされています:
##
## %+%, alpha
library(plotly)
##
## 次のパッケージを付け加えます: 'plotly'
## 以下のオブジェクトは 'package:ggplot2' からマスクされています:
##
## last_plot
## 以下のオブジェクトは 'package:stats' からマスクされています:
##
## filter
## 以下のオブジェクトは 'package:graphics' からマスクされています:
##
## layout
# Interactive correlation heat map of all columns in d.
#   1. cor(d)        - correlation matrix of the columns.
#   2. ggcorrplot()  - heat map with coefficient labels (lab), rows/columns
#                      reordered by hierarchical clustering (hc.order), and
#                      p-values from cor_pmat(d) supplied via p.mat.
#   3. ggplotly()    - convert the ggplot object to a plotly widget.
#   4. layout()      - set font, title and axis titles on the plotly widget.
# TRUE is spelled out instead of T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
cor(d) |>
  ggcorrplot(
    lab = TRUE,
    hc.order = TRUE,
    outline.color = "white",
    p.mat = cor_pmat(d)
  ) |>
  ggplotly() |>
  layout(
    font = list(size = 11, color = 'blue', family = 'UD Digi Kyokasho NK-R'),
    title = '新台湾市の住宅価格',
    xaxis = list(title = 'x軸'),
    yaxis = list(title = 'y軸')
  )
# Multiple linear regression of price on id, yr, yrs_old, m_sta, nstores,
# lat and lon, with an intercept.
# NOTE(review): id is included as a predictor; if it is only a row
# identifier it carries no explanatory meaning - confirm it is intended.
fit <- lm(
  price ~ id + yr + yrs_old + m_sta + nstores + lat + lon,
  data = d
)

# Print coefficient estimates, standard errors, t tests and fit statistics.
summary(fit)
##
## Call:
## lm(formula = price ~ id + yr + yrs_old + m_sta + nstores + lat +
## lon, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -35.942 -5.004 -1.122 4.214 75.542
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.556e+03 6.352e+03 -1.504 0.13326
## id -4.248e-03 3.654e-03 -1.162 0.24575
## yr 2.950e+00 9.499e-01 3.106 0.00203 **
## yrs_old -2.761e-01 3.864e-02 -7.146 4.18e-12 ***
## m_sta -4.414e-03 7.174e-04 -6.152 1.84e-09 ***
## nstores 1.157e+00 1.882e-01 6.145 1.91e-09 ***
## lat 2.333e+02 4.449e+01 5.244 2.53e-07 ***
## lon -1.780e+01 4.873e+01 -0.365 0.71501
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.868 on 406 degrees of freedom
## Multiple R-squared: 0.5824, Adjusted R-squared: 0.5752
## F-statistic: 80.88 on 7 and 406 DF, p-value: < 2.2e-16
# sjPlot provides tab_model() and plot_model().
library(sjPlot)

# Regression table for fit, additionally showing the test statistic column
# (show.stat) and the model AIC (show.aic).
# TRUE is used instead of T because T is a reassignable variable.
tab_model(fit, show.stat = TRUE, show.aic = TRUE)
## |                  | price
## | Predictors       | Estimates | CI                  | Statistic | p      |
## | (Intercept)      | -9555.85  | -22042.65 – 2930.94 | -1.50     | 0.133  |
## | id               | -0.00     | -0.01 – 0.00        | -1.16     | 0.246  |
## | yr               | 2.95      | 1.08 – 4.82         | 3.11      | 0.002  |
## | yrs old          | -0.28     | -0.35 – -0.20       | -7.15     | <0.001 |
## | m sta            | -0.00     | -0.01 – -0.00       | -6.15     | <0.001 |
## | nstores          | 1.16      | 0.79 – 1.53         | 6.14      | <0.001 |
## | lat              | 233.32    | 145.86 – 320.77     | 5.24      | <0.001 |
## | lon              | -17.80    | -113.59 – 77.99     | -0.37     | 0.715  |
## | Observations     | 414
## | R2 / R2 adjusted | 0.582 / 0.575
## | AIC              | 2991.907
# Coefficient plot for fit with the estimate values printed (show.values)
# and the intercept included (show.intercept).
# TRUE is used instead of T because T is a reassignable variable.
plot_model(fit, show.values = TRUE, show.intercept = TRUE, width = 0.1)

# Base R diagnostic plots for the fitted lm object.
plot(fit)




# Same predictors as fit, but the trailing "- 1" removes the intercept, so
# the regression is forced through the origin.
# Note: for a model without an intercept, summary.lm() computes R-squared
# against zero rather than against the mean of the response, so the
# R-squared reported for fit2 is not comparable with the one for fit.
fit2 <- lm(
  price ~ id + yr + yrs_old + m_sta + nstores + lat + lon - 1,
  data = d
)

# Print coefficient estimates, standard errors, t tests and fit statistics.
summary(fit2)
##
## Call:
## lm(formula = price ~ id + yr + yrs_old + m_sta + nstores + lat +
## lon - 1, data = d)
##
## Residuals:
## Min 1Q Median 3Q Max
## -36.198 -4.914 -1.267 4.271 74.877
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## id -4.474e-03 3.657e-03 -1.223 0.22188
## yr 2.593e+00 9.213e-01 2.815 0.00511 **
## yrs_old -2.765e-01 3.870e-02 -7.144 4.20e-12 ***
## m_sta -5.210e-03 4.851e-04 -10.739 < 2e-16 ***
## nstores 1.147e+00 1.884e-01 6.088 2.64e-09 ***
## lat 2.144e+02 4.274e+01 5.016 7.88e-07 ***
## lon -8.663e+01 1.681e+01 -5.154 3.98e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.882 on 407 degrees of freedom
## Multiple R-squared: 0.9523, Adjusted R-squared: 0.9515
## F-statistic: 1162 on 7 and 407 DF, p-value: < 2.2e-16
# sjPlot provides tab_model() and plot_model(); loading it again is harmless
# if it is already attached.
library(sjPlot)

# Regression table for the no-intercept model fit2, additionally showing the
# test statistic column (show.stat) and the model AIC (show.aic).
# TRUE is used instead of T because T is a reassignable variable.
tab_model(fit2, show.stat = TRUE, show.aic = TRUE)
## |                  | price
## | Predictors       | Estimates | CI                  | Statistic | p      |
## | id               | -0.00     | -0.01 – 0.00        | -1.22     | 0.222  |
## | yr               | 2.59      | 0.78 – 4.40         | 2.82      | 0.005  |
## | yrs old          | -0.28     | -0.35 – -0.20       | -7.14     | <0.001 |
## | m sta            | -0.01     | -0.01 – -0.00       | -10.74    | <0.001 |
## | nstores          | 1.15      | 0.78 – 1.52         | 6.09      | <0.001 |
## | lat              | 214.39    | 130.38 – 298.41     | 5.02      | <0.001 |
## | lon              | -86.63    | -119.66 – -53.59    | -5.15     | <0.001 |
## | Observations     | 414
## | R2 / R2 adjusted | 0.952 / 0.952
## | AIC              | 2992.209
# Coefficient plot for fit2 with the estimate values printed (show.values).
# NOTE(review): fit2 was fitted without an intercept ("- 1" in its formula),
# so show.intercept presumably has nothing to display here - confirm.
# TRUE is used instead of T because T is a reassignable variable.
plot_model(fit2, show.values = TRUE, show.intercept = TRUE, width = 0.1)

# Base R diagnostic plots for the fitted lm object.
plot(fit2)



