Данные представляют собой квартальные наблюдения с 2018 по 2024 год. Ниже представлено описание переменных:
variables_description <- data.frame(
Переменная = c("year", "quarter", "gdp", "m2", "flow", "n", "n_re", "n_pure", "n_pure_re", "market", "warehouses"),
Описание = c("Год наблюдения", "Квартал наблюдения", "Валовой внутренний продукт России",
"Денежная масса (агрегат M2)", "Объем средств, привлеченных в ЗПИФ",
"Общее количество ЗПИФ", "Количество ЗПИФ недвижимости",
"Стоимость чистых активов ЗПИФ",
"Стоимость чистых активов ЗПИФ недвижимости",
"Объем рынка коммерческой недвижимости",
"Объем рынка складской недвижимости"),
Единицы_измерения = c("Год", "Квартал (1-4)", "млрд руб.", "млрд руб.", "млрд руб.",
"единиц", "единиц", "единиц", "млн руб.", "млн кв.м.", "млн кв.м.")
)
# Вывод таблицы с описанием переменных
kable(variables_description, caption = "Описание переменных")
| Переменная | Описание | Единицы_измерения |
|---|---|---|
| year | Год наблюдения | Год |
| quarter | Квартал наблюдения | Квартал (1-4) |
| gdp | Валовой внутренний продукт России | млрд руб. |
| m2 | Денежная масса (агрегат M2) | млрд руб. |
| flow | Объем средств, привлеченных в ЗПИФ | млрд руб. |
| n | Общее количество ЗПИФ | единиц |
| n_re | Количество ЗПИФ недвижимости | единиц |
| n_pure | Стоимость чистых активов ЗПИФ | единиц |
| n_pure_re | Стоимость чистых активов ЗПИФ недвижимости | млн руб. |
| market | Объем рынка коммерческой недвижимости | млн кв.м. |
| warehouses | Объем рынка складской недвижимости | млн кв.м. |
df_clean <- df %>%
mutate(across(where(is.character), as.numeric))
df_clean$yearqtr <- paste(df_clean$year, df_clean$quarter, sep = "Q")
summary(df_clean)
## year quarter gdp m2
## Min. :2018 Min. :1.00 Min. :22474 Min. : 42046
## 1st Qu.:2019 1st Qu.:1.75 1st Qu.:27492 1st Qu.: 48793
## Median :2021 Median :2.50 Median :32026 Median : 59506
## Mean :2021 Mean :2.50 Mean :34198 Mean : 67053
## 3rd Qu.:2023 3rd Qu.:3.25 3rd Qu.:40126 3rd Qu.: 84516
## Max. :2024 Max. :4.00 Max. :50814 Max. :111025
## NA's :1
## flow n n_re n_pure
## Min. : -6.00 Min. : 850 Min. : 33.0 Min. : 2329650
## 1st Qu.: 65.05 1st Qu.:1014 1st Qu.: 50.0 1st Qu.: 3299789
## Median : 172.90 Median :1293 Median : 95.0 Median : 5330049
## Mean : 251.79 Mean :1411 Mean :107.7 Mean : 5902585
## 3rd Qu.: 305.10 3rd Qu.:1638 3rd Qu.:160.0 3rd Qu.: 7112619
## Max. :1530.00 Max. :2593 Max. :239.0 Max. :13927461
## NA's :1 NA's :1 NA's :1 NA's :1
## n_pure_re market warehouses yearqtr
## Min. : 52175 Min. :2.300 Min. :0.700 Length:28
## 1st Qu.:206745242 1st Qu.:2.850 1st Qu.:0.920 Class :character
## Median :411358775 Median :3.600 Median :1.000 Mode :character
## Mean :433563593 Mean :3.589 Mean :1.269
## 3rd Qu.:650819008 3rd Qu.:4.700 3rd Qu.:1.700
## Max. :977086593 Max. :4.700 Max. :2.100
## NA's :1 NA's :1 NA's :1
# Вычисление корреляционной матрицы
correlation_matrix <- cor(df_clean[, c("gdp", "m2", "flow", "n", "n_re", "n_pure", "n_pure_re",
"market", "warehouses")],
use = "complete.obs")
# Тепловая карта корреляций
melted_corr <- melt(correlation_matrix)
colnames(melted_corr) <- c("Var1", "Var2", "value")
ggplot(data = melted_corr, aes(x = Var1, y = Var2, fill = value)) +
geom_tile(color = "white") +
scale_fill_gradient2(low = "#6D9EC1", high = "#E46726", mid = "white",
midpoint = 0, limit = c(-1, 1), space = "Lab",
name = "Корреляция\nПирсона") +
geom_text(aes(label = round(value, 2)), color = "black", size = 3) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1),
plot.title = element_text(hjust = 0.5, face = "bold")) +
coord_fixed() +
labs(title = "Тепловая карта корреляций",
x = "", y = "")
# Полная визуализация с диаграммами рассеяния и гистограммами
ggpairs(df_clean[, c("gdp", "m2", "flow", "n", "n_re", "n_pure", "n_pure_re",
"market", "warehouses")],
upper = list(continuous = "cor", combo = "box_no_facet"),
lower = list(continuous = "smooth", combo = "dot_no_facet"),
diag = list(continuous = "barDiag"),
title = "Матрица корреляций Пирсона с диаграммами рассеяния",
axisLabels = "show",
switch = "both") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
axis.text.y = element_text(size = 8),
strip.text = element_text(size = 8))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removed 2 rows containing missing values
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
## Warning in ggally_statistic(data = data, mapping = mapping, na.rm = na.rm, :
## Removing 1 row that contained a missing value
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 2 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
Наблюдается высокая положительная корреляция между flow и gdp (0.69) -Средняя положительная корреляция между flow и m2 (0.54)
-Умеренная положительная корреляция между flow и общим количеством ЗПИФ (n) (0.56)
-Средняя положительная корреляция между flow и стоимостью чистых активов ЗПИФ (n_pure) (0.57)
# 1. Базовая модель с макроэкономическими переменными
model1 <- lm(flow ~ gdp + m2, data = df_clean)
summary(model1)
##
## Call:
## lm(formula = flow ~ gdp + m2, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -382.96 -124.10 -12.79 121.81 761.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.794e+02 2.082e+02 -3.743 0.00106 **
## gdp 4.679e-02 1.440e-02 3.250 0.00353 **
## m2 -8.961e-03 5.939e-03 -1.509 0.14497
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 231.2 on 23 degrees of freedom
## (2 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5257, Adjusted R-squared: 0.4845
## F-statistic: 12.75 on 2 and 23 DF, p-value: 0.0001881
# 2. Модель с добавлением количественных характеристик ЗПИФ
model2 <- lm(flow ~ gdp + m2 + n + n_re, data = df_clean)
summary(model2)
##
## Call:
## lm(formula = flow ~ gdp + m2 + n + n_re, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -383.72 -140.22 -44.16 117.76 721.66
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.353e+03 6.806e+02 -1.988 0.0606 .
## gdp 6.043e-02 1.900e-02 3.180 0.0047 **
## m2 8.677e-03 1.885e-02 0.460 0.6502
## n -8.679e-01 9.163e-01 -0.947 0.3548
## n_re 9.742e-01 1.778e+00 0.548 0.5897
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 239 on 20 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5522, Adjusted R-squared: 0.4626
## F-statistic: 6.165 on 4 and 20 DF, p-value: 0.002116
# 3. Модель с добавлением показателей стоимости активов
model3 <- lm(flow ~ gdp + m2 + n + n_re + n_pure + n_pure_re, data = df_clean)
summary(model3)
##
## Call:
## lm(formula = flow ~ gdp + m2 + n + n_re + n_pure + n_pure_re,
## data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -258.04 -154.66 -55.03 76.51 573.73
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.650e+02 9.185e+02 -0.180 0.85941
## gdp 7.340e-02 1.877e-02 3.910 0.00103 **
## m2 -7.177e-03 2.145e-02 -0.335 0.74184
## n -1.577e+00 1.546e+00 -1.020 0.32135
## n_re -1.278e+00 1.985e+00 -0.644 0.52779
## n_pure 1.397e-04 1.254e-04 1.114 0.27992
## n_pure_re -3.485e-07 2.174e-07 -1.603 0.12639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 223.7 on 18 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.6468, Adjusted R-squared: 0.529
## F-statistic: 5.493 on 6 and 18 DF, p-value: 0.002195
# 4. Полная модель с показателями рынка недвижимости
model4 <- lm(flow ~ gdp + m2 + n + n_re + n_pure + n_pure_re + market + warehouses, data = df_clean)
summary(model4)
##
## Call:
## lm(formula = flow ~ gdp + m2 + n + n_re + n_pure + n_pure_re +
## market + warehouses, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -304.22 -94.69 -55.78 116.21 504.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.428e+03 1.527e+03 0.935 0.36351
## gdp 6.242e-02 1.935e-02 3.225 0.00529 **
## m2 1.343e-03 2.774e-02 0.048 0.96199
## n -2.981e+00 1.809e+00 -1.648 0.11883
## n_re -3.787e+00 3.021e+00 -1.253 0.22804
## n_pure 3.495e-04 1.716e-04 2.036 0.05860 .
## n_pure_re -2.856e-07 2.615e-07 -1.092 0.29096
## market -3.360e+01 2.190e+02 -0.153 0.87997
## warehouses -5.381e+02 5.893e+02 -0.913 0.37474
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 216 on 16 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.7073, Adjusted R-squared: 0.5609
## F-statistic: 4.832 on 8 and 16 DF, p-value: 0.00362
# 5. Модель с выбранными переменными на основе корреляционной матрицы
# Выбираем переменные с наибольшей корреляцией с flow
model5 <- lm(flow ~ gdp + n + n_pure, data = df_clean)
summary(model5)
##
## Call:
## lm(formula = flow ~ gdp + n + n_pure, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -287.23 -153.80 -54.45 116.45 638.65
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.166e+02 4.041e+02 -1.278 0.21509
## gdp 6.323e-02 1.735e-02 3.643 0.00152 **
## n -1.646e+00 8.286e-01 -1.987 0.06014 .
## n_pure 1.451e-04 1.018e-04 1.425 0.16894
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 224.8 on 21 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5839, Adjusted R-squared: 0.5245
## F-statistic: 9.823 on 3 and 21 DF, p-value: 0.0002991
# 6. Простая модель с одной переменной, имеющей наивысшую корреляцию (gdp)
model6 <- lm(flow ~ gdp, data = df_clean)
summary(model6)
##
## Call:
## lm(formula = flow ~ gdp, data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -364.69 -130.08 -11.94 89.86 848.99
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -6.779e+02 2.022e+02 -3.352 0.00265 **
## gdp 2.674e-02 5.695e-03 4.695 9.01e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 237.2 on 24 degrees of freedom
## (2 пропущенных наблюдений удалены)
## Multiple R-squared: 0.4788, Adjusted R-squared: 0.4571
## F-statistic: 22.05 on 1 and 24 DF, p-value: 9.011e-05
# 7. Проверка на мультиколлинеарность для полной модели
if(requireNamespace("car", quietly = TRUE)) {
vif_result <- vif(model4)
print("Факторы инфляции дисперсии (VIF):")
print(vif_result)
}
## [1] "Факторы инфляции дисперсии (VIF):"
## gdp m2 n n_re n_pure n_pure_re market
## 12.706962 158.281222 348.469934 19.148616 148.784236 2.970638 18.487923
## warehouses
## 37.708428
# 8. Логарифмические трансформации
min_flow <- min(df_clean$flow, na.rm = TRUE)
if(min_flow <= 0) {
df_clean$log_flow <- log(df_clean$flow - min_flow + 1)
} else {
df_clean$log_flow <- log(df_clean$flow)
}
df_clean$log_gdp <- log(df_clean$gdp)
df_clean$log_m2 <- log(df_clean$m2)
df_clean$log_n <- log(df_clean$n)
df_clean$log_n_pure <- log(df_clean$n_pure)
# 9. Логарифмическая модель с ключевыми переменными
log_model <- lm(log_flow ~ log_gdp + log_m2 + log_n + log_n_pure, data = df_clean)
summary(log_model)
##
## Call:
## lm(formula = log_flow ~ log_gdp + log_m2 + log_n + log_n_pure,
## data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0722 -0.5425 0.1074 0.6794 1.7226
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -77.6033 45.7325 -1.697 0.10523
## log_gdp 2.4245 3.1958 0.759 0.45691
## log_m2 0.5816 6.6653 0.087 0.93134
## log_n -21.9279 10.4177 -2.105 0.04815 *
## log_n_pure 13.5001 4.0953 3.296 0.00361 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.123 on 20 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5102, Adjusted R-squared: 0.4123
## F-statistic: 5.209 on 4 and 20 DF, p-value: 0.004845
# 10. Диагностика для логарифмической модели
par(mfrow=c(2,2))
plot(log_model)
# 11. Тесты на гетероскедастичность и нормальность остатков
if(requireNamespace("lmtest", quietly = TRUE) && requireNamespace("tseries", quietly = TRUE)) {
# Тест Бреуша-Пагана(на гетероскедастичность)
bp_test <- lmtest::bptest(log_model)
print("Тест Бреуша-Пагана на гетероскедастичность:")
print(bp_test)
# Тест Шапиро-Уилка(на нормальность остатков)
sw_test <- shapiro.test(residuals(log_model))
print("Тест Шапиро-Уилка на нормальность остатков:")
print(sw_test)
}
## [1] "Тест Бреуша-Пагана на гетероскедастичность:"
##
## studentized Breusch-Pagan test
##
## data: log_model
## BP = 10.704, df = 4, p-value = 0.0301
##
## [1] "Тест Шапиро-Уилка на нормальность остатков:"
##
## Shapiro-Wilk normality test
##
## data: residuals(log_model)
## W = 0.9452, p-value = 0.195
# 12. Пошаговое построение модели с помощью функции step
step_model <- step(model4, direction = "both", trace = TRUE)
## Start: AIC=275.61
## flow ~ gdp + m2 + n + n_re + n_pure + n_pure_re + market + warehouses
##
## Df Sum of Sq RSS AIC
## - m2 1 109 746798 273.62
## - market 1 1099 747787 273.65
## - warehouses 1 38909 785598 274.88
## - n_pure_re 1 55660 802348 275.41
## <none> 746688 275.61
## - n_re 1 73325 820013 275.95
## - n 1 126761 873450 277.53
## - n_pure 1 193542 940230 279.38
## - gdp 1 485522 1232210 286.14
##
## Step: AIC=273.62
## flow ~ gdp + n + n_re + n_pure + n_pure_re + market + warehouses
##
## Df Sum of Sq RSS AIC
## - market 1 2595 749393 271.70
## - n_pure_re 1 55594 802392 273.41
## - warehouses 1 61639 808437 273.60
## <none> 746798 273.62
## - n_re 1 99712 846510 274.75
## + m2 1 109 746688 275.61
## - n_pure 1 221373 968171 278.11
## - n 1 280248 1027046 279.58
## - gdp 1 497868 1244666 284.39
##
## Step: AIC=271.7
## flow ~ gdp + n + n_re + n_pure + n_pure_re + warehouses
##
## Df Sum of Sq RSS AIC
## <none> 749393 271.70
## - n_re 1 114857 864250 273.27
## + market 1 2595 746798 273.62
## + m2 1 1606 747787 273.65
## - warehouses 1 157179 906572 274.46
## - n_pure_re 1 168130 917523 274.76
## - n_pure 1 218813 968206 276.11
## - n 1 287157 1036550 277.81
## - gdp 1 510392 1259785 282.69
summary(step_model)
##
## Call:
## lm(formula = flow ~ gdp + n + n_re + n_pure + n_pure_re + warehouses,
## data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -301.34 -98.73 -28.75 114.58 511.80
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.292e+03 1.126e+03 1.147 0.26634
## gdp 6.272e-02 1.791e-02 3.501 0.00255 **
## n -2.838e+00 1.081e+00 -2.626 0.01713 *
## n_re -3.534e+00 2.127e+00 -1.661 0.11404
## n_pure 3.428e-04 1.495e-04 2.293 0.03414 *
## n_pure_re -3.313e-07 1.649e-07 -2.010 0.05971 .
## warehouses -5.943e+02 3.058e+02 -1.943 0.06782 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 204 on 18 degrees of freedom
## (3 пропущенных наблюдений удалены)
## Multiple R-squared: 0.7062, Adjusted R-squared: 0.6083
## F-statistic: 7.211 on 6 and 18 DF, p-value: 0.000486
# 13. Модель с учетом сезонности (квартальные данные)
model_seasonal <- lm(flow ~ gdp + m2 + factor(quarter), data = df_clean)
summary(model_seasonal)
##
## Call:
## lm(formula = flow ~ gdp + m2 + factor(quarter), data = df_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -330.96 -149.47 -16.08 127.54 694.69
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.166e+02 2.330e+02 -3.076 0.00596 **
## gdp 3.922e-02 2.209e-02 1.775 0.09107 .
## m2 -5.818e-03 8.818e-03 -0.660 0.51690
## factor(quarter)2 -2.344e+01 1.346e+02 -0.174 0.86353
## factor(quarter)3 -9.244e+01 1.513e+02 -0.611 0.54824
## factor(quarter)4 9.732e+01 1.962e+02 0.496 0.62530
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 237.3 on 20 degrees of freedom
## (2 пропущенных наблюдений удалены)
## Multiple R-squared: 0.5655, Adjusted R-squared: 0.4569
## F-statistic: 5.206 on 5 and 20 DF, p-value: 0.003207