# Pacotes
#install.packages("modeldata")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modeldata)
library(dplyr)
library(ggplot2)
library(lmtest)
## Carregando pacotes exigidos: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(olsrr)
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
library(MASS)
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:olsrr':
##
## cement
##
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Carregando pacotes exigidos: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(tseries)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Carregando o conjunto de dados Ames Housing
data("ames")
# Vendo a estrutura dos dados
glimpse(ames)
## Rows: 2,930
## Columns: 74
## $ MS_SubClass <fct> One_Story_1946_and_Newer_All_Styles, One_Story_1946…
## $ MS_Zoning <fct> Residential_Low_Density, Residential_High_Density, …
## $ Lot_Frontage <dbl> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, 0, 63,…
## $ Lot_Area <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005…
## $ Street <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pav…
## $ Alley <fct> No_Alley_Access, No_Alley_Access, No_Alley_Access, …
## $ Lot_Shape <fct> Slightly_Irregular, Regular, Slightly_Irregular, Re…
## $ Land_Contour <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, HLS, Lvl, Lvl, L…
## $ Utilities <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, All…
## $ Lot_Config <fct> Corner, Inside, Corner, Corner, Inside, Inside, Ins…
## $ Land_Slope <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, G…
## $ Neighborhood <fct> North_Ames, North_Ames, North_Ames, North_Ames, Gil…
## $ Condition_1 <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, Norm, No…
## $ Condition_2 <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Nor…
## $ Bldg_Type <fct> OneFam, OneFam, OneFam, OneFam, OneFam, OneFam, Twn…
## $ House_Style <fct> One_Story, One_Story, One_Story, One_Story, Two_Sto…
## $ Overall_Cond <fct> Average, Above_Average, Above_Average, Average, Ave…
## $ Year_Built <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 199…
## $ Year_Remod_Add <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 199…
## $ Roof_Style <fct> Hip, Gable, Hip, Hip, Gable, Gable, Gable, Gable, G…
## $ Roof_Matl <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompSh…
## $ Exterior_1st <fct> BrkFace, VinylSd, Wd Sdng, BrkFace, VinylSd, VinylS…
## $ Exterior_2nd <fct> Plywood, VinylSd, Wd Sdng, BrkFace, VinylSd, VinylS…
## $ Mas_Vnr_Type <fct> Stone, None, BrkFace, None, None, BrkFace, None, No…
## $ Mas_Vnr_Area <dbl> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6…
## $ Exter_Cond <fct> Typical, Typical, Typical, Typical, Typical, Typica…
## $ Foundation <fct> CBlock, CBlock, CBlock, CBlock, PConc, PConc, PConc…
## $ Bsmt_Cond <fct> Good, Typical, Typical, Typical, Typical, Typical, …
## $ Bsmt_Exposure <fct> Gd, No, No, No, No, No, Mn, No, No, No, No, No, No,…
## $ BsmtFin_Type_1 <fct> BLQ, Rec, ALQ, ALQ, GLQ, GLQ, GLQ, ALQ, GLQ, Unf, U…
## $ BsmtFin_SF_1 <dbl> 2, 6, 1, 1, 3, 3, 3, 1, 3, 7, 7, 1, 7, 3, 3, 1, 3, …
## $ BsmtFin_Type_2 <fct> Unf, LwQ, Unf, Unf, Unf, Unf, Unf, Unf, Unf, Unf, U…
## $ BsmtFin_SF_2 <dbl> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0…
## $ Bsmt_Unf_SF <dbl> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994,…
## $ Total_Bsmt_SF <dbl> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, …
## $ Heating <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, Gas…
## $ Heating_QC <fct> Fair, Typical, Typical, Excellent, Good, Excellent,…
## $ Central_Air <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, …
## $ Electrical <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SB…
## $ First_Flr_SF <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, …
## $ Second_Flr_SF <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0,…
## $ Gr_Liv_Area <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616…
## $ Bsmt_Full_Bath <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, …
## $ Bsmt_Half_Bath <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Full_Bath <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, …
## $ Half_Bath <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, …
## $ Bedroom_AbvGr <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, …
## $ Kitchen_AbvGr <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ TotRms_AbvGrd <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8,…
## $ Functional <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, T…
## $ Fireplaces <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, …
## $ Garage_Type <fct> Attchd, Attchd, Attchd, Attchd, Attchd, Attchd, Att…
## $ Garage_Finish <fct> Fin, Unf, Unf, Fin, Fin, Fin, Fin, RFn, RFn, Fin, F…
## $ Garage_Cars <dbl> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, …
## $ Garage_Area <dbl> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 4…
## $ Garage_Cond <fct> Typical, Typical, Typical, Typical, Typical, Typica…
## $ Paved_Drive <fct> Partial_Pavement, Paved, Paved, Paved, Paved, Paved…
## $ Wood_Deck_SF <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 48…
## $ Open_Porch_SF <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0…
## $ Enclosed_Porch <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Three_season_porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Screen_Porch <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, …
## $ Pool_Area <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pool_QC <fct> No_Pool, No_Pool, No_Pool, No_Pool, No_Pool, No_Poo…
## $ Fence <fct> No_Fence, Minimum_Privacy, No_Fence, No_Fence, Mini…
## $ Misc_Feature <fct> None, None, Gar2, None, None, None, None, None, Non…
## $ Misc_Val <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, …
## $ Mo_Sold <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, …
## $ Year_Sold <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 201…
## $ Sale_Type <fct> WD , WD , WD , WD , WD , WD , WD , WD , WD , WD , W…
## $ Sale_Condition <fct> Normal, Normal, Normal, Normal, Normal, Normal, Nor…
## $ Sale_Price <int> 215000, 105000, 172000, 244000, 189900, 195500, 213…
## $ Longitude <dbl> -93.61975, -93.61976, -93.61939, -93.61732, -93.638…
## $ Latitude <dbl> 42.05403, 42.05301, 42.05266, 42.05125, 42.06090, 4…
# Tentando entender a base de dados
table(ames$Neighborhood)
##
## North_Ames College_Creek
## 443 267
## Old_Town Edwards
## 239 194
## Somerset Northridge_Heights
## 182 166
## Gilbert Sawyer
## 165 151
## Northwest_Ames Sawyer_West
## 131 125
## Mitchell Brookside
## 114 108
## Crawford Iowa_DOT_and_Rail_Road
## 103 93
## Timberland Northridge
## 72 71
## Stone_Brook South_and_West_of_Iowa_State_University
## 51 48
## Clear_Creek Meadow_Village
## 44 37
## Briardale Bloomington_Heights
## 30 28
## Veenker Northpark_Villa
## 24 23
## Blueste Greens
## 10 8
## Green_Hills Landmark
## 2 1
## Hayden_Lake
## 0
table(ames$Foundation)
##
## BrkTil CBlock PConc Slab Stone Wood
## 311 1244 1310 49 11 5
table(ames$House_Style)
##
## One_and_Half_Fin One_and_Half_Unf One_Story SFoyer
## 314 19 1481 83
## SLvl Two_and_Half_Fin Two_and_Half_Unf Two_Story
## 128 8 24 873
table(ames$Bldg_Type)
##
## OneFam TwoFmCon Duplex Twnhs TwnhsE
## 2425 62 109 101 233
# Criando a base de dados a ser usada na modelagem
dados = ames %>%
filter(Bldg_Type == "OneFam" & MS_Zoning == "Residential_Low_Density") %>%
#filter(Neighborhood == "North_Ames") %>%
#dplyr::select(Sale_Price, Gr_Liv_Area, Year_Built, Full_Bath, Bedroom_AbvGr, Garage_Cars)%>%
dplyr::select(Sale_Price, Gr_Liv_Area, Year_Built, Garage_Cars)%>%
rename(
preco_venda = Sale_Price,
area_construida = Gr_Liv_Area,
ano_construcao = Year_Built,
#banheiros_completos = Full_Bath,
#quartos_acima_solo = Bedroom_AbvGr,
vagas_garagem = Garage_Cars
) %>%
mutate(
log_preco_venda = log(preco_venda),
#preco_venda = (preco_venda-mean(preco_venda))/sd(preco_venda),
area_construida = (area_construida-mean(area_construida))/sd(area_construida)#,
#ano_construcao = (ano_construcao-mean(ano_construcao))/sd(ano_construcao)#,
#ano_construcao = ano_construcao-min(ano_construcao)#,
#banheiros_completos = (banheiros_completos-mean(banheiros_completos))/sd(banheiros_completos),
#quartos_acima_solo = (quartos_acima_solo-mean(quartos_acima_solo))/sd(quartos_acima_solo),
#vagas_garagem = (vagas_garagem-mean(vagas_garagem))/sd(vagas_garagem)
)
#Treino (base de dados usada no ajuste)
inds_treino = 1:(nrow(dados)-100)#sort(sample(1:nrow(dados), nrow(dados)-100, replace=F))
inds_previsao = setdiff(1:nrow(dados), inds_treino)
base = dados[inds_treino,]
#Teste (base de dados usada na previsao)
base_previsao = dados[inds_previsao,]
#Analisando as variaveis selecionadas
correlacao <- round(cor(base %>% dplyr::select(-preco_venda)), 3)
#correlacao <- round(cor(base %>% dplyr::select(-log_preco_venda)), 3)
as.data.frame(correlacao)
## area_construida ano_construcao vagas_garagem log_preco_venda
## area_construida 1.000 0.359 0.563 0.765
## ano_construcao 0.359 1.000 0.589 0.625
## vagas_garagem 0.563 0.589 1.000 0.726
## log_preco_venda 0.765 0.625 0.726 1.000
#representando graficamente
#install.packages("corrplot")
library(corrplot)
## corrplot 0.92 loaded
par(mfrow=c(1,1))
png("Fig_Correlacao_AMES.png")
#cores_claras <- colorRampPalette(c("#D6EAF8", "#FFFFFF", "#F5B7B1"))(200)
corrplot(correlacao, method = "color",
type = "upper",
# col = cores_claras, # <<<<<< AQUI
tl.col = "black",
tl.srt = 45,
addCoef.col = "black") # Adiciona os valores
dev.off()
## png
## 2
par(mfrow=c(1,2))
hist(base$preco_venda, freq=F, col="grey", border="white",
main="Histograma", ylab="Densidade",
xlab="Preço do imóvel", cex.lab=1.4, cex.axis=1.4)
png("Fig_Histograma_AMES.png")
hist(base$log_preco_venda, freq=F, col="grey", border="white",
main="Histograma", ylab="Densidade",
xlab="Log do preço do imóvel", cex.lab=1.4, cex.axis=1.4)
dev.off()
## png
## 2
par(mfrow=c(1,3))

plot(base$area_construida, base$preco_venda, bty="n", xlab="área construída", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$ano_construcao, base$preco_venda, bty="n", xlab="ano de construção", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
#plot(base$banheiros_completos, base$preco_venda, bty="n", xlab="total de banheiros", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
#plot(base$quartos_acima_solo, base$preco_venda, bty="n", xlab="total de quartos acima do solo", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$vagas_garagem, base$preco_venda, bty="n", xlab="total de vagas na garagem", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)

plot(base$area_construida, base$log_preco_venda, bty="n", xlab="área construída", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$ano_construcao, base$log_preco_venda, bty="n", xlab="ano de construção", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$vagas_garagem, base$log_preco_venda, bty="n", xlab="total de vagas na garagem", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)

# Ajustando o modelo de regressão linear múltipla
ajuste <- lm(log_preco_venda ~ area_construida+ano_construcao+vagas_garagem, data = base) #usando o log do preço de venda como resposta
summary(ajuste)
##
## Call:
## lm(formula = log_preco_venda ~ area_construida + ano_construcao +
## vagas_garagem, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.76980 -0.10179 -0.00418 0.09476 0.79973
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.4961572 0.3999024 8.743 <2e-16 ***
## area_construida 0.1937830 0.0051021 37.981 <2e-16 ***
## ano_construcao 0.0042174 0.0002062 20.455 <2e-16 ***
## vagas_garagem 0.1452266 0.0082821 17.535 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1861 on 1905 degrees of freedom
## Multiple R-squared: 0.7649, Adjusted R-squared: 0.7645
## F-statistic: 2066 on 3 and 1905 DF, p-value: < 2.2e-16
AIC(ajuste)
## [1] -996.6417
## Escolha das variáveis para o modelo ----
ajuste_step = stepAIC(ajuste, direction = "backward")
## Start: AIC=-6416.15
## log_preco_venda ~ area_construida + ano_construcao + vagas_garagem
##
## Df Sum of Sq RSS AIC
## <none> 65.966 -6416.1
## - vagas_garagem 1 10.647 76.613 -6132.5
## - ano_construcao 1 14.489 80.456 -6039.1
## - area_construida 1 49.953 115.919 -5341.9
ajuste_step
##
## Call:
## lm(formula = log_preco_venda ~ area_construida + ano_construcao +
## vagas_garagem, data = base)
##
## Coefficients:
## (Intercept) area_construida ano_construcao vagas_garagem
## 3.496157 0.193783 0.004217 0.145227
## Outra forma de selecionar o melhor modelo ----
#install.packages("MuMIn")
#library(MuMIn)
#options(na.action = "na.fail") # Requisito do dredge
#todos_modelos <- dredge(ajuste)
#summary(todos_modelos)
#modelo_melhor <- get.models(todos_modelos, 1)[[1]]
#summary(modelo_melhor)
#min(todos_modelos$AICc)
#AIC(ajuste)
#AIC(ajuste_step)
## Analisando os Coeficientes ----
nc = 4
### β₀ ----
b0 <- ajuste_step$coefficients[[1]]
ic_inf <- confint(ajuste_step)[1,1]
ic_sup <- confint(ajuste_step)[1,2]
beta0 <- data.frame(
Estimativa = round(b0, nc),
`LI` = round(ic_inf, nc),
`LS` = round(ic_sup, nc)
)
### β₁ ----
b1 <- ajuste_step$coefficients[[2]]
ic_inf1 <- confint(ajuste_step)[2,1]
ic_sup1 <- confint(ajuste_step)[2,2]
beta1 <- data.frame(
Estimativa = round(b1, nc),
`LI` = round(ic_inf1, nc),
`LS` = round(ic_sup1, nc)
)
### β₂ ----
b2 <- ajuste_step$coefficients[[3]]
ic_inf2 <- confint(ajuste_step)[3,1]
ic_sup2 <- confint(ajuste_step)[3,2]
beta2 <- data.frame(
Estimativa = round(b2, nc),
`LI` = round(ic_inf2, nc),
`LS` = round(ic_sup2, nc)
)
### β₃ ----
b3 <- ajuste_step$coefficients[[4]]
ic_inf3 <- confint(ajuste_step)[4,1]
ic_sup3 <- confint(ajuste_step)[4,2]
beta3 <- data.frame(
Estimativa = round(b3,nc),
`LI` = round(ic_inf3, nc),
`LS` = round(ic_sup3, nc)
)
rbind(beta0, beta1, beta2, beta3)
## Estimativa LI LS
## 1 3.4962 2.7119 4.2804
## 2 0.1938 0.1838 0.2038
## 3 0.0042 0.0038 0.0046
## 4 0.1452 0.1290 0.1615
## Verificando as Suposições ----
### Normalidade dos Resíduos ----
par(mfrow=c(1,2))
qqnorm(ajuste_step$residuals, bty="n", xlab="Quantis teóricos", ylab="Quantis amostrais", cex.lab=1.4, cex.axis=1.4)
qqline(ajuste_step$residuals, col = "blue", lwd=2)
hist(ajuste_step$residuals, freq=F,
main="Histograma", ylab="Densidade",
xlab="Resíduos", cex.lab=1.4, cex.axis=1.4)
curve(dnorm(x, 0, sqrt(summary(ajuste_step)$sigma^2)), add=T, col="red", lwd=2)

# Hipótese nula (H₀): os resíduos têm distribuição normal.
# Hipótese alternativa (H₁): os resíduos não têm distribuição normal.
shapiro.test(ajuste_step$residuals)
##
## Shapiro-Wilk normality test
##
## data: ajuste_step$residuals
## W = 0.92805, p-value < 2.2e-16
ks.test(ajuste_step$residuals, "pnorm", mean(ajuste_step$residuals), sd(ajuste_step$residuals))
## Warning in ks.test.default(ajuste_step$residuals, "pnorm",
## mean(ajuste_step$residuals), : ties should not be present for the
## Kolmogorov-Smirnov test
##
## Asymptotic one-sample Kolmogorov-Smirnov test
##
## data: ajuste_step$residuals
## D = 0.061718, p-value = 9.659e-07
## alternative hypothesis: two-sided
jarque.bera.test(ajuste_step$residuals)
##
## Jarque Bera Test
##
## data: ajuste_step$residuals
## X-squared = 7864, df = 2, p-value < 2.2e-16
### Homocedasticididade ----
plot(ajuste_step$fitted.values, ajuste_step$residuals,
xlab = "Valores Ajustados", ylab = "Residuos", main = "", bty="n", cex.lab=1.4, cex.axis=1.4)
abline(h = 0, col = "red", lwd=2)
#Hipótese nula (H₀): os resíduos têm variância constante (ou seja, homocedasticidade).
#Hipótese alternativa (H₁): a variância dos resíduos não é constante (isto é, há heterocedasticidade)
bptest(ajuste_step)
##
## studentized Breusch-Pagan test
##
## data: ajuste_step
## BP = 181.69, df = 3, p-value < 2.2e-16
### Independência dos resíduos ----
dwtest(ajuste_step)
##
## Durbin-Watson test
##
## data: ajuste_step
## DW = 1.5709, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(ajuste_step, order = 1)
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: ajuste_step
## LM test = 90.571, df = 1, p-value < 2.2e-16
bgtest(ajuste_step, order = 2)
##
## Breusch-Godfrey test for serial correlation of order up to 2
##
## data: ajuste_step
## LM test = 120.99, df = 2, p-value < 2.2e-16
bgtest(ajuste_step, order = 3)
##
## Breusch-Godfrey test for serial correlation of order up to 3
##
## data: ajuste_step
## LM test = 131.34, df = 3, p-value < 2.2e-16
### Multicolinearidade ----
vif(ajuste_step)
## area_construida ano_construcao vagas_garagem
## 1.466810 1.533160 1.956098
## Tabela de ANOVA ----
anova(ajuste)
## Analysis of Variance Table
##
## Response: log_preco_venda
## Df Sum Sq Mean Sq F value Pr(>F)
## area_construida 1 164.375 164.375 4746.89 < 2.2e-16 ***
## ano_construcao 1 39.567 39.567 1142.63 < 2.2e-16 ***
## vagas_garagem 1 10.647 10.647 307.47 < 2.2e-16 ***
## Residuals 1905 65.966 0.035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(ajuste_step)
## Analysis of Variance Table
##
## Response: log_preco_venda
## Df Sum Sq Mean Sq F value Pr(>F)
## area_construida 1 164.375 164.375 4746.89 < 2.2e-16 ***
## ano_construcao 1 39.567 39.567 1142.63 < 2.2e-16 ***
## vagas_garagem 1 10.647 10.647 307.47 < 2.2e-16 ***
## Residuals 1905 65.966 0.035
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fazendo previsões ----
previsoes = predict(ajuste_step, newdata = base_previsao, interval = "prediction", level = 0.95)
# Convertendo para tibble e adicionando às observações reais
previsao_completa = base_previsao %>%
mutate(
log_preco_previsto = previsoes[, "fit"],
log_preco_lower = previsoes[, "lwr"],
log_preco_upper = previsoes[, "upr"]
)
# Comparando previsto vs observado (gráfico)
ggplot(previsao_completa, aes(x = log_preco_venda, y = log_preco_previsto)) +
geom_point(color = "blue") +
geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed", size = 1.2) +
labs(x = "Log(Preço) Observado", y = "Log(Preço) Previsto", title = "Comparação entre Valores Observados e Previstos") +
theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Calculando métricas de erro
erro = previsao_completa$log_preco_venda - previsao_completa$log_preco_previsto
# Erro Quadrático Médio (RMSE)
rmse = sqrt(mean(erro^2))
# Erro Médio Absoluto (MAE)
mae = mean(abs(erro))
# Exibindo as métricas
cat("RMSE:", round(rmse, 4), "\n")
## RMSE: 0.2058
cat("MAE:", round(mae, 4), "\n")
## MAE: 0.1553
# Visualizando previsões com intervalos
head(previsao_completa)
## # A tibble: 6 × 8
## preco_venda area_construida ano_construcao vagas_garagem log_preco_venda
## <int> <dbl> <int> <dbl> <dbl>
## 1 205000 -0.00451 2001 3 12.2
## 2 132500 -1.11 1976 2 11.8
## 3 157500 -0.293 1977 1 12.0
## 4 174000 -0.493 1978 3 12.1
## 5 128500 -1.08 1969 1 11.8
## 6 128500 -1.28 1972 1 11.8
## # ℹ 3 more variables: log_preco_previsto <dbl>, log_preco_lower <dbl>,
## # log_preco_upper <dbl>
# Pacotes necessários
library(ggplot2)
# Adicionar índice das unidades amostrais na base de previsão
previsao_completa = previsao_completa %>%
mutate(indice = inds_previsao)
# Gráfico
png("Fig_Previsao_AMES.png", width = 10, height = 8.3, units = "in", res = 300)
ggplot(previsao_completa, aes(x = indice)) +
# Faixa do intervalo de previsão (área sombreada)
geom_ribbon(aes(ymin = log_preco_lower, ymax = log_preco_upper), fill = "lightblue", alpha = 0.5) +
# Estimativa pontual (linha ou pontos)
geom_line(aes(y = log_preco_previsto), color = "blue", size = 1) +
geom_point(aes(y = log_preco_previsto), color = "blue", size = 2) +
# Valores observados (x vermelhos)
#geom_point(aes(y = preco_venda), shape = 4, color = "red", size = 3, stroke = 1.5) + #caso a resposta seja o preco de venda
geom_point(aes(y = log_preco_venda), shape = 4, color = "red", size = 3, stroke = 1.5) + #caso a resposta seja o log do preco de venda
labs(x = "Índice da Unidade Amostral", y = "Log(Preço de Venda)",
title = "Previsões com Intervalos vs Valores Observados") +
theme_minimal()
dev.off()
## png
## 2
