# Pacotes
#install.packages("modeldata")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(modeldata)
library(dplyr)
library(ggplot2)
library(lmtest)
## Carregando pacotes exigidos: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(olsrr)
## 
## Attaching package: 'olsrr'
## 
## The following object is masked from 'package:datasets':
## 
##     rivers
library(MASS)        
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:olsrr':
## 
##     cement
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(car)
## Carregando pacotes exigidos: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(tseries)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
# Carregando o conjunto de dados Ames Housing
data("ames")

# Vendo a estrutura dos dados
glimpse(ames)
## Rows: 2,930
## Columns: 74
## $ MS_SubClass        <fct> One_Story_1946_and_Newer_All_Styles, One_Story_1946…
## $ MS_Zoning          <fct> Residential_Low_Density, Residential_High_Density, …
## $ Lot_Frontage       <dbl> 141, 80, 81, 93, 74, 78, 41, 43, 39, 60, 75, 0, 63,…
## $ Lot_Area           <int> 31770, 11622, 14267, 11160, 13830, 9978, 4920, 5005…
## $ Street             <fct> Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pave, Pav…
## $ Alley              <fct> No_Alley_Access, No_Alley_Access, No_Alley_Access, …
## $ Lot_Shape          <fct> Slightly_Irregular, Regular, Slightly_Irregular, Re…
## $ Land_Contour       <fct> Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, Lvl, HLS, Lvl, Lvl, L…
## $ Utilities          <fct> AllPub, AllPub, AllPub, AllPub, AllPub, AllPub, All…
## $ Lot_Config         <fct> Corner, Inside, Corner, Corner, Inside, Inside, Ins…
## $ Land_Slope         <fct> Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, Gtl, G…
## $ Neighborhood       <fct> North_Ames, North_Ames, North_Ames, North_Ames, Gil…
## $ Condition_1        <fct> Norm, Feedr, Norm, Norm, Norm, Norm, Norm, Norm, No…
## $ Condition_2        <fct> Norm, Norm, Norm, Norm, Norm, Norm, Norm, Norm, Nor…
## $ Bldg_Type          <fct> OneFam, OneFam, OneFam, OneFam, OneFam, OneFam, Twn…
## $ House_Style        <fct> One_Story, One_Story, One_Story, One_Story, Two_Sto…
## $ Overall_Cond       <fct> Average, Above_Average, Above_Average, Average, Ave…
## $ Year_Built         <int> 1960, 1961, 1958, 1968, 1997, 1998, 2001, 1992, 199…
## $ Year_Remod_Add     <int> 1960, 1961, 1958, 1968, 1998, 1998, 2001, 1992, 199…
## $ Roof_Style         <fct> Hip, Gable, Hip, Hip, Gable, Gable, Gable, Gable, G…
## $ Roof_Matl          <fct> CompShg, CompShg, CompShg, CompShg, CompShg, CompSh…
## $ Exterior_1st       <fct> BrkFace, VinylSd, Wd Sdng, BrkFace, VinylSd, VinylS…
## $ Exterior_2nd       <fct> Plywood, VinylSd, Wd Sdng, BrkFace, VinylSd, VinylS…
## $ Mas_Vnr_Type       <fct> Stone, None, BrkFace, None, None, BrkFace, None, No…
## $ Mas_Vnr_Area       <dbl> 112, 0, 108, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6…
## $ Exter_Cond         <fct> Typical, Typical, Typical, Typical, Typical, Typica…
## $ Foundation         <fct> CBlock, CBlock, CBlock, CBlock, PConc, PConc, PConc…
## $ Bsmt_Cond          <fct> Good, Typical, Typical, Typical, Typical, Typical, …
## $ Bsmt_Exposure      <fct> Gd, No, No, No, No, No, Mn, No, No, No, No, No, No,…
## $ BsmtFin_Type_1     <fct> BLQ, Rec, ALQ, ALQ, GLQ, GLQ, GLQ, ALQ, GLQ, Unf, U…
## $ BsmtFin_SF_1       <dbl> 2, 6, 1, 1, 3, 3, 3, 1, 3, 7, 7, 1, 7, 3, 3, 1, 3, …
## $ BsmtFin_Type_2     <fct> Unf, LwQ, Unf, Unf, Unf, Unf, Unf, Unf, Unf, Unf, U…
## $ BsmtFin_SF_2       <dbl> 0, 144, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1120, 0…
## $ Bsmt_Unf_SF        <dbl> 441, 270, 406, 1045, 137, 324, 722, 1017, 415, 994,…
## $ Total_Bsmt_SF      <dbl> 1080, 882, 1329, 2110, 928, 926, 1338, 1280, 1595, …
## $ Heating            <fct> GasA, GasA, GasA, GasA, GasA, GasA, GasA, GasA, Gas…
## $ Heating_QC         <fct> Fair, Typical, Typical, Excellent, Good, Excellent,…
## $ Central_Air        <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, Y, …
## $ Electrical         <fct> SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SBrkr, SB…
## $ First_Flr_SF       <int> 1656, 896, 1329, 2110, 928, 926, 1338, 1280, 1616, …
## $ Second_Flr_SF      <int> 0, 0, 0, 0, 701, 678, 0, 0, 0, 776, 892, 0, 676, 0,…
## $ Gr_Liv_Area        <int> 1656, 896, 1329, 2110, 1629, 1604, 1338, 1280, 1616…
## $ Bsmt_Full_Bath     <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, …
## $ Bsmt_Half_Bath     <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Full_Bath          <int> 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 3, 2, …
## $ Half_Bath          <int> 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, …
## $ Bedroom_AbvGr      <int> 3, 2, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 2, 1, 4, 4, …
## $ Kitchen_AbvGr      <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ TotRms_AbvGrd      <int> 7, 5, 6, 8, 6, 7, 6, 5, 5, 7, 7, 6, 7, 5, 4, 12, 8,…
## $ Functional         <fct> Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, Typ, T…
## $ Fireplaces         <int> 2, 0, 0, 2, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, …
## $ Garage_Type        <fct> Attchd, Attchd, Attchd, Attchd, Attchd, Attchd, Att…
## $ Garage_Finish      <fct> Fin, Unf, Unf, Fin, Fin, Fin, Fin, RFn, RFn, Fin, F…
## $ Garage_Cars        <dbl> 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, …
## $ Garage_Area        <dbl> 528, 730, 312, 522, 482, 470, 582, 506, 608, 442, 4…
## $ Garage_Cond        <fct> Typical, Typical, Typical, Typical, Typical, Typica…
## $ Paved_Drive        <fct> Partial_Pavement, Paved, Paved, Paved, Paved, Paved…
## $ Wood_Deck_SF       <int> 210, 140, 393, 0, 212, 360, 0, 0, 237, 140, 157, 48…
## $ Open_Porch_SF      <int> 62, 0, 36, 0, 34, 36, 0, 82, 152, 60, 84, 21, 75, 0…
## $ Enclosed_Porch     <int> 0, 0, 0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Three_season_porch <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Screen_Porch       <int> 0, 120, 0, 0, 0, 0, 0, 144, 0, 0, 0, 0, 0, 0, 140, …
## $ Pool_Area          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
## $ Pool_QC            <fct> No_Pool, No_Pool, No_Pool, No_Pool, No_Pool, No_Poo…
## $ Fence              <fct> No_Fence, Minimum_Privacy, No_Fence, No_Fence, Mini…
## $ Misc_Feature       <fct> None, None, Gar2, None, None, None, None, None, Non…
## $ Misc_Val           <int> 0, 0, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 500, 0, 0, 0, …
## $ Mo_Sold            <int> 5, 6, 6, 4, 3, 6, 4, 1, 3, 6, 4, 3, 5, 2, 6, 6, 6, …
## $ Year_Sold          <int> 2010, 2010, 2010, 2010, 2010, 2010, 2010, 2010, 201…
## $ Sale_Type          <fct> WD , WD , WD , WD , WD , WD , WD , WD , WD , WD , W…
## $ Sale_Condition     <fct> Normal, Normal, Normal, Normal, Normal, Normal, Nor…
## $ Sale_Price         <int> 215000, 105000, 172000, 244000, 189900, 195500, 213…
## $ Longitude          <dbl> -93.61975, -93.61976, -93.61939, -93.61732, -93.638…
## $ Latitude           <dbl> 42.05403, 42.05301, 42.05266, 42.05125, 42.06090, 4…
# Tentando entender a base de dados
table(ames$Neighborhood)
## 
##                              North_Ames                           College_Creek 
##                                     443                                     267 
##                                Old_Town                                 Edwards 
##                                     239                                     194 
##                                Somerset                      Northridge_Heights 
##                                     182                                     166 
##                                 Gilbert                                  Sawyer 
##                                     165                                     151 
##                          Northwest_Ames                             Sawyer_West 
##                                     131                                     125 
##                                Mitchell                               Brookside 
##                                     114                                     108 
##                                Crawford                  Iowa_DOT_and_Rail_Road 
##                                     103                                      93 
##                              Timberland                              Northridge 
##                                      72                                      71 
##                             Stone_Brook South_and_West_of_Iowa_State_University 
##                                      51                                      48 
##                             Clear_Creek                          Meadow_Village 
##                                      44                                      37 
##                               Briardale                     Bloomington_Heights 
##                                      30                                      28 
##                                 Veenker                         Northpark_Villa 
##                                      24                                      23 
##                                 Blueste                                  Greens 
##                                      10                                       8 
##                             Green_Hills                                Landmark 
##                                       2                                       1 
##                             Hayden_Lake 
##                                       0
table(ames$Foundation)
## 
## BrkTil CBlock  PConc   Slab  Stone   Wood 
##    311   1244   1310     49     11      5
table(ames$House_Style)
## 
## One_and_Half_Fin One_and_Half_Unf        One_Story           SFoyer 
##              314               19             1481               83 
##             SLvl Two_and_Half_Fin Two_and_Half_Unf        Two_Story 
##              128                8               24              873
table(ames$Bldg_Type)
## 
##   OneFam TwoFmCon   Duplex    Twnhs   TwnhsE 
##     2425       62      109      101      233
# Criando a base de dados a ser usada na modelagem
dados = ames %>%
  filter(Bldg_Type == "OneFam" & MS_Zoning == "Residential_Low_Density") %>%
  #filter(Neighborhood == "North_Ames") %>%
  #dplyr::select(Sale_Price, Gr_Liv_Area, Year_Built, Full_Bath, Bedroom_AbvGr, Garage_Cars)%>%
  dplyr::select(Sale_Price, Gr_Liv_Area, Year_Built, Garage_Cars)%>%
  rename(
    preco_venda = Sale_Price,
    area_construida = Gr_Liv_Area,
    ano_construcao = Year_Built,
    #banheiros_completos = Full_Bath,
    #quartos_acima_solo = Bedroom_AbvGr,
    vagas_garagem = Garage_Cars
  ) %>% 
  mutate(
    log_preco_venda = log(preco_venda),
    #preco_venda = (preco_venda-mean(preco_venda))/sd(preco_venda),
    area_construida = (area_construida-mean(area_construida))/sd(area_construida)#,
    #ano_construcao = (ano_construcao-mean(ano_construcao))/sd(ano_construcao)#,
    #ano_construcao = ano_construcao-min(ano_construcao)#,
    #banheiros_completos = (banheiros_completos-mean(banheiros_completos))/sd(banheiros_completos),
    #quartos_acima_solo = (quartos_acima_solo-mean(quartos_acima_solo))/sd(quartos_acima_solo),
    #vagas_garagem = (vagas_garagem-mean(vagas_garagem))/sd(vagas_garagem)
  )


#Treino (base de dados usada no ajuste)
inds_treino = 1:(nrow(dados)-100)#sort(sample(1:nrow(dados), nrow(dados)-100, replace=F))
inds_previsao = setdiff(1:nrow(dados), inds_treino)

base = dados[inds_treino,]
#Teste (base de dados usada na previsao)
base_previsao = dados[inds_previsao,]

#Analisando as variaveis selecionadas
correlacao <- round(cor(base %>% dplyr::select(-preco_venda)), 3)
#correlacao <- round(cor(base %>% dplyr::select(-log_preco_venda)), 3)
as.data.frame(correlacao)
##                 area_construida ano_construcao vagas_garagem log_preco_venda
## area_construida           1.000          0.359         0.563           0.765
## ano_construcao            0.359          1.000         0.589           0.625
## vagas_garagem             0.563          0.589         1.000           0.726
## log_preco_venda           0.765          0.625         0.726           1.000
#representando graficamente
#install.packages("corrplot")
library(corrplot)
## corrplot 0.92 loaded
par(mfrow=c(1,1))
png("Fig_Correlacao_AMES.png")
#cores_claras <- colorRampPalette(c("#D6EAF8", "#FFFFFF", "#F5B7B1"))(200)
corrplot(correlacao, method = "color", 
         type = "upper", 
#         col = cores_claras,   # <<<<<< AQUI
         tl.col = "black", 
         tl.srt = 45, 
         addCoef.col = "black") # Adiciona os valores
dev.off()
## png 
##   2
par(mfrow=c(1,2))
hist(base$preco_venda, freq=F, col="grey", border="white", 
     main="Histograma", ylab="Densidade", 
     xlab="Preço do imóvel", cex.lab=1.4, cex.axis=1.4)

png("Fig_Histograma_AMES.png")
hist(base$log_preco_venda, freq=F, col="grey", border="white", 
     main="Histograma", ylab="Densidade", 
     xlab="Log do preço do imóvel", cex.lab=1.4, cex.axis=1.4)
dev.off()
## png 
##   2
par(mfrow=c(1,3))

plot(base$area_construida, base$preco_venda, bty="n", xlab="área construída", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$ano_construcao, base$preco_venda, bty="n", xlab="ano de construção", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
#plot(base$banheiros_completos, base$preco_venda, bty="n", xlab="total de banheiros", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
#plot(base$quartos_acima_solo, base$preco_venda, bty="n", xlab="total de quartos acima do solo", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$vagas_garagem, base$preco_venda, bty="n", xlab="total de vagas na garagem", ylab="preço de venda", cex.lab=1.4, cex.axis=1.4)

plot(base$area_construida, base$log_preco_venda, bty="n", xlab="área construída", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$ano_construcao, base$log_preco_venda, bty="n", xlab="ano de construção", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)
plot(base$vagas_garagem, base$log_preco_venda, bty="n", xlab="total de vagas na garagem", ylab="log do preço de venda", cex.lab=1.4, cex.axis=1.4)

# Ajustando o modelo de regressão linear múltipla
ajuste <- lm(log_preco_venda ~ area_construida+ano_construcao+vagas_garagem, data = base) #usando o log do preço de venda como resposta
summary(ajuste)
## 
## Call:
## lm(formula = log_preco_venda ~ area_construida + ano_construcao + 
##     vagas_garagem, data = base)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.76980 -0.10179 -0.00418  0.09476  0.79973 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     3.4961572  0.3999024   8.743   <2e-16 ***
## area_construida 0.1937830  0.0051021  37.981   <2e-16 ***
## ano_construcao  0.0042174  0.0002062  20.455   <2e-16 ***
## vagas_garagem   0.1452266  0.0082821  17.535   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1861 on 1905 degrees of freedom
## Multiple R-squared:  0.7649, Adjusted R-squared:  0.7645 
## F-statistic:  2066 on 3 and 1905 DF,  p-value: < 2.2e-16
AIC(ajuste)
## [1] -996.6417
## Escolha das variáveis para o modelo ----
ajuste_step = stepAIC(ajuste, direction = "backward")
## Start:  AIC=-6416.15
## log_preco_venda ~ area_construida + ano_construcao + vagas_garagem
## 
##                   Df Sum of Sq     RSS     AIC
## <none>                          65.966 -6416.1
## - vagas_garagem    1    10.647  76.613 -6132.5
## - ano_construcao   1    14.489  80.456 -6039.1
## - area_construida  1    49.953 115.919 -5341.9
ajuste_step
## 
## Call:
## lm(formula = log_preco_venda ~ area_construida + ano_construcao + 
##     vagas_garagem, data = base)
## 
## Coefficients:
##     (Intercept)  area_construida   ano_construcao    vagas_garagem  
##        3.496157         0.193783         0.004217         0.145227
## Outra forma de selecionar o melhor modelo ----
#install.packages("MuMIn")
#library(MuMIn)
#options(na.action = "na.fail")  # Requisito do dredge
#todos_modelos <- dredge(ajuste)
#summary(todos_modelos)
#modelo_melhor <- get.models(todos_modelos, 1)[[1]]
#summary(modelo_melhor)
#min(todos_modelos$AICc)
#AIC(ajuste)
#AIC(ajuste_step)

## Analisando os Coeficientes ----

nc = 4

### β₀ ----
b0 <- ajuste_step$coefficients[[1]]         
ic_inf <- confint(ajuste_step)[1,1]         
ic_sup <- confint(ajuste_step)[1,2]         
beta0 <- data.frame(
  Estimativa = round(b0, nc),
  `LI` = round(ic_inf, nc),
  `LS` = round(ic_sup, nc)
)


### β₁ ----  
b1 <- ajuste_step$coefficients[[2]]
ic_inf1 <- confint(ajuste_step)[2,1]
ic_sup1 <- confint(ajuste_step)[2,2]
beta1 <- data.frame(
  Estimativa = round(b1, nc),
  `LI` = round(ic_inf1, nc),
  `LS` = round(ic_sup1, nc)
)


### β₂ ----
b2 <- ajuste_step$coefficients[[3]]
ic_inf2 <- confint(ajuste_step)[3,1]
ic_sup2 <- confint(ajuste_step)[3,2]
beta2 <- data.frame(
  Estimativa = round(b2, nc),
  `LI` = round(ic_inf2, nc),
  `LS` = round(ic_sup2, nc)
)



### β₃ ----
b3 <- ajuste_step$coefficients[[4]]
ic_inf3 <- confint(ajuste_step)[4,1]
ic_sup3 <- confint(ajuste_step)[4,2]
beta3 <- data.frame(
  Estimativa = round(b3,nc),
  `LI` = round(ic_inf3, nc),
  `LS` = round(ic_sup3, nc)
)



rbind(beta0, beta1, beta2, beta3)
##   Estimativa     LI     LS
## 1     3.4962 2.7119 4.2804
## 2     0.1938 0.1838 0.2038
## 3     0.0042 0.0038 0.0046
## 4     0.1452 0.1290 0.1615
## Verificando as Suposições ----
### Normalidade dos Resíduos ----
par(mfrow=c(1,2))
qqnorm(ajuste_step$residuals, bty="n", xlab="Quantis teóricos", ylab="Quantis amostrais", cex.lab=1.4, cex.axis=1.4)
qqline(ajuste_step$residuals, col = "blue", lwd=2)

hist(ajuste_step$residuals, freq=F, 
     main="Histograma", ylab="Densidade", 
     xlab="Resíduos", cex.lab=1.4, cex.axis=1.4)
curve(dnorm(x, 0, sqrt(summary(ajuste_step)$sigma^2)), add=T, col="red", lwd=2)

# Hipótese nula (H₀): os resíduos têm distribuição normal.
# Hipótese alternativa (H₁): os resíduos não têm distribuição normal.
shapiro.test(ajuste_step$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  ajuste_step$residuals
## W = 0.92805, p-value < 2.2e-16
ks.test(ajuste_step$residuals, "pnorm", mean(ajuste_step$residuals), sd(ajuste_step$residuals))
## Warning in ks.test.default(ajuste_step$residuals, "pnorm",
## mean(ajuste_step$residuals), : ties should not be present for the
## Kolmogorov-Smirnov test
## 
##  Asymptotic one-sample Kolmogorov-Smirnov test
## 
## data:  ajuste_step$residuals
## D = 0.061718, p-value = 9.659e-07
## alternative hypothesis: two-sided
jarque.bera.test(ajuste_step$residuals)
## 
##  Jarque Bera Test
## 
## data:  ajuste_step$residuals
## X-squared = 7864, df = 2, p-value < 2.2e-16
### Homocedasticididade ----
plot(ajuste_step$fitted.values, ajuste_step$residuals, 
     xlab = "Valores Ajustados", ylab = "Residuos", main = "", bty="n", cex.lab=1.4, cex.axis=1.4)
abline(h = 0, col = "red", lwd=2)  

#Hipótese nula (H₀): os resíduos têm variância constante (ou seja, homocedasticidade).
#Hipótese alternativa (H₁): a variância dos resíduos não é constante (isto é, há heterocedasticidade)
bptest(ajuste_step)
## 
##  studentized Breusch-Pagan test
## 
## data:  ajuste_step
## BP = 181.69, df = 3, p-value < 2.2e-16
### Independência dos resíduos ----
dwtest(ajuste_step)
## 
##  Durbin-Watson test
## 
## data:  ajuste_step
## DW = 1.5709, p-value < 2.2e-16
## alternative hypothesis: true autocorrelation is greater than 0
bgtest(ajuste_step, order = 1) 
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  ajuste_step
## LM test = 90.571, df = 1, p-value < 2.2e-16
bgtest(ajuste_step, order = 2) 
## 
##  Breusch-Godfrey test for serial correlation of order up to 2
## 
## data:  ajuste_step
## LM test = 120.99, df = 2, p-value < 2.2e-16
bgtest(ajuste_step, order = 3) 
## 
##  Breusch-Godfrey test for serial correlation of order up to 3
## 
## data:  ajuste_step
## LM test = 131.34, df = 3, p-value < 2.2e-16
### Multicolinearidade ----
vif(ajuste_step)
## area_construida  ano_construcao   vagas_garagem 
##        1.466810        1.533160        1.956098
## Tabela de ANOVA ----
anova(ajuste)
## Analysis of Variance Table
## 
## Response: log_preco_venda
##                   Df  Sum Sq Mean Sq F value    Pr(>F)    
## area_construida    1 164.375 164.375 4746.89 < 2.2e-16 ***
## ano_construcao     1  39.567  39.567 1142.63 < 2.2e-16 ***
## vagas_garagem      1  10.647  10.647  307.47 < 2.2e-16 ***
## Residuals       1905  65.966   0.035                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(ajuste_step)
## Analysis of Variance Table
## 
## Response: log_preco_venda
##                   Df  Sum Sq Mean Sq F value    Pr(>F)    
## area_construida    1 164.375 164.375 4746.89 < 2.2e-16 ***
## ano_construcao     1  39.567  39.567 1142.63 < 2.2e-16 ***
## vagas_garagem      1  10.647  10.647  307.47 < 2.2e-16 ***
## Residuals       1905  65.966   0.035                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Fazendo previsões ----
previsoes = predict(ajuste_step, newdata = base_previsao, interval = "prediction", level = 0.95)

# Convertendo para tibble e adicionando às observações reais
previsao_completa = base_previsao %>%
  mutate(
    log_preco_previsto = previsoes[, "fit"],
    log_preco_lower = previsoes[, "lwr"],
    log_preco_upper = previsoes[, "upr"]
  )

# Comparando previsto vs observado (gráfico)
ggplot(previsao_completa, aes(x = log_preco_venda, y = log_preco_previsto)) +
  geom_point(color = "blue") +
  geom_abline(slope = 1, intercept = 0, color = "red", linetype = "dashed", size = 1.2) +
  labs(x = "Log(Preço) Observado", y = "Log(Preço) Previsto", title = "Comparação entre Valores Observados e Previstos") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Calculando métricas de erro
erro = previsao_completa$log_preco_venda - previsao_completa$log_preco_previsto

# Erro Quadrático Médio (RMSE)
rmse = sqrt(mean(erro^2))

# Erro Médio Absoluto (MAE)
mae = mean(abs(erro))

# Exibindo as métricas
cat("RMSE:", round(rmse, 4), "\n")
## RMSE: 0.2058
cat("MAE:", round(mae, 4), "\n")
## MAE: 0.1553
# Visualizando previsões com intervalos
head(previsao_completa)
## # A tibble: 6 × 8
##   preco_venda area_construida ano_construcao vagas_garagem log_preco_venda
##         <int>           <dbl>          <int>         <dbl>           <dbl>
## 1      205000        -0.00451           2001             3            12.2
## 2      132500        -1.11              1976             2            11.8
## 3      157500        -0.293             1977             1            12.0
## 4      174000        -0.493             1978             3            12.1
## 5      128500        -1.08              1969             1            11.8
## 6      128500        -1.28              1972             1            11.8
## # ℹ 3 more variables: log_preco_previsto <dbl>, log_preco_lower <dbl>,
## #   log_preco_upper <dbl>
# Pacotes necessários
library(ggplot2)

# Adicionar índice das unidades amostrais na base de previsão
previsao_completa = previsao_completa %>%
  mutate(indice = inds_previsao)

# Gráfico
png("Fig_Previsao_AMES.png", width = 10, height = 8.3, units = "in", res = 300)
ggplot(previsao_completa, aes(x = indice)) +
  # Faixa do intervalo de previsão (área sombreada)
  geom_ribbon(aes(ymin = log_preco_lower, ymax = log_preco_upper), fill = "lightblue", alpha = 0.5) +
  # Estimativa pontual (linha ou pontos)
  geom_line(aes(y = log_preco_previsto), color = "blue", size = 1) +
  geom_point(aes(y = log_preco_previsto), color = "blue", size = 2) +
  # Valores observados (x vermelhos)
  #geom_point(aes(y = preco_venda), shape = 4, color = "red", size = 3, stroke = 1.5) + #caso a resposta seja o preco de venda
  geom_point(aes(y = log_preco_venda), shape = 4, color = "red", size = 3, stroke = 1.5) + #caso a resposta seja o log do preco de venda
  labs(x = "Índice da Unidade Amostral", y = "Log(Preço de Venda)",
       title = "Previsões com Intervalos vs Valores Observados") +
  theme_minimal()
dev.off()
## png 
##   2