##
Libraries
# Loading libraries (the packages must already be installed)
library(readxl)
library(tidyverse)
library(ggplot2)
library(corrplot)
library(gmodels)
library(effects)
library(stargazer)
library(olsrr)
#library(kableExtra)
library(jtools)
library(fastmap)
library(Hmisc)
library(naniar)
library(glmnet)
library(caret)
library(car)
library(lmtest)
library(dplyr)
library(xts)
library(zoo)
library(tseries)
library(stats)
library(forecast)
library(astsa)
library(corrplot)
library(AER)
library(dynlm)
library(vars)
#library(mFilter)
library(TSstudio)
library(tidyverse)
library(sarima)
library(stargazer)
library(forecast)
# Load the Coca-Cola sales dataset from CSV and display every row.
# NOTE(review): the path is an absolute, machine-specific Windows path; the
# script will only run where this exact file exists.
coca <- read.csv(file = "C:\\Users\\sebastian\\Downloads\\coc.csv")
print(coca)
## sales_unitboxes consumer_sentiment CPI inflation_rate unemp_rate
## 1 5516689 38.063 87.110 -0.09 0.0523
## 2 5387496 37.491 87.275 0.19 0.0531
## 3 5886747 38.505 87.631 0.41 0.0461
## 4 6389182 37.843 87.404 -0.26 0.0510
## 5 6448275 38.032 86.967 -0.50 0.0552
## 6 6697947 39.112 87.113 0.17 0.0507
## 7 6420091 38.132 87.241 0.15 0.0542
## 8 6474440 37.384 87.425 0.21 0.0547
## 9 6340781 37.449 87.752 0.37 0.0538
## 10 6539561 37.813 88.204 0.51 0.0539
## 11 6025373 38.183 88.685 0.55 0.0438
## 12 6714438 38.369 89.047 0.41 0.0489
## 13 5477874 38.182 89.386 0.38 0.0479
## 14 5580397 36.732 89.778 0.44 0.0485
## 15 6399322 36.814 89.910 0.15 0.0433
## 16 6780480 36.714 89.625 -0.32 0.0452
## 17 7423475 37.485 89.226 -0.45 0.0511
## 18 7271309 38.349 89.324 0.11 0.0458
## 19 6872616 36.506 89.557 0.26 0.0459
## 20 6804384 35.655 89.809 0.28 0.0491
## 21 6779166 34.755 90.358 0.61 0.0537
## 22 6492389 35.032 90.906 0.61 0.0442
## 23 6105159 34.875 91.617 0.78 0.0436
## 24 6580560 35.478 92.039 0.46 0.0405
## 25 5757061 28.668 93.604 1.70 0.0401
## 26 5301755 31.516 94.145 0.58 0.0364
## 27 6272641 33.795 94.722 0.61 0.0368
## 28 6286247 34.935 94.839 0.12 0.0409
## 29 7345037 35.873 94.725 -0.12 0.0414
## 30 7211316 36.010 94.964 0.25 0.0378
## 31 6329457 36.489 95.323 0.38 0.0407
## 32 6865977 36.506 95.794 0.49 0.0445
## 33 6219637 36.788 96.094 0.31 0.0445
## 34 6182126 36.437 96.698 0.63 0.0414
## 35 6498477 36.717 97.695 1.03 0.0401
## 36 6590566 36.315 98.273 0.59 0.0347
## 37 5705102 34.802 98.795 0.53 0.0401
## 38 5568552 34.189 99.171 0.38 0.0393
## 39 6882616 34.337 99.492 0.32 0.0359
## 40 7121483 35.612 99.155 -0.34 0.0414
## 41 7963063 36.648 98.994 -0.16 0.0384
## 42 7330137 37.148 99.376 0.39 0.0407
## 43 7130397 43.341 99.909 0.54 0.0394
## 44 7457473 43.006 100.492 0.58 0.0454
## 45 6264685 42.133 100.917 0.42 0.0399
## 46 6347760 42.533 101.440 0.52 0.0366
## 47 6140687 41.675 102.303 0.85 0.0379
## 48 6556749 44.865 103.020 0.70 0.0414
## gdp_percapita itaee itaee_growth pop_density job_density pop_minwage
## 1 11659.56 103.7654 0.0497 98.5418 18.2605 9.6579
## 2 11659.55 103.7654 0.0497 98.5419 18.4633 9.6579
## 3 11659.55 103.7654 0.0497 98.5419 18.6416 9.6579
## 4 11625.75 107.7518 0.0318 98.8284 18.6788 9.5949
## 5 11625.74 107.7518 0.0318 98.8284 18.6754 9.5949
## 6 11625.74 107.7518 0.0318 98.8285 18.6467 9.5949
## 7 11591.89 110.5957 0.0565 99.1170 18.7028 9.3984
## 8 11591.89 110.5957 0.0565 99.1171 18.7835 9.3984
## 9 11591.89 110.5957 0.0565 99.1171 18.9389 9.3984
## 10 11558.59 111.7800 0.0056 99.4026 19.0979 10.6757
## 11 11558.59 111.7800 0.0056 99.4026 19.3272 10.6757
## 12 11558.59 111.7800 0.0056 99.4026 19.1579 10.6757
## 13 11987.32 108.7077 0.0476 99.6856 19.1579 11.3009
## 14 11987.32 108.7077 0.0476 99.6857 19.3712 11.3009
## 15 11987.32 108.7077 0.0476 99.6857 19.4560 11.3009
## 16 11953.72 111.7936 0.0375 99.9659 19.5872 10.8817
## 17 11953.71 111.7936 0.0375 99.9659 19.6069 10.8817
## 18 11953.71 111.7936 0.0375 99.9659 19.6768 10.8817
## 19 11919.98 113.7051 0.0281 100.2488 19.7453 10.8337
## 20 11919.98 113.7051 0.0281 100.2488 19.8668 10.8337
## 21 11919.97 113.7051 0.0281 100.2488 20.0734 10.8337
## 22 11886.91 117.0615 0.0472 100.5277 20.3076 10.9448
## 23 11886.91 117.0615 0.0472 100.5277 20.5067 10.9448
## 24 11886.91 117.0615 0.0472 100.5277 20.2683 10.9448
## 25 12137.86 113.2336 0.0416 100.8041 20.2683 11.3279
## 26 12137.86 113.2336 0.0416 100.8041 20.4683 11.3279
## 27 12137.86 113.2336 0.0416 100.8042 20.7349 11.3279
## 28 12105.16 112.6669 0.0078 101.0764 20.7453 11.2363
## 29 12105.16 112.6669 0.0078 101.0764 20.7721 11.2363
## 30 12105.16 112.6669 0.0078 101.0764 20.8715 11.2363
## 31 12072.12 116.3738 0.0235 101.3531 20.9045 11.0423
## 32 12072.12 116.3738 0.0235 101.3531 21.1465 11.0423
## 33 12072.12 116.3738 0.0235 101.3531 21.3258 11.0423
## 34 12039.80 119.7875 0.0233 101.6251 21.5634 11.2409
## 35 12039.80 119.7875 0.0233 101.6251 21.7130 11.2409
## 36 12039.80 119.7875 0.0233 101.6251 21.4366 11.2409
## 37 12329.05 115.6723 0.0215 101.8944 21.4366 12.7219
## 38 12329.05 115.6723 0.0215 101.8944 21.6969 12.7219
## 39 12329.04 115.6723 0.0215 101.8945 21.7603 12.7219
## 40 12296.98 117.3254 0.0413 102.1602 21.8253 13.0263
## 41 12296.98 117.3254 0.0413 102.1602 21.8741 13.0263
## 42 12296.97 117.3254 0.0413 102.1602 21.9094 13.0263
## 43 12264.69 118.9366 0.0220 102.4291 21.8432 12.2970
## 44 12264.69 118.9366 0.0220 102.4291 22.0394 12.2970
## 45 12264.69 118.9366 0.0220 102.4291 22.1380 12.2970
## 46 12233.00 122.4821 0.0225 102.6945 22.2484 11.6695
## 47 12233.00 122.4821 0.0225 102.6945 22.3622 11.6695
## 48 12232.99 122.4821 0.0225 102.6945 21.9749 11.6695
## exchange_rate max_temperature holiday_month
## 1 14.6926 28 0
## 2 14.9213 31 0
## 3 15.2283 29 0
## 4 15.2262 32 1
## 5 15.2645 34 0
## 6 15.4830 32 0
## 7 15.9396 29 0
## 8 16.5368 29 0
## 9 16.8578 29 1
## 10 16.5640 29 0
## 11 16.6357 29 0
## 12 17.0666 26 1
## 13 18.0728 28 0
## 14 18.4731 31 0
## 15 17.6490 32 1
## 16 17.4877 33 0
## 17 18.1542 35 0
## 18 18.6530 33 0
## 19 18.6014 31 0
## 20 18.4749 32 0
## 21 19.1924 33 1
## 22 18.8924 29 0
## 23 20.1185 29 0
## 24 20.5206 28 1
## 25 21.3853 29 0
## 26 20.2905 30 0
## 27 19.3010 31 0
## 28 18.7875 33 1
## 29 18.7557 36 0
## 30 18.1326 35 0
## 31 17.8283 29 0
## 32 17.8070 29 0
## 33 17.8357 30 1
## 34 18.8161 30 0
## 35 18.9158 30 0
## 36 19.1812 27 1
## 37 18.9074 27 0
## 38 18.6449 29 0
## 39 18.6308 33 1
## 40 18.3872 33 0
## 41 19.5910 37 0
## 42 20.3032 35 0
## 43 19.0095 31 0
## 44 18.8575 29 0
## 45 19.0154 28 1
## 46 19.1859 28 0
## 47 20.2612 28 0
## 48 20.1112 26 1
Briefly describe the dataset. For example, what is the structure of the dataset? It is a Coca-Cola dataset which has 15 variables (the file loaded above contains 14 of them, because the date column is not included): tperiod, which is our date; sales_unitboxes, which is our dependent variable, meaning sales of Coca-Cola unit boxes; consumer_sentiment, which explains how consumers feel about the state of the economy; CPI, the consumer price index (2018 = 100); inflation_rate, the change in the consumer price index; unemp_rate, the percentage of the labor force that is unemployed; gdp_percapita, the gross domestic product divided by population; itaee, the Indicator of the State Economic Activity (ITAEE); itaee_growth, the ITAEE's growth rate; pop_density, the population per km2; job_density, the employed population per km2; pop_minwage, the population per km2 earning 1-2 minimum wages; exchange_rate, the U.S. dollar - MXN exchange rate; max_temperature, the average maximum temperature; and holiday_month, a Boolean variable equal to 1 if the month includes a holiday week (public holiday, Easter holiday, or Christmas) and 0 otherwise.
# Count the NA entries in each column of the dataset and show the totals.
missing_values <- colSums(x = is.na(coca))
print(missing_values)
## sales_unitboxes consumer_sentiment CPI inflation_rate
## 0 0 0 0
## unemp_rate gdp_percapita itaee itaee_growth
## 0 0 0 0
## pop_density job_density pop_minwage exchange_rate
## 0 0 0 0
## max_temperature holiday_month
## 0 0
# Show the structure of the data frame: dimensions, column names and types.
utils::str(coca)
## 'data.frame': 48 obs. of 14 variables:
## $ sales_unitboxes : num 5516689 5387496 5886747 6389182 6448275 ...
## $ consumer_sentiment: num 38.1 37.5 38.5 37.8 38 ...
## $ CPI : num 87.1 87.3 87.6 87.4 87 ...
## $ inflation_rate : num -0.09 0.19 0.41 -0.26 -0.5 0.17 0.15 0.21 0.37 0.51 ...
## $ unemp_rate : num 0.0523 0.0531 0.0461 0.051 0.0552 0.0507 0.0542 0.0547 0.0538 0.0539 ...
## $ gdp_percapita : num 11660 11660 11660 11626 11626 ...
## $ itaee : num 104 104 104 108 108 ...
## $ itaee_growth : num 0.0497 0.0497 0.0497 0.0318 0.0318 0.0318 0.0565 0.0565 0.0565 0.0056 ...
## $ pop_density : num 98.5 98.5 98.5 98.8 98.8 ...
## $ job_density : num 18.3 18.5 18.6 18.7 18.7 ...
## $ pop_minwage : num 9.66 9.66 9.66 9.59 9.59 ...
## $ exchange_rate : num 14.7 14.9 15.2 15.2 15.3 ...
## $ max_temperature : int 28 31 29 32 34 32 29 29 29 29 ...
## $ holiday_month : int 0 0 0 1 0 0 0 0 1 0 ...
# Descriptive statistics for every column (min, quartiles, median, mean, max).
# The row for sales_unitboxes answers the question about the mean, min and max
# of the dependent variable.
summary(object = coca)
## sales_unitboxes consumer_sentiment CPI inflation_rate
## Min. :5301755 Min. :28.67 Min. : 86.97 Min. :-0.5000
## 1st Qu.:6171767 1st Qu.:35.64 1st Qu.: 89.18 1st Qu.: 0.1650
## Median :6461357 Median :36.76 Median : 92.82 Median : 0.3850
## Mean :6473691 Mean :37.15 Mean : 93.40 Mean : 0.3485
## 3rd Qu.:6819782 3rd Qu.:38.14 3rd Qu.: 98.40 3rd Qu.: 0.5575
## Max. :7963063 Max. :44.87 Max. :103.02 Max. : 1.7000
## unemp_rate gdp_percapita itaee itaee_growth
## Min. :0.03470 Min. :11559 Min. :103.8 Min. :0.00560
## 1st Qu.:0.04010 1st Qu.:11830 1st Qu.:111.5 1st Qu.:0.02237
## Median :0.04370 Median :12014 Median :113.5 Median :0.02995
## Mean :0.04442 Mean :11979 Mean :113.9 Mean :0.03172
## 3rd Qu.:0.04895 3rd Qu.:12162 3rd Qu.:117.1 3rd Qu.:0.04300
## Max. :0.05520 Max. :12329 Max. :122.5 Max. :0.05650
## pop_density job_density pop_minwage exchange_rate
## Min. : 98.54 Min. :18.26 Min. : 9.398 Min. :14.69
## 1st Qu.: 99.61 1st Qu.:19.28 1st Qu.:10.794 1st Qu.:17.38
## Median :100.67 Median :20.39 Median :11.139 Median :18.62
## Mean :100.65 Mean :20.38 Mean :11.116 Mean :18.18
## 3rd Qu.:101.69 3rd Qu.:21.60 3rd Qu.:11.413 3rd Qu.:19.06
## Max. :102.69 Max. :22.36 Max. :13.026 Max. :21.39
## max_temperature holiday_month
## Min. :26.00 Min. :0.00
## 1st Qu.:29.00 1st Qu.:0.00
## Median :30.00 Median :0.00
## Mean :30.50 Mean :0.25
## 3rd Qu.:32.25 3rd Qu.:0.25
## Max. :37.00 Max. :1.00
How many observations does the dataset include? Is there any presence of missing values in the dataset? ### 48 observations of 15 variables. There were NA values in the tperiod column, so we removed this column in Excel.
sales_unitboxes
Min. :5301755
Median :6461357 Mean :6473691
Max. :7963063
# Histogram of the inflation rate (graph 1).
# geom_histogram() counts how many observations fall into each inflation bin,
# so the y-axis is a frequency, not sales. The title and y-axis label are
# corrected accordingly; the binning (10 bins, boundary = 15) is unchanged.
hist1 <- ggplot(data = coca, aes(x = inflation_rate)) +
  geom_histogram(bins = 10, fill = "yellow", color = "black", boundary = 15) +
  labs(
    title = "Distribution of Inflation Rate",
    x = "Inflation rate",
    y = "Frequency"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
hist1
# Scatter plot (graph 2): unit-box sales (y) against the unemployment rate (x),
# drawn as semi-transparent green points on a minimal theme.
ggplot(data = coca, mapping = aes(x = unemp_rate, y = sales_unitboxes)) +
  geom_point(
    stat = "identity",
    fill = "black",
    color = "green",
    alpha = 0.7
  ) +
  labs(title = "Unemployment", y = "Sales unit boxes") +
  theme_minimal()
# Histogram of the exchange rate (graph 3).
# geom_histogram() counts how many observations fall into each exchange-rate
# bin, so the y-axis is a frequency, not sales. The title and y-axis label are
# corrected accordingly; the binning (10 bins, boundary = 15) is unchanged.
hist3 <- ggplot(data = coca, aes(x = exchange_rate)) +
  geom_histogram(bins = 10, fill = "red", color = "black", boundary = 15) +
  labs(
    title = "Distribution of Exchange Rate",
    x = "Exchange Rate",
    y = "Frequency"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
hist3
# Scatter plot (graph 4): unit-box sales (y) against consumer sentiment (x),
# using default points on a minimal theme.
ggplot(
  data = coca,
  mapping = aes(x = consumer_sentiment, y = sales_unitboxes)
) +
  geom_point() +
  labs(
    title = "Scatter Consumer Sentiment vs Sales unit boxes ",
    x = "Consumer Sentiment",
    y = "Sales unit boxes"
  ) +
  theme_minimal()
# Histogram of the maximum temperature (graph 5).
# geom_histogram() counts how many observations fall into each temperature
# bin, so the y-axis is a frequency, not sales. The title and y-axis label are
# corrected accordingly; the binning (10 bins, boundary = 15) is unchanged.
hist4 <- ggplot(data = coca, aes(x = max_temperature)) +
  geom_histogram(bins = 10, fill = "orange", color = "black", boundary = 15) +
  labs(
    title = "Distribution of Max Temperature",
    x = "Max Temperature",
    y = "Frequency"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
hist4
-Describe the data patterns:
Inflation: When the inflation rate is between 0 and 1, consumption of unit boxes is observed, with the highest peak, in the hundreds of unit boxes, at an inflation rate of approximately 0.5. Once inflation falls outside this interval, sales are observed to decrease, as inflation is very low.
Exchange rate: At first glance, as the exchange rate increased, with the dollar ranging between 16 and almost 22 pesos, there was variation in unit-box sales. Likewise, the highest sales occurred when the exchange rate was between 18 and 19 pesos.
# Pairwise correlation matrix of all columns, shown rounded to two decimals.
res <- cor(x = coca)
round(res, digits = 2)
## sales_unitboxes consumer_sentiment CPI inflation_rate
## sales_unitboxes 1.00 0.23 0.21 -0.34
## consumer_sentiment 0.23 1.00 0.22 -0.14
## CPI 0.21 0.22 1.00 0.33
## inflation_rate -0.34 -0.14 0.33 1.00
## unemp_rate -0.08 0.13 -0.80 -0.38
## gdp_percapita 0.21 -0.03 0.89 0.22
## itaee 0.32 0.21 0.85 0.42
## itaee_growth -0.24 -0.18 -0.40 -0.11
## pop_density 0.30 0.17 0.98 0.33
## job_density 0.29 0.14 0.98 0.34
## pop_minwage 0.28 -0.02 0.83 0.19
## exchange_rate 0.18 -0.21 0.67 0.54
## max_temperature 0.57 -0.23 -0.09 -0.56
## holiday_month 0.03 0.07 0.08 0.00
## unemp_rate gdp_percapita itaee itaee_growth pop_density
## sales_unitboxes -0.08 0.21 0.32 -0.24 0.30
## consumer_sentiment 0.13 -0.03 0.21 -0.18 0.17
## CPI -0.80 0.89 0.85 -0.40 0.98
## inflation_rate -0.38 0.22 0.42 -0.11 0.33
## unemp_rate 1.00 -0.80 -0.67 0.33 -0.79
## gdp_percapita -0.80 1.00 0.69 -0.24 0.91
## itaee -0.67 0.69 1.00 -0.38 0.91
## itaee_growth 0.33 -0.24 -0.38 1.00 -0.41
## pop_density -0.79 0.91 0.91 -0.41 1.00
## job_density -0.81 0.90 0.90 -0.41 0.99
## pop_minwage -0.74 0.91 0.67 -0.32 0.86
## exchange_rate -0.71 0.75 0.76 -0.14 0.76
## max_temperature 0.03 0.14 -0.20 0.00 -0.03
## holiday_month -0.04 -0.03 0.10 -0.14 0.04
## job_density pop_minwage exchange_rate max_temperature
## sales_unitboxes 0.29 0.28 0.18 0.57
## consumer_sentiment 0.14 -0.02 -0.21 -0.23
## CPI 0.98 0.83 0.67 -0.09
## inflation_rate 0.34 0.19 0.54 -0.56
## unemp_rate -0.81 -0.74 -0.71 0.03
## gdp_percapita 0.90 0.91 0.75 0.14
## itaee 0.90 0.67 0.76 -0.20
## itaee_growth -0.41 -0.32 -0.14 0.00
## pop_density 0.99 0.86 0.76 -0.03
## job_density 1.00 0.85 0.74 -0.02
## pop_minwage 0.85 1.00 0.71 0.14
## exchange_rate 0.74 0.71 1.00 -0.02
## max_temperature -0.02 0.14 -0.02 1.00
## holiday_month 0.06 -0.02 0.06 -0.17
## holiday_month
## sales_unitboxes 0.03
## consumer_sentiment 0.07
## CPI 0.08
## inflation_rate 0.00
## unemp_rate -0.04
## gdp_percapita -0.03
## itaee 0.10
## itaee_growth -0.14
## pop_density 0.04
## job_density 0.06
## pop_minwage -0.02
## exchange_rate 0.06
## max_temperature -0.17
## holiday_month 1.00
# Correlation plots
# Plot 1: upper triangle drawn with circles, from the correlation matrix
# computed on complete observations only.
co_matrix <- cor(x = coca, use = "complete.obs")
corrplot(corr = co_matrix, method = "circle", type = "upper")
# Plot 2: upper triangle with variables ordered by hierarchical clustering and
# the correlation coefficients printed in purple.
corrplot(
  corr = cor(coca),
  type = "upper",
  order = "hclust",
  addCoef.col = "purple"
)
H0:The max_temperature variable has no impact on the Sales Unit Boxes. H1:The max_temperature variable has a significant impact on the Sales Unit Boxes.
H0: A high level of itaee has no significant impact on the Sales Unit Boxes. H1: A high level of itaee has a significant impact on the Sales Unit Boxes.
H0:The inflation rate variable has no impact on the Sales Unit
Boxes.
H1:The inflation rate variable has a negative impact on the Sales Unit
Boxes.
## Model 1: multiple linear regression of unit-box sales on itaee, inflation,
## unemployment, exchange rate and pop_minwage (all regressors in levels).
m1 <- lm(
  formula = sales_unitboxes ~ itaee + inflation_rate + unemp_rate +
    exchange_rate + pop_minwage,
  data = coca
)
summary(object = m1)
##
## Call:
## lm(formula = sales_unitboxes ~ itaee + inflation_rate + unemp_rate +
## exchange_rate + pop_minwage, data = coca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -952695 -385964 24484 390060 907209
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4064887 2925906 -1.389 0.172071
## itaee 65336 24238 2.696 0.010062 *
## inflation_rate -940452 231988 -4.054 0.000213 ***
## unemp_rate 27808113 19831138 1.402 0.168190
## exchange_rate 99971 85209 1.173 0.247308
## pop_minwage 33731 124106 0.272 0.787115
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 484400 on 42 degrees of freedom
## Multiple R-squared: 0.415, Adjusted R-squared: 0.3454
## F-statistic: 5.96 on 5 and 42 DF, p-value: 0.0002986
## Model 2: linear model in which max_temperature enters in logarithms; the
## other regressors (consumer sentiment, CPI, itaee, inflation) are in levels.
m2 <- lm(
  formula = sales_unitboxes ~ consumer_sentiment + CPI + itaee +
    inflation_rate + log(max_temperature),
  data = coca
)
summary(object = m2)
##
## Call:
## lm(formula = sales_unitboxes ~ consumer_sentiment + CPI + itaee +
## inflation_rate + log(max_temperature), data = coca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -613809 -284901 32550 217158 917879
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19298716 3412215 -5.656 1.24e-06 ***
## consumer_sentiment 61739 22018 2.804 0.00761 **
## CPI -53846 20840 -2.584 0.01334 *
## itaee 105483 22897 4.607 3.77e-05 ***
## inflation_rate -181726 201696 -0.901 0.37273
## log(max_temperature) 4850924 854601 5.676 1.16e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 370000 on 42 degrees of freedom
## Multiple R-squared: 0.6587, Adjusted R-squared: 0.6181
## F-statistic: 16.21 on 5 and 42 DF, p-value: 6.865e-09
## Model 3: linear model in which max_temperature enters in logarithms; the
## other regressors (CPI, itaee, pop_minwage, exchange rate) are in levels.
m3 <- lm(
  formula = sales_unitboxes ~ CPI + itaee + pop_minwage +
    log(max_temperature) + exchange_rate,
  data = coca
)
summary(object = m3)
##
## Call:
## lm(formula = sales_unitboxes ~ CPI + itaee + pop_minwage + log(max_temperature) +
## exchange_rate, data = coca)
##
## Residuals:
## Min 1Q Median 3Q Max
## -761986 -263909 24069 266108 925278
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -18634991 3446116 -5.408 2.81e-06 ***
## CPI -65166 31535 -2.066 0.0450 *
## itaee 140330 29102 4.822 1.89e-05 ***
## pop_minwage 120457 127895 0.942 0.3517
## log(max_temperature) 4897039 758854 6.453 8.85e-08 ***
## exchange_rate -156197 63737 -2.451 0.0185 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 398700 on 42 degrees of freedom
## Multiple R-squared: 0.6038, Adjusted R-squared: 0.5567
## F-statistic: 12.8 on 5 and 42 DF, p-value: 1.392e-07
# Side-by-side comparison of the three fitted models as a plain-text table,
# with each coefficient and its standard error on a single row.
stargazer(
  m1, m2, m3,
  type = "text",
  title = "OLS Regression Results",
  single.row = TRUE,
  ci = FALSE,
  ci.level = 0.9
)
##
## OLS Regression Results
## ===================================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------------------
## sales_unitboxes
## (1) (2) (3)
## -----------------------------------------------------------------------------------------------------------------------------------
## consumer_sentiment 61,739.160*** (22,018.360)
## CPI -53,845.550** (20,840.240) -65,166.280** (31,534.860)
## itaee 65,336.140** (24,237.720) 105,483.200*** (22,896.740) 140,329.600*** (29,102.010)
## inflation_rate -940,451.700*** (231,987.500) -181,726.000 (201,696.100)
## unemp_rate 27,808,113.000 (19,831,138.000)
## exchange_rate 99,970.850 (85,208.800) -156,197.000** (63,737.450)
## pop_minwage 33,730.810 (124,105.900) 120,456.700 (127,895.100)
## log(max_temperature) 4,850,924.000*** (854,601.100) 4,897,039.000*** (758,853.500)
## Constant -4,064,887.000 (2,925,905.000) -19,298,716.000*** (3,412,215.000) -18,634,991.000*** (3,446,116.000)
## -----------------------------------------------------------------------------------------------------------------------------------
## Observations 48 48 48
## R2 0.415 0.659 0.604
## Adjusted R2 0.345 0.618 0.557
## Residual Std. Error (df = 42) 484,418.400 370,022.500 398,656.800
## F Statistic (df = 5; 42) 5.960*** 16.211*** 12.803***
## ===================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Diagnostic tests play an important role in modern analytical tasks by helping to identify problems and improve the accuracy of linear regression results and predictive analytics. They do so by examining trends and correlations between the variables to determine causes and understand the story of what happened.
This process helps us obtain better precision in the three estimated models, so that we have the most reliable one when taking action in business decision making, process optimization, among others.
# Akaike information criterion for each fitted model.
# AIC of model 1
stats::AIC(m1)
## [1] 1400.516
# AIC of model 2
stats::AIC(m2)
## [1] 1374.655
# AIC of model 3
stats::AIC(m3)
## [1] 1381.811
AIC: “Estimator of the relative quality of the model that takes into account its complexity. As the number of input parameters of a polynomial increases, the value of R will be better, because the mean square error decreases. The Akaike information criterion (aic metric) penalizes complex models in favor of simple ones to avoid overfitting.”(KeepCoding,2023)
VIF: The Variance Inflation Factor helps us diagnose multicollinearity. If a variable's VIF is greater than 10, it is preferable to eliminate the variable that is causing the multicollinearity.
bptest: The Breusch-Pagan test is estimated to check for the presence of heteroscedasticity. A p-value ≥ the chosen significance level (e.g., 0.05) fails to reject the null hypothesis of homoscedasticity.
# Diagnostic checks for each linear regression model
# Model 1: variance inflation factors, Breusch-Pagan test and a histogram of
# the residuals.
car::vif(m1)
## itaee inflation_rate unemp_rate exchange_rate pop_minwage
## 2.658370 1.636188 2.716368 3.751160 3.109500
lmtest::bptest(m1)
##
## studentized Breusch-Pagan test
##
## data: m1
## BP = 14.227, df = 5, p-value = 0.01423
histogram(m1$residuals)
# Model 2: variance inflation factors, Breusch-Pagan test and a histogram of
# the residuals.
car::vif(m2)
## consumer_sentiment CPI itaee
## 1.389169 3.814285 4.065970
## inflation_rate log(max_temperature)
## 2.119747 1.809984
lmtest::bptest(m2)
##
## studentized Breusch-Pagan test
##
## data: m2
## BP = 4.0274, df = 5, p-value = 0.5455
histogram(m2$residuals)
# Model 3: variance inflation factors, Breusch-Pagan test and a histogram of
# the residuals.
car::vif(m3)
## CPI itaee pop_minwage
## 7.523975 5.658752 4.875922
## log(max_temperature) exchange_rate
## 1.229480 3.099052
lmtest::bptest(m3)
##
## studentized Breusch-Pagan test
##
## data: m3
## BP = 3.8234, df = 5, p-value = 0.5751
histogram(m3$residuals)
To summarize this analysis, consumer sentiment, CPI, itaee, and log(max_temperature) are significant factors in explaining the dependent variable. The model (Model 2) is statistically significant and explains about 65.87% of the variance in the dependent variable. The p-value of the F-test was 6.865e-09, which is less than 0.10, so we reject the null hypothesis.
# Added-variable plots for the Model 2 specification.
# m221 is fitted with the same formula and data as m2 and is then passed to
# avPlots(), which draws one plot per regressor.
library(car)
m221 <- lm(
  formula = sales_unitboxes ~ consumer_sentiment + CPI + itaee +
    inflation_rate + log(max_temperature),
  data = coca
)
avPlots(model = m221)