# Importar paquetes y llamar librerías

# install.packages("tidyverse")
library(tidyverse)

Importar la base de datos

# Load the Walmart weekly-sales CSV into `df`.
# The file location can be overridden through the WALMART_CSV environment
# variable so the analysis is not tied to a single machine; when the variable
# is unset or empty, the original hard-coded path is used.
walmart_csv <- Sys.getenv("WALMART_CSV")
if (!nzchar(walmart_csv)) {
  walmart_csv <- "C:\\Users\\Adrián\\Downloads\\walmart.csv"
}
df <- read.csv(walmart_csv)

Entender la base de datos

# Print per-column summary statistics for the freshly loaded data frame.
summary(df)
##      Store        Date            Weekly_Sales      Holiday_Flag    
##  Min.   : 1   Length:6435        Min.   : 209986   Min.   :0.00000  
##  1st Qu.:12   Class :character   1st Qu.: 553350   1st Qu.:0.00000  
##  Median :23   Mode  :character   Median : 960746   Median :0.00000  
##  Mean   :23                      Mean   :1046965   Mean   :0.06993  
##  3rd Qu.:34                      3rd Qu.:1420159   3rd Qu.:0.00000  
##  Max.   :45                      Max.   :3818687   Max.   :1.00000  
##   Temperature       Fuel_Price         CPI         Unemployment   
##  Min.   : -2.06   Min.   :2.472   Min.   :126.1   Min.   : 3.879  
##  1st Qu.: 47.46   1st Qu.:2.933   1st Qu.:131.7   1st Qu.: 6.891  
##  Median : 62.67   Median :3.445   Median :182.6   Median : 7.874  
##  Mean   : 60.66   Mean   :3.359   Mean   :171.6   Mean   : 7.999  
##  3rd Qu.: 74.94   3rd Qu.:3.735   3rd Qu.:212.7   3rd Qu.: 8.622  
##  Max.   :100.14   Max.   :4.468   Max.   :227.2   Max.   :14.313
# Show the type and first values of every column in df.
str(df)
## 'data.frame':    6435 obs. of  8 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "05-02-2010" "12-02-2010" "19-02-2010" "26-02-2010" ...
##  $ Weekly_Sales: num  1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: int  0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...
# Turn the day-month-year text in Date into Date objects, then re-inspect the
# structure to confirm the new column type.
df[["Date"]] <- as.Date(df[["Date"]], format = "%d-%m-%Y")
str(df)
## 'data.frame':    6435 obs. of  8 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : Date, format: "2010-02-05" "2010-02-12" ...
##  $ Weekly_Sales: num  1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: int  0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...

Agregar variables a la base de datos

# Derive integer calendar features from Date in one step. The new columns are
# appended in the order Year, Month, weekYear, weekDay, Day.
df <- df %>%
  mutate(
    Year = as.integer(format(Date, "%Y")),
    Month = as.integer(format(Date, "%m")),
    weekYear = as.integer(format(Date, "%W")),
    weekDay = as.integer(format(Date, "%u")), # 1: Monday
    Day = as.integer(format(Date, "%d"))
  )

# Summary statistics again, now covering the derived columns as well.
summary(df)
##      Store         Date             Weekly_Sales      Holiday_Flag    
##  Min.   : 1   Min.   :2010-02-05   Min.   : 209986   Min.   :0.00000  
##  1st Qu.:12   1st Qu.:2010-10-08   1st Qu.: 553350   1st Qu.:0.00000  
##  Median :23   Median :2011-06-17   Median : 960746   Median :0.00000  
##  Mean   :23   Mean   :2011-06-17   Mean   :1046965   Mean   :0.06993  
##  3rd Qu.:34   3rd Qu.:2012-02-24   3rd Qu.:1420159   3rd Qu.:0.00000  
##  Max.   :45   Max.   :2012-10-26   Max.   :3818687   Max.   :1.00000  
##   Temperature       Fuel_Price         CPI         Unemployment   
##  Min.   : -2.06   Min.   :2.472   Min.   :126.1   Min.   : 3.879  
##  1st Qu.: 47.46   1st Qu.:2.933   1st Qu.:131.7   1st Qu.: 6.891  
##  Median : 62.67   Median :3.445   Median :182.6   Median : 7.874  
##  Mean   : 60.66   Mean   :3.359   Mean   :171.6   Mean   : 7.999  
##  3rd Qu.: 74.94   3rd Qu.:3.735   3rd Qu.:212.7   3rd Qu.: 8.622  
##  Max.   :100.14   Max.   :4.468   Max.   :227.2   Max.   :14.313  
##       Year          Month           weekYear        weekDay       Day       
##  Min.   :2010   Min.   : 1.000   Min.   : 1.00   Min.   :5   Min.   : 1.00  
##  1st Qu.:2010   1st Qu.: 4.000   1st Qu.:14.00   1st Qu.:5   1st Qu.: 8.00  
##  Median :2011   Median : 6.000   Median :26.00   Median :5   Median :16.00  
##  Mean   :2011   Mean   : 6.448   Mean   :25.82   Mean   :5   Mean   :15.68  
##  3rd Qu.:2012   3rd Qu.: 9.000   3rd Qu.:38.00   3rd Qu.:5   3rd Qu.:23.00  
##  Max.   :2012   Max.   :12.000   Max.   :52.00   Max.   :5   Max.   :31.00
# Show the structure of df again, now including the derived calendar columns.
str(df)
## 'data.frame':    6435 obs. of  13 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : Date, format: "2010-02-05" "2010-02-12" ...
##  $ Weekly_Sales: num  1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: int  0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...
##  $ Year        : int  2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ Month       : int  2 2 2 2 3 3 3 3 4 4 ...
##  $ weekYear    : int  5 6 7 8 9 10 11 12 13 14 ...
##  $ weekDay     : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Day         : int  5 12 19 26 5 12 19 26 2 9 ...

Generar la regresión

# Fit a linear model of Weekly_Sales on every other column of df (the `.` in
# the formula) and print the fit summary.
# NOTE(review): the printed summary reports the weekYear and weekDay
# coefficients as not defined because of singularities; weekDay equals 5 for
# every row in the summary(df) output, so it adds no information to the model.
regresion <- lm(Weekly_Sales~.,data=df)
summary(regresion)
## 
## Call:
## lm(formula = Weekly_Sales ~ ., data = df)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1094800  -382464   -42860   375406  2587123 
## 
## Coefficients: (2 not defined because of singularities)
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -2.384e+09  9.127e+09  -0.261   0.7940    
## Store        -1.538e+04  5.202e+02 -29.576  < 2e-16 ***
## Date         -3.399e+03  1.266e+04  -0.268   0.7883    
## Holiday_Flag  4.773e+04  2.706e+04   1.763   0.0779 .  
## Temperature  -1.817e+03  4.053e+02  -4.484 7.47e-06 ***
## Fuel_Price    6.124e+04  2.876e+04   2.130   0.0332 *  
## CPI          -2.109e+03  1.928e+02 -10.941  < 2e-16 ***
## Unemployment -2.209e+04  3.967e+03  -5.569 2.67e-08 ***
## Year          1.212e+06  4.633e+06   0.262   0.7937    
## Month         1.177e+05  3.858e+05   0.305   0.7604    
## weekYear             NA         NA      NA       NA    
## weekDay              NA         NA      NA       NA    
## Day           2.171e+03  1.269e+04   0.171   0.8642    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 520900 on 6424 degrees of freedom
## Multiple R-squared:  0.1495, Adjusted R-squared:  0.1482 
## F-statistic:   113 on 10 and 6424 DF,  p-value: < 2.2e-16

Ajustar la regresión

# Refit after removing Date, the derived calendar columns (Year through Day)
# and Fuel_Price, leaving Store, Holiday_Flag, Temperature, CPI and
# Unemployment as the predictors of Weekly_Sales.
df_ajustada <- df %>%
  select(-c(Date, Year:Day, Fuel_Price))
regresion_ajustada <- lm(formula = Weekly_Sales ~ ., data = df_ajustada)
summary(regresion_ajustada)
## 
## Call:
## lm(formula = Weekly_Sales ~ ., data = df_ajustada)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1035858  -392195   -40416   371110  2711797 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2031943.1    50654.7  40.114  < 2e-16 ***
## Store         -15373.4      521.3 -29.488  < 2e-16 ***
## Holiday_Flag   72218.3    25911.0   2.787  0.00533 ** 
## Temperature     -929.0      369.1  -2.517  0.01186 *  
## CPI            -2345.9      180.2 -13.019  < 2e-16 ***
## Unemployment  -22198.9     3755.9  -5.910 3.59e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 523100 on 6429 degrees of freedom
## Multiple R-squared:  0.1415, Adjusted R-squared:  0.1408 
## F-statistic: 211.9 on 5 and 6429 DF,  p-value: < 2.2e-16

Conclusión

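El modelo de regresión ajustado es estadísticamente significativo en todos sus componentes (p < 0.05 en cada coeficiente) para la predicción de las ventas. Bajo este modelo, factores como la presencia de festividades impactan positivamente las ventas de la tienda, mientras que el aumento de la temperatura, del CPI y del desempleo son factores que impactan de forma negativa en los resultados de las ventas. Sin embargo, el R² ajustado de 0.1408 indica que el modelo explica solo alrededor del 14 % de la variabilidad de las ventas semanales, por lo que su capacidad predictiva es limitada.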

LS0tDQp0aXRsZTogIlJlZ3Jlc2nDs24gTGluZWFsIg0KYXV0aG9yOiAiTHVpcyBEw6F2aWxhIC0gQTAxMjg1NTIxIg0KZGF0ZTogIjIwMjUtMDgtMjUiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiBUUlVFDQogICAgdG9jX2Zsb2F0OiBUUlVFDQogICAgY29kZV9kb3dubG9hZDogVFJVRQ0KICAgIHRoZW1lOiBjb3Ntbw0KLS0tDQoNCiFbXShodHRwczovL3JvYXN0YnJpZWYuY29tLm14L3dwLWNvbnRlbnQvdXBsb2Fkcy8yMDI0LzA1L2NmOWFlNzkxLXdhbG1hcnQtbG9nby1wcm9tby5wbmcpICAgICAgIA0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZSI+IEltcG9ydGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMgPC9zcGFuPiAgIA0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCiMgaW5zdGFsbC5wYWNrYWdlcygidGlkeXZlcnNlIikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zIDwvc3Bhbj4gICAgDQpgYGB7cn0NCmRmIDwtIHJlYWQuY3N2KCJDOlxcVXNlcnNcXEFkcmnDoW5cXERvd25sb2Fkc1xcd2FsbWFydC5jc3YiKQ0KYGBgDQoNCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zIDwvc3Bhbj4gICAgDQpgYGB7cn0NCnN1bW1hcnkoZGYpDQpzdHIoZGYpDQpkZiREYXRlIDwtIGFzLkRhdGUoZGYkRGF0ZSwgZm9ybWF0PSIlZC0lbS0lWSIpDQpzdHIoZGYpDQpgYGANCg0KIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZSI+IEFncmVnYXIgdmFyaWFibGVzIGEgbGEgYmFzZSBkZSBkYXRvcyA8L3NwYW4+ICAgICANCmBgYHtyfQ0KZGYkWWVhciA8LSBmb3JtYXQoZGYkRGF0ZSwgIiVZIikNCmRmJFllYXIgPC0gYXMuaW50ZWdlcihkZiRZZWFyKQ0KDQpkZiRNb250aCA8LSBmb3JtYXQoZGYkRGF0ZSwgIiVtIikNCmRmJE1vbnRoIDwtIGFzLmludGVnZXIoZGYkTW9udGgpDQoNCmRmJHdlZWtZZWFyIDwtIGZvcm1hdChkZiREYXRlLCAiJVciKQ0KZGYkd2Vla1llYXIgPC0gYXMuaW50ZWdlcihkZiR3ZWVrWWVhcikNCg0KZGYkd2Vla0RheSA8LSBmb3JtYXQoZGYkRGF0ZSwgIiV1IikgIyAxOiBMdW5lcw0KZGYkd2Vla0RheSA8LSBhcy5pbnRlZ2VyKGRmJHdlZWtEYXkpDQoNCmRmJERheSA8LSBmb3JtYXQoZGYkRGF0ZSwgIiVkIikNCmRmJERheSA8LSBhcy5pbnRlZ2VyKGRmJERheSkNCg0Kc3VtbWFyeShkZikNCnN0cihkZikNCmBgYA0KDQojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlIj4gR2VuZXJhciBsYSByZWdyZXNpw7NuIDwvc3Bhbj4gICAgDQpgYGB7cn0NCnJlZ3Jlc2lvbiA8LSBsbShXZWVrbHlfU2FsZXN+LixkYXRhPWRmKQ0Kc3VtbWFyeShyZWdyZXNpb24pDQpgYGANCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBBanVzdGFyIGxhIHJlZ3Jlc2nDs24gPC9zcGFuPiAgDQpgYGB7cn0NCmRmX2FqdXN0YWRhIDwtIGRmICU+JSBzZWxl
Y3QoLURhdGUsLVllYXI6LURheSwtRnVlbF9QcmljZSkNCnJlZ3Jlc2lvbl9hanVzdGFkYSA8LSBsbShXZWVrbHlfU2FsZXN+LixkYXRhPWRmX2FqdXN0YWRhKQ0Kc3VtbWFyeShyZWdyZXNpb25fYWp1c3RhZGEpDQpgYGANCiMgPHNwYW4gc3R5bGU9ImNvbG9yOmJsdWUiPiBDb25jbHVzacOzbiA8L3NwYW4+ICAgICAgICAgIA0KRWwgbW9kZWxvIGRlIHJlZ3Jlc2nDs24gYWp1c3RhZG8gZXMgZXN0YWRpc3RpY2FtZW50ZSBzaWduaWZpY2F0aXZvIGVuIHRvZG9zIHN1cyBjb21wb25lbnRlcyBwYXJhIGxhIHByZWRpY2Npw7NuIGRlIGxhcyB2ZW50YXMuIEJham8gZXN0ZSBtb2RlbG8sIGZhY3RvcmVzIGNvbW8gbGEgcHJlc2VuY2lhIGRlIGZlc3RpdmlkYWRlcyBpbXBhY3RhbiBwb3NpdGl2YW1lbnRlIGxhcyB2ZW50YXMgZGUgbGEgdGllbmRhLCBtaWVudHJhcyBxdWUgZWwgYXVtZW50byBkZSBsYSB0ZW1wZXJhdHVyYSwgQ1BJIHkgZGVzZW1wbGVvIHNvbiBmYWN0b3JlcyBxdWUgaW1wYWN0YW4gZGUgZm9ybWEgbmVnYXRpdmEgZW4gbG9zIHJlc3VsdGFkb3MgZGUgbGFzIHZlbnRhcy4NCg0K