Importar bases de datos

# Load the four raw CSV tables used throughout the analysis.
# The data directory is declared once so the report can be pointed at another
# location by editing a single line instead of four absolute paths.
data_dir <- "/Users/ivannagarza/Desktop/TEC/7 SEMESTRE"
stores <- read.csv(file.path(data_dir, "stores.csv"))
features <- read.csv(file.path(data_dir, "features.csv"))
train <- read.csv(file.path(data_dir, "train.csv"))
test <- read.csv(file.path(data_dir, "test.csv"))

Instalar paquetes y cargar librerías

# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Entender las bases de datos

summary(stores)
##      Store        Type                Size       
##  Min.   : 1   Length:45          Min.   : 34875  
##  1st Qu.:12   Class :character   1st Qu.: 70713  
##  Median :23   Mode  :character   Median :126512  
##  Mean   :23                      Mean   :130288  
##  3rd Qu.:34                      3rd Qu.:202307  
##  Max.   :45                      Max.   :219622
# Number of stores per Type, most frequent first.
stores %>% count(Type, sort = TRUE)
##   Type  n
## 1    A 22
## 2    B 17
## 3    C  6
str(stores)
## 'data.frame':    45 obs. of  3 variables:
##  $ Store: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Type : chr  "A" "A" "B" "A" ...
##  $ Size : int  151315 202307 37392 205863 34875 202505 70713 155078 125833 126512 ...
# Per-column summary of the features table (including NA counts).
summary(features)
##      Store        Date            Temperature       Fuel_Price   
##  Min.   : 1   Length:8190        Min.   : -7.29   Min.   :2.472  
##  1st Qu.:12   Class :character   1st Qu.: 45.90   1st Qu.:3.041  
##  Median :23   Mode  :character   Median : 60.71   Median :3.513  
##  Mean   :23                      Mean   : 59.36   Mean   :3.406  
##  3rd Qu.:34                      3rd Qu.: 73.88   3rd Qu.:3.743  
##  Max.   :45                      Max.   :101.95   Max.   :4.468  
##                                                                  
##    MarkDown1        MarkDown2           MarkDown3           MarkDown4       
##  Min.   : -2781   Min.   :  -265.76   Min.   :  -179.26   Min.   :    0.22  
##  1st Qu.:  1578   1st Qu.:    68.88   1st Qu.:     6.60   1st Qu.:  304.69  
##  Median :  4744   Median :   364.57   Median :    36.26   Median : 1176.42  
##  Mean   :  7032   Mean   :  3384.18   Mean   :  1760.10   Mean   : 3292.94  
##  3rd Qu.:  8923   3rd Qu.:  2153.35   3rd Qu.:   163.15   3rd Qu.: 3310.01  
##  Max.   :103185   Max.   :104519.54   Max.   :149483.31   Max.   :67474.85  
##  NA's   :4158     NA's   :5269        NA's   :4577        NA's   :4726      
##    MarkDown5             CPI         Unemployment    IsHoliday      
##  Min.   :  -185.2   Min.   :126.1   Min.   : 3.684   Mode :logical  
##  1st Qu.:  1440.8   1st Qu.:132.4   1st Qu.: 6.634   FALSE:7605     
##  Median :  2727.1   Median :182.8   Median : 7.806   TRUE :585      
##  Mean   :  4132.2   Mean   :172.5   Mean   : 7.827                  
##  3rd Qu.:  4832.6   3rd Qu.:213.9   3rd Qu.: 8.567                  
##  Max.   :771448.1   Max.   :229.0   Max.   :14.313                  
##  NA's   :4140       NA's   :585     NA's   :585
str(features)
## 'data.frame':    8190 obs. of  12 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "05/02/2010" "12/02/2010" "19/02/2010" "26/02/2010" ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ MarkDown1   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MarkDown2   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MarkDown3   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MarkDown4   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ MarkDown5   : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...
##  $ IsHoliday   : logi  FALSE TRUE FALSE FALSE FALSE FALSE ...
# Per-column summary of the training table.
summary(train)
##      Store           Dept           Date            Weekly_Sales   
##  Min.   : 1.0   Min.   : 1.00   Length:421570      Min.   : -4989  
##  1st Qu.:11.0   1st Qu.:18.00   Class :character   1st Qu.:  2080  
##  Median :22.0   Median :37.00   Mode  :character   Median :  7612  
##  Mean   :22.2   Mean   :44.26                      Mean   : 15981  
##  3rd Qu.:33.0   3rd Qu.:74.00                      3rd Qu.: 20206  
##  Max.   :45.0   Max.   :99.00                      Max.   :693099  
##  IsHoliday      
##  Mode :logical  
##  FALSE:391909   
##  TRUE :29661    
##                 
##                 
## 
str(train)
## 'data.frame':    421570 obs. of  5 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Dept        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "05/02/2010" "12/02/2010" "19/02/2010" "26/02/2010" ...
##  $ Weekly_Sales: num  24924 46039 41596 19404 21828 ...
##  $ IsHoliday   : logi  FALSE TRUE FALSE FALSE FALSE FALSE ...
# Per-column summary of the test table.
summary(test)
##      Store            Dept           Date           IsHoliday      
##  Min.   : 1.00   Min.   : 1.00   Length:115064      Mode :logical  
##  1st Qu.:11.00   1st Qu.:18.00   Class :character   FALSE:106136   
##  Median :22.00   Median :37.00   Mode  :character   TRUE :8928     
##  Mean   :22.24   Mean   :44.34                                     
##  3rd Qu.:33.00   3rd Qu.:74.00                                     
##  Max.   :45.00   Max.   :99.00
str(test)
## 'data.frame':    115064 obs. of  4 variables:
##  $ Store    : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Dept     : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date     : chr  "02/11/2012" "09/11/2012" "16/11/2012" "23/11/2012" ...
##  $ IsHoliday: logi  FALSE FALSE FALSE TRUE FALSE FALSE ...

OBSERVACIONES

  1. “Features”, “Test” y “Train” tienen la fecha como carácter.
  2. En “Features” hay NA’s en más de la mitad de los registros de MarkDown (del 1 al 5).
  3. En “features”, hay 585 NA’s en CPI y Unemployment, y hay 585 registros de IsHoliday = TRUE ¿Tiene relación? R: sin relación
  4. “Train” tiene ventas semanales negativas.

Herramienta “El Generador de Valor de Datos”

Paso 1. Definir el área de negocio que buscamos impactar o mejorar y su KPI. El departamento de mercadotecnia de EUA (con muestra de 45 tiendas) en el indicador de Ventas Semanales

Paso 2. Seleccionar plantilla (-s) para crear valor a partir de los datos de los clientes.
Visión / Segmentación / Personalización / Contextualización

Paso 3. Generar ideas o conceptos específicos
Elaborar un modelo predictivo de ventas semanales.

Paso 4. Reunir los datos requeridos
Elaborar una base de datos con la variable dependiente (Ventas Semanales) y las variables independientes.

Paso 5. Plan de ejecución
Mercadotecnia elaborará plan para desplegar modelo predictivo en fases.

Fase 1. Piloto (San Antonio, Tx)
Fase 2. Texas
Fase 3. EUA

Sistemas asegurará la captura del Markdown en las bases de datos.

Creación de la base de datos consolidada

Agregar “Stores” a “Train”

# Add each store's Type and Size to the weekly sales rows, matching on Store.
bd <- merge(x = train, y = stores, by = "Store")

Agregar “Features” a “bd”

# Attach the per-store, per-date features to the sales rows.
# The join keys are spelled out instead of relying on merge()'s default of
# "every column name the two tables share": Store, Date and IsHoliday are the
# shared columns today, and naming them keeps the join from changing silently
# if either table gains another column with a common name.
bd2 <- merge(bd, features, by = c("Store", "Date", "IsHoliday"))
summary(bd2)
##      Store          Date           IsHoliday            Dept      
##  Min.   : 1.0   Length:421570      Mode :logical   Min.   : 1.00  
##  1st Qu.:11.0   Class :character   FALSE:391909    1st Qu.:18.00  
##  Median :22.0   Mode  :character   TRUE :29661     Median :37.00  
##  Mean   :22.2                                      Mean   :44.26  
##  3rd Qu.:33.0                                      3rd Qu.:74.00  
##  Max.   :45.0                                      Max.   :99.00  
##                                                                   
##   Weekly_Sales        Type                Size         Temperature    
##  Min.   : -4989   Length:421570      Min.   : 34875   Min.   : -2.06  
##  1st Qu.:  2080   Class :character   1st Qu.: 93638   1st Qu.: 46.68  
##  Median :  7612   Mode  :character   Median :140167   Median : 62.09  
##  Mean   : 15981                      Mean   :136728   Mean   : 60.09  
##  3rd Qu.: 20206                      3rd Qu.:202505   3rd Qu.: 74.28  
##  Max.   :693099                      Max.   :219622   Max.   :100.14  
##                                                                       
##    Fuel_Price      MarkDown1          MarkDown2          MarkDown3        
##  Min.   :2.472   Min.   :    0.27   Min.   :  -265.8   Min.   :   -29.10  
##  1st Qu.:2.933   1st Qu.: 2240.27   1st Qu.:    41.6   1st Qu.:     5.08  
##  Median :3.452   Median : 5347.45   Median :   192.0   Median :    24.60  
##  Mean   :3.361   Mean   : 7246.42   Mean   :  3334.6   Mean   :  1439.42  
##  3rd Qu.:3.738   3rd Qu.: 9210.90   3rd Qu.:  1926.9   3rd Qu.:   103.99  
##  Max.   :4.468   Max.   :88646.76   Max.   :104519.5   Max.   :141630.61  
##                  NA's   :270889     NA's   :310322     NA's   :284479     
##    MarkDown4          MarkDown5             CPI         Unemployment   
##  Min.   :    0.22   Min.   :   135.2   Min.   :126.1   Min.   : 3.879  
##  1st Qu.:  504.22   1st Qu.:  1878.4   1st Qu.:132.0   1st Qu.: 6.891  
##  Median : 1481.31   Median :  3359.4   Median :182.3   Median : 7.866  
##  Mean   : 3383.17   Mean   :  4629.0   Mean   :171.2   Mean   : 7.960  
##  3rd Qu.: 3595.04   3rd Qu.:  5563.8   3rd Qu.:212.4   3rd Qu.: 8.572  
##  Max.   :67474.85   Max.   :108519.3   Max.   :227.2   Max.   :14.313  
##  NA's   :286603     NA's   :270138

Cambiar formato de fecha

# Parse the day/month/year text dates into Date objects.
bd3 <- bd2
bd3$Date <- as.Date(bd3$Date, format = "%d/%m/%Y")
# as.Date() returns NA for any string that does not match the format, without
# raising an error; stop here if the conversion introduced missing dates.
stopifnot(sum(is.na(bd3$Date)) == sum(is.na(bd2$Date)))
tibble(bd3)
## # A tibble: 421,570 × 16
##    Store Date       IsHoliday  Dept Weekl…¹ Type    Size Tempe…² Fuel_…³ MarkD…⁴
##    <int> <date>     <lgl>     <int>   <dbl> <chr>  <int>   <dbl>   <dbl>   <dbl>
##  1     1 2011-04-01 FALSE        49  13168. A     151315    59.2    3.52      NA
##  2     1 2011-04-01 FALSE        26   5947. A     151315    59.2    3.52      NA
##  3     1 2011-04-01 FALSE        81  28545. A     151315    59.2    3.52      NA
##  4     1 2011-04-01 FALSE        34   9950. A     151315    59.2    3.52      NA
##  5     1 2011-04-01 FALSE        59    317. A     151315    59.2    3.52      NA
##  6     1 2011-04-01 FALSE        30   3897. A     151315    59.2    3.52      NA
##  7     1 2011-04-01 FALSE         7  20145. A     151315    59.2    3.52      NA
##  8     1 2011-04-01 FALSE        85   3209. A     151315    59.2    3.52      NA
##  9     1 2011-04-01 FALSE         8  35319. A     151315    59.2    3.52      NA
## 10     1 2011-04-01 FALSE        28    603. A     151315    59.2    3.52      NA
## # … with 421,560 more rows, 6 more variables: MarkDown2 <dbl>, MarkDown3 <dbl>,
## #   MarkDown4 <dbl>, MarkDown5 <dbl>, CPI <dbl>, Unemployment <dbl>, and
## #   abbreviated variable names ¹​Weekly_Sales, ²​Temperature, ³​Fuel_Price,
## #   ⁴​MarkDown1

Eliminar columnas

# Remove the five MarkDown columns; every other column is kept unchanged.
bd4 <- bd3 %>%
  dplyr::select(-c(MarkDown1, MarkDown2, MarkDown3, MarkDown4, MarkDown5))
summary(bd4)
##      Store           Date            IsHoliday            Dept      
##  Min.   : 1.0   Min.   :2010-02-05   Mode :logical   Min.   : 1.00  
##  1st Qu.:11.0   1st Qu.:2010-10-08   FALSE:391909    1st Qu.:18.00  
##  Median :22.0   Median :2011-06-17   TRUE :29661     Median :37.00  
##  Mean   :22.2   Mean   :2011-06-18                   Mean   :44.26  
##  3rd Qu.:33.0   3rd Qu.:2012-02-24                   3rd Qu.:74.00  
##  Max.   :45.0   Max.   :2012-10-26                   Max.   :99.00  
##   Weekly_Sales        Type                Size         Temperature    
##  Min.   : -4989   Length:421570      Min.   : 34875   Min.   : -2.06  
##  1st Qu.:  2080   Class :character   1st Qu.: 93638   1st Qu.: 46.68  
##  Median :  7612   Mode  :character   Median :140167   Median : 62.09  
##  Mean   : 15981                      Mean   :136728   Mean   : 60.09  
##  3rd Qu.: 20206                      3rd Qu.:202505   3rd Qu.: 74.28  
##  Max.   :693099                      Max.   :219622   Max.   :100.14  
##    Fuel_Price         CPI         Unemployment   
##  Min.   :2.472   Min.   :126.1   Min.   : 3.879  
##  1st Qu.:2.933   1st Qu.:132.0   1st Qu.: 6.891  
##  Median :3.452   Median :182.3   Median : 7.866  
##  Mean   :3.361   Mean   :171.2   Mean   : 7.960  
##  3rd Qu.:3.738   3rd Qu.:212.4   3rd Qu.: 8.572  
##  Max.   :4.468   Max.   :227.2   Max.   :14.313
# install.packages("wordspace")
library(wordspace)
## Loading required package: Matrix
# Tally how many weekly sales figures are positive, zero and negative.
signcount(bd4[["Weekly_Sales"]])
##    pos   zero    neg 
## 420212     73   1285

Eliminar ventas menores o iguales a 0

# Keep only rows with strictly positive weekly sales; negative and zero
# values are both discarded.
# which() converts the logical test into row positions, so a missing
# Weekly_Sales would be dropped instead of producing an all-NA row.
bd5 <- bd4[which(bd4$Weekly_Sales > 0), ]
summary(bd5)
##      Store           Date            IsHoliday            Dept      
##  Min.   : 1.0   Min.   :2010-02-05   Mode :logical   Min.   : 1.00  
##  1st Qu.:11.0   1st Qu.:2010-10-08   FALSE:390652    1st Qu.:18.00  
##  Median :22.0   Median :2011-06-17   TRUE :29560     Median :37.00  
##  Mean   :22.2   Mean   :2011-06-18                   Mean   :44.24  
##  3rd Qu.:33.0   3rd Qu.:2012-02-24                   3rd Qu.:74.00  
##  Max.   :45.0   Max.   :2012-10-26                   Max.   :99.00  
##   Weekly_Sales        Type                Size         Temperature    
##  Min.   :     0   Length:420212      Min.   : 34875   Min.   : -2.06  
##  1st Qu.:  2120   Class :character   1st Qu.: 93638   1st Qu.: 46.68  
##  Median :  7662   Mode  :character   Median :140167   Median : 62.09  
##  Mean   : 16033                      Mean   :136750   Mean   : 60.09  
##  3rd Qu.: 20271                      3rd Qu.:202505   3rd Qu.: 74.28  
##  Max.   :693099                      Max.   :219622   Max.   :100.14  
##    Fuel_Price         CPI         Unemployment   
##  Min.   :2.472   Min.   :126.1   Min.   : 3.879  
##  1st Qu.:2.933   1st Qu.:132.0   1st Qu.: 6.891  
##  Median :3.452   Median :182.4   Median : 7.866  
##  Mean   :3.361   Mean   :171.2   Mean   : 7.960  
##  3rd Qu.:3.738   3rd Qu.:212.4   3rd Qu.: 8.567  
##  Max.   :4.468   Max.   :227.2   Max.   :14.313

Agregar número de la semana

# Add the week of the year for each date. "%V" is the ISO 8601 week number;
# strftime() returns it as text, so the column is character at this point.
bd6 <- bd5
bd6[["week_number"]] <- strftime(bd6[["Date"]], format = "%V")
str(bd6)
## 'data.frame':    420212 obs. of  12 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : Date, format: "2011-04-01" "2011-04-01" ...
##  $ IsHoliday   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Dept        : int  49 26 81 34 59 30 7 85 8 28 ...
##  $ Weekly_Sales: num  13168 5947 28545 9950 317 ...
##  $ Type        : chr  "A" "A" "A" "A" ...
##  $ Size        : int  151315 151315 151315 151315 151315 151315 151315 151315 151315 151315 ...
##  $ Temperature : num  59.2 59.2 59.2 59.2 59.2 ...
##  $ Fuel_Price  : num  3.52 3.52 3.52 3.52 3.52 ...
##  $ CPI         : num  215 215 215 215 215 ...
##  $ Unemployment: num  7.68 7.68 7.68 7.68 7.68 ...
##  $ week_number : chr  "13" "13" "13" "13" ...
# Convert the text week number to an integer.
bd6[["week_number"]] <- as.integer(bd6[["week_number"]])
str(bd6)
## 'data.frame':    420212 obs. of  12 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : Date, format: "2011-04-01" "2011-04-01" ...
##  $ IsHoliday   : logi  FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ Dept        : int  49 26 81 34 59 30 7 85 8 28 ...
##  $ Weekly_Sales: num  13168 5947 28545 9950 317 ...
##  $ Type        : chr  "A" "A" "A" "A" ...
##  $ Size        : int  151315 151315 151315 151315 151315 151315 151315 151315 151315 151315 ...
##  $ Temperature : num  59.2 59.2 59.2 59.2 59.2 ...
##  $ Fuel_Price  : num  3.52 3.52 3.52 3.52 3.52 ...
##  $ CPI         : num  215 215 215 215 215 ...
##  $ Unemployment: num  7.68 7.68 7.68 7.68 7.68 ...
##  $ week_number : int  13 13 13 13 13 13 13 13 13 13 ...
summary(bd6)
##      Store           Date            IsHoliday            Dept      
##  Min.   : 1.0   Min.   :2010-02-05   Mode :logical   Min.   : 1.00  
##  1st Qu.:11.0   1st Qu.:2010-10-08   FALSE:390652    1st Qu.:18.00  
##  Median :22.0   Median :2011-06-17   TRUE :29560     Median :37.00  
##  Mean   :22.2   Mean   :2011-06-18                   Mean   :44.24  
##  3rd Qu.:33.0   3rd Qu.:2012-02-24                   3rd Qu.:74.00  
##  Max.   :45.0   Max.   :2012-10-26                   Max.   :99.00  
##   Weekly_Sales        Type                Size         Temperature    
##  Min.   :     0   Length:420212      Min.   : 34875   Min.   : -2.06  
##  1st Qu.:  2120   Class :character   1st Qu.: 93638   1st Qu.: 46.68  
##  Median :  7662   Mode  :character   Median :140167   Median : 62.09  
##  Mean   : 16033                      Mean   :136750   Mean   : 60.09  
##  3rd Qu.: 20271                      3rd Qu.:202505   3rd Qu.: 74.28  
##  Max.   :693099                      Max.   :219622   Max.   :100.14  
##    Fuel_Price         CPI         Unemployment     week_number   
##  Min.   :2.472   Min.   :126.1   Min.   : 3.879   Min.   : 1.00  
##  1st Qu.:2.933   1st Qu.:132.0   1st Qu.: 6.891   1st Qu.:14.00  
##  Median :3.452   Median :182.4   Median : 7.866   Median :26.00  
##  Mean   :3.361   Mean   :171.2   Mean   : 7.960   Mean   :25.83  
##  3rd Qu.:3.738   3rd Qu.:212.4   3rd Qu.: 8.567   3rd Qu.:38.00  
##  Max.   :4.468   Max.   :227.2   Max.   :14.313   Max.   :52.00

Separar Año, Mes y Día

# Split Date into separate numeric year, month and day-of-month columns.
bd7 <- dplyr::mutate(
  bd6,
  year = lubridate::year(Date),
  month = lubridate::month(Date),
  day = lubridate::day(Date)
)

summary(bd7)
##      Store           Date            IsHoliday            Dept      
##  Min.   : 1.0   Min.   :2010-02-05   Mode :logical   Min.   : 1.00  
##  1st Qu.:11.0   1st Qu.:2010-10-08   FALSE:390652    1st Qu.:18.00  
##  Median :22.0   Median :2011-06-17   TRUE :29560     Median :37.00  
##  Mean   :22.2   Mean   :2011-06-18                   Mean   :44.24  
##  3rd Qu.:33.0   3rd Qu.:2012-02-24                   3rd Qu.:74.00  
##  Max.   :45.0   Max.   :2012-10-26                   Max.   :99.00  
##   Weekly_Sales        Type                Size         Temperature    
##  Min.   :     0   Length:420212      Min.   : 34875   Min.   : -2.06  
##  1st Qu.:  2120   Class :character   1st Qu.: 93638   1st Qu.: 46.68  
##  Median :  7662   Mode  :character   Median :140167   Median : 62.09  
##  Mean   : 16033                      Mean   :136750   Mean   : 60.09  
##  3rd Qu.: 20271                      3rd Qu.:202505   3rd Qu.: 74.28  
##  Max.   :693099                      Max.   :219622   Max.   :100.14  
##    Fuel_Price         CPI         Unemployment     week_number   
##  Min.   :2.472   Min.   :126.1   Min.   : 3.879   Min.   : 1.00  
##  1st Qu.:2.933   1st Qu.:132.0   1st Qu.: 6.891   1st Qu.:14.00  
##  Median :3.452   Median :182.4   Median : 7.866   Median :26.00  
##  Mean   :3.361   Mean   :171.2   Mean   : 7.960   Mean   :25.83  
##  3rd Qu.:3.738   3rd Qu.:212.4   3rd Qu.: 8.567   3rd Qu.:38.00  
##  Max.   :4.468   Max.   :227.2   Max.   :14.313   Max.   :52.00  
##       year          month            day       
##  Min.   :2010   Min.   : 1.00   Min.   : 1.00  
##  1st Qu.:2010   1st Qu.: 4.00   1st Qu.: 8.00  
##  Median :2011   Median : 6.00   Median :16.00  
##  Mean   :2011   Mean   : 6.45   Mean   :15.67  
##  3rd Qu.:2012   3rd Qu.: 9.00   3rd Qu.:23.00  
##  Max.   :2012   Max.   :12.00   Max.   :31.00

Generar regresión

# Ordinary least-squares model of weekly sales on every remaining column
# except Date, plus the derived calendar fields.
# NOTE(review): Store and Dept are integer columns, so each is fitted with a
# single slope; they look like identifier codes - confirm whether factor()
# was intended.
# NOTE(review): week_number, month and day are all derived from Date and are
# likely to be strongly correlated with one another - confirm all are needed.
regresion <- lm(
  Weekly_Sales ~ Store + IsHoliday + Dept + Type +
    Size + Temperature + Fuel_Price + CPI + Unemployment + week_number +
    year + month + day,
  data = bd7
)
summary(regresion)
## 
## Call:
## lm(formula = Weekly_Sales ~ Store + IsHoliday + Dept + Type + 
##     Size + Temperature + Fuel_Price + CPI + Unemployment + week_number + 
##     year + month + day, data = bd7)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -34331 -12895  -5852   5626 671540 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    1.110e+06  2.999e+05   3.701 0.000214 ***
## Store         -1.426e+02  3.087e+00 -46.198  < 2e-16 ***
## IsHolidayTRUE  8.511e+02  1.391e+02   6.119 9.45e-10 ***
## Dept           1.108e+02  1.097e+00 101.013  < 2e-16 ***
## TypeB         -3.133e+02  1.078e+02  -2.908 0.003642 ** 
## TypeC          5.836e+03  1.840e+02  31.709  < 2e-16 ***
## Size           9.920e-02  9.584e-04 103.511  < 2e-16 ***
## Temperature    3.701e+00  2.133e+00   1.735 0.082688 .  
## Fuel_Price     4.791e+02  1.480e+02   3.237 0.001207 ** 
## CPI           -2.340e+01  9.996e-01 -23.409  < 2e-16 ***
## Unemployment  -2.538e+02  2.062e+01 -12.308  < 2e-16 ***
## week_number    7.678e+02  4.566e+02   1.682 0.092648 .  
## year          -5.485e+02  1.485e+02  -3.695 0.000220 ***
## month         -3.167e+03  1.988e+03  -1.594 0.111036    
## day           -1.281e+02  6.539e+01  -1.959 0.050115 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 21690 on 420197 degrees of freedom
## Multiple R-squared:  0.08982,    Adjusted R-squared:  0.08979 
## F-statistic:  2962 on 14 and 420197 DF,  p-value: < 2.2e-16

Conclusión

Durante la elaboración de este código se tuvo que realizar una limpieza de la base de datos, en la cual identificamos cada variable que fuera de tipo carácter para poder adaptarla y así proceder a realizar un modelo predictivo. Asimismo, en la limpieza de datos, se eliminaron las columnas en las cuales había datos faltantes.
El modelo predictivo se realizó para poder predecir y pronosticar las ventas semanales y así poder tomar decisiones ante eventos diferentes y cambios.

LS0tCnRpdGxlOiAiV2FsbWFydCIKYXV0aG9yOiAiSXZhbm5hIEdhcnphIEEwMTI4Mzc1OSIKZGF0ZTogIjIwMjItMDktMDkiCm91dHB1dDogCiBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIHRoZW1lOiBjZXJ1bGVhbgogICAgaGlnaGxpZ2h0OiB0YW5nbwogICAgY29kZV9kb3dubG9hZDogdHJ1ZSAKLS0tCgo8aW1nIHNyYz0gIi9Vc2Vycy9pdmFubmFnYXJ6YS9EZXNrdG9wL2xvZ28tV2FsbWFydC5wbmciPgoKIyBJbXBvcnRhciBiYXNlcyBkZSBkYXRvcyAKYGBge3J9CnN0b3JlcyA8LSByZWFkLmNzdigiL1VzZXJzL2l2YW5uYWdhcnphL0Rlc2t0b3AvVEVDLzcgU0VNRVNUUkUvc3RvcmVzLmNzdiIpCmZlYXR1cmVzIDwtIHJlYWQuY3N2KCIvVXNlcnMvaXZhbm5hZ2FyemEvRGVza3RvcC9URUMvNyBTRU1FU1RSRS9mZWF0dXJlcy5jc3YiKQp0cmFpbiA8LSByZWFkLmNzdigiL1VzZXJzL2l2YW5uYWdhcnphL0Rlc2t0b3AvVEVDLzcgU0VNRVNUUkUvdHJhaW4uY3N2IikKdGVzdCA8LSByZWFkLmNzdigiL1VzZXJzL2l2YW5uYWdhcnphL0Rlc2t0b3AvVEVDLzcgU0VNRVNUUkUvdGVzdC5jc3YiKQpgYGAKCiMgSW5zdGFsYXIgcGFxdWV0ZXMgeSBsbGFtYXIgbGlicmVyaWFzIApgYGB7cn0KIyBpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpCmxpYnJhcnkoZHBseXIpCmBgYAoKIyBFbnRlbmRlciBsYXMgYmFzZXMgZGUgZGF0b3MgCgpgYGB7cn0Kc3VtbWFyeShzdG9yZXMpCmNvdW50KHN0b3JlcywgVHlwZSwgc29ydCA9IFRSVUUpCnN0cihzdG9yZXMpCgpzdW1tYXJ5IChmZWF0dXJlcykKc3RyKGZlYXR1cmVzKQoKc3VtbWFyeSAodHJhaW4pCnN0cih0cmFpbikKCnN1bW1hcnkgKHRlc3QpCnN0cih0ZXN0KQoKYGBgCgojICoqT0JTRVJWQUNJT05FUyoqCjEuICJGZWF0dXJlcyIsICJUZXN0IiB5ICJUcmFpbiIgdGllbmUgbGEgZmVjaGEgY29tbyBjYXJhY3Rlci4gCjIuICJGZWF0dXJlcyIsIGhheSBOQSdzIGVuIG3DoXMgZGUgbGEgbWl0YWQgZGUgbG9zIHJlZ2lzdHJvcyBkZSBNYXJrZG93biAoZGVsIDEgYWwgNSkKMy4gRW4gImZlYXR1cmVzIiwgaGF5IDU4NSBOQSdzIGVuIENQSSB5IFVuZW1wbG95bWVudCwgeSBoYXkgNTg1IHJlZ2lzdHJvcyBkZSBJc0hvbGlkYXkgPSBUUlVFIMK/VGllbmUgcmVsYWNpw7NuPyBSOiBzaW4gcmVsYWNpw7NuCjQuICJUcmFpbiIgdGllbmUgdmVudGFzIHNlbWFuYWxlcyBuZWdhdGl2YXMuIAoKIyAqKkhlcnJhbWllbnRhICJFbCBHZW5lcmFkb3IgZGUgVmFsb3IgZGUgRGF0b3MiKiogIAoKKipQYXNvIDEuIERlZmluaXIgZWwgw6FyZWEgZGUgbmVnb2NpbyBxdWUgYnVzY2Ftb3MgaW1wYWN0YXIgbyBtZWpvcmFyIHkgc3UgS1BJLioqCipFbCBkZXBhcnRhbWVudG8gZGUgbWVyY2Fkb3RlY25pYSBkZSBFVUEgKGNvbiBtdWVzdHJhIGRlIDQ1IHRpZW5kYXMpIGVuIGVsIGluZGljYWRvciBkZSBWZW50YXMgU2VtYW5hbGVzKiAgCgoqKlBhc28gMi4gU2VsZWNjaW9uYXIg
cGxhbnRpbGxhICgtcykgcGFyYSBjcmVhciB2YWxvciBhIHBhcnRpciBkZSBsb3MgZGF0b3MgZGUgbG9zIGNsaWVudGVzLioqICAKKipWaXNpw7NuKiogKi8gU2VnbWVudGFjacOzbiAvIFBlcnNvbmFsaXphY2nDs24gLyBDb250ZXh0dWFsaXphY2nDs24qCgoqKlBhc28gMy4gR2VuZXJhciBpZGVhcyBvIGNvbmNlcHRvcyBlc3BlY8OtZmljb3MqKiAgCipFbGFib3JhciB1biBtb2RlbG8gcHJlZGljdGl2byBkZSB2ZW50YXMgc2VtYW5hbGVzLiogIAoKKipQYXNvIDQuIFJldW5pciBsb3MgZGF0b3MgcmVxdWVyaWRvcyoqICAKKkVsYWJvcmFyIHVuYSBiYXNlIGRlIGRhdG9zIGNvbiBsYSB2YXJpYWxlIGRlcGVuZGllbnRlIChWZW50YXMgU2VtYW5hbGVzKSB5IGxhcyB2YXJpYWJsZXMgaW5kZXBlbmRpZW50ZXMuKiAgCgoqKlBhc28gNS4gUGxhbiBkZSBlamVjdWNpw7NuKiogIAoqTWVyY2Fkb3RlY25pYSBlbGFib3JhcsOhIHBsYW4gcGFyYSBkZXNwbGVnYXIgbW9kZWxvIHByZWRpY3Rpdm8gZW4gZmFzZXMuKiAgCgoqRmFzZSAxLiBQaWxvdG8gKFNhbiBBbnRvbmlvLCBUeCkqICAKKkZhc2UgMi4gVGV4YXMqICAKKkZhc2UgMy4gRVVBKiAgCgpTaXN0ZW1hcyBhc2VndXJhcsOhIGxhIGNhcHR1cmEgZGVsIE1hcmtkb3duIGVuIGxhcyBiYXNlcyBkZSBkYXRvcy4gIAoKIyBDcmVhY2nDs24gZGUgbGEgYmFzZSBkZSBkYXRvcyBjb25zb2xpZGFkYSAKCiMjIEFncmVnYXIgIlN0b3JlcyIgYSAiVHJhaW4iIApgYGB7cn0KYmQgPC0gbWVyZ2UodHJhaW4sIHN0b3JlcywgYnk9IlN0b3JlIikKCmBgYAoKIyMgQWdyZWdhciAiYmQiIGEgIkZlYXR1cmVzIgpgYGB7cn0KYmQyIDwtIG1lcmdlIChiZCwgZmVhdHVyZXMpCnN1bW1hcnkoYmQyKQpgYGAKCiMjIENhbWJpYXIgZm9ybWF0byBkZSBmZWNoYSAKYGBge3J9CmJkMyA8LSBiZDIKYmQzJERhdGUgPC0gYXMuRGF0ZShiZDMkRGF0ZSwgZm9ybWF0ID0gIiVkLyVtLyVZIikKdGliYmxlKGJkMykKYGBgCgojIyBFbGltaW5hciBjb2x1bW5hcwpgYGB7cn0KYmQ0IDwtIGJkMwpiZDQgPC0gc3Vic2V0IChiZDMsc2VsZWN0ID0gLWMoTWFya0Rvd24xLCBNYXJrRG93bjIsIE1hcmtEb3duMywgTWFya0Rvd240LCBNYXJrRG93bjUpKQpzdW1tYXJ5IChiZDQpCmBgYAoKYGBge3J9CiMgaW5zdGFsbC5wYWNrYWdlcygid29yZHNwYWNlIikKbGlicmFyeSh3b3Jkc3BhY2UpCmBgYAoKYGBge3J9CnNpZ25jb3VudChiZDQkV2Vla2x5X1NhbGVzKQoKYGBgCgojIEVsaW1pbmFyIHZlbnRhcyBtZW5vcmVzIHF1ZSAwCmBgYHtyfQpiZDU8LWJkNApiZDU8LSBiZDUgW2JkNSRXZWVrbHlfU2FsZXMgPiAwLF0Kc3VtbWFyeShiZDUpCgpgYGAKCiMgQWdyZWdhciBuw7ptZXJvIGRlIGxhIHNlbWFuYSAKYGBge3J9CmJkNjwtIGJkNQpiZDYkd2Vla19udW1iZXIgPC0gc3RyZnRpbWUoYmQ2JERhdGUsIGZvcm1hdCA9ICIlViIpCnN0cihiZDYpCmJkNiR3ZWVrX251bWJlciA8LSBhcy5pbnRlZ2VyKGJkNiR3ZWVrX251bWJlcikKc3RyKGJk
NikKc3VtbWFyeShiZDYpCmBgYAoKIyBTZXBhcmFyIEHDsW8sIE1lcyB5IETDrWEgCmBgYHtyfQpiZDcgPC0gYmQ2IApiZDcgPC0gYmQ2ICU+JQogIGRwbHlyOjptdXRhdGUoeWVhciA9IGx1YnJpZGF0ZTo6IHllYXIoRGF0ZSksCiAgICAgICAgICAgICAgICBtb250aCA9IGx1YnJpZGF0ZSA6OiBtb250aChEYXRlKSwKICAgICAgICAgICAgICAgIGRheSA9IGx1YnJpZGF0ZSA6OiBkYXkoRGF0ZSkpCgpzdW1tYXJ5KGJkNykKYGBgCgojIEdlbmVyYXIgcmVncmVzacOzbiAKYGBge3J9CnJlZ3Jlc2lvbiA8LSBsbShXZWVrbHlfU2FsZXMgfiAgU3RvcmUgKyBJc0hvbGlkYXkgKyBEZXB0ICsgVHlwZSArIFNpemUgKyBUZW1wZXJhdHVyZSArIEZ1ZWxfUHJpY2UgKyBDUEkgKyBVbmVtcGxveW1lbnQgKyB3ZWVrX251bWJlciArIHllYXIgKyBtb250aCArIGRheSwgZGF0YT1iZDcpCnN1bW1hcnkocmVncmVzaW9uKQoKYGBgCgojIENvbmNsdXNpw7NuIApEdXJhbnRlIGxhIGVsYWJvcmFjacOzbiBkZSBlc3RlIGPDs2RpZ28gc2UgdHV2byBxdWUgcmVhbGl6YXIgdW5hIGxpbXBpZXphIGRlIGxhIGJhc2UgZGUgZGF0b3MsIGVuIGxhIGN1YWwgaWRlbnRpZmljYW1vcyBjYWRhIHZhcmlhYmxlIHF1ZSBmdWVyYSB1biBjYXJhY3RlciBwYXJhIHBvZGVyIGFkYXB0YXJsYSBwYXJhIGFzaSBwcm9jZWRlciBhIHJlYWxpemFyIHVuIG1vZGVsbyBwcmVkaWN0aXZvLiBBc2ltaXNtbywgZW4gbGEgbGltcGllemEgZGUgZGF0b3MsIHNlIGVsaW1pbmFyb24gbGFzIGNvbHVtbmFzIGVuIGxhcyBjdWFsZXMgaGFiaWEgZGF0b3MgaW5leGlzdGVudGVzLiAgCkVsIG1vZGVsbyBwcmVkaWN0aXZvIHNlIHJlYWxpesOzIHBhcmEgcG9kZXIgcHJlZGVjaXIgeSBwcm9ub3N0aWNhciBsYXMgdmVudGFzIHNlbWFuYWxlcyB5IGFzaSBwb2RlciB0b21hciBkZWNpc2lvbmVzIGFudGUgZXZlbnRvcyBkaWZlcmVudGVzIHkgY2FtYmlvcy4gCg==