Importar base de datos:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the Walmart weekly store sales CSV into the data frame `df`.
# NOTE(review): the path is absolute and machine-specific; the report only
# runs where this exact file exists.
df <- read.csv(
  file = "/Users/kikepablos/Documents/Development/escuela/concentracion_ai/modulo_6/data_sources/Walmart_Store_sales.csv"
)

Entender la base de datos:

# Print per-column summary statistics (min, quartiles, mean, max for the
# numeric columns; length/class/mode for the character Date column).
summary(df)
##      Store        Date            Weekly_Sales      Holiday_Flag    
##  Min.   : 1   Length:6435        Min.   : 209986   Min.   :0.00000  
##  1st Qu.:12   Class :character   1st Qu.: 553350   1st Qu.:0.00000  
##  Median :23   Mode  :character   Median : 960746   Median :0.00000  
##  Mean   :23                      Mean   :1046965   Mean   :0.06993  
##  3rd Qu.:34                      3rd Qu.:1420159   3rd Qu.:0.00000  
##  Max.   :45                      Max.   :3818686   Max.   :1.00000  
##   Temperature       Fuel_Price         CPI         Unemployment   
##  Min.   : -2.06   Min.   :2.472   Min.   :126.1   Min.   : 3.879  
##  1st Qu.: 47.46   1st Qu.:2.933   1st Qu.:131.7   1st Qu.: 6.891  
##  Median : 62.67   Median :3.445   Median :182.6   Median : 7.874  
##  Mean   : 60.66   Mean   :3.359   Mean   :171.6   Mean   : 7.999  
##  3rd Qu.: 74.94   3rd Qu.:3.735   3rd Qu.:212.7   3rd Qu.: 8.622  
##  Max.   :100.14   Max.   :4.468   Max.   :227.2   Max.   :14.313
# Print the compact structure of df: dimensions, column types and the
# first few values of each column.
str(df)
## 'data.frame':    6435 obs. of  8 variables:
##  $ Store       : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Date        : chr  "05-02-2010" "12-02-2010" "19-02-2010" "26-02-2010" ...
##  $ Weekly_Sales: num  1643691 1641957 1611968 1409728 1554807 ...
##  $ Holiday_Flag: int  0 1 0 0 0 0 0 0 0 0 ...
##  $ Temperature : num  42.3 38.5 39.9 46.6 46.5 ...
##  $ Fuel_Price  : num  2.57 2.55 2.51 2.56 2.62 ...
##  $ CPI         : num  211 211 211 211 211 ...
##  $ Unemployment: num  8.11 8.11 8.11 8.11 8.11 ...
# Parse the day-month-year strings in Date into Date objects, then derive
# integer calendar features from the parsed dates.
# mutate() evaluates its arguments in order, so every format() call below
# already sees the parsed Date column; new columns are appended in the order
# they are listed.
# NOTE(review): every previewed row has WeekDay = 5, so this column may be
# constant across the data set -- confirm before using it as a predictor.
df <- df %>%
  mutate(
    Date = as.Date(Date, format = "%d-%m-%Y"),
    Month = as.integer(format(Date, "%m")),     # month number
    Day = as.integer(format(Date, "%d")),       # day of the month
    Year = as.integer(format(Date, "%Y")),      # four-digit year
    WeekYear = as.integer(format(Date, "%W")),  # week of year, Monday first
    WeekDay = as.integer(format(Date, "%u"))    # weekday number, 1 = Monday
  )
# Preview the first rows together with the new columns.
head(df)
##   Store       Date Weekly_Sales Holiday_Flag Temperature Fuel_Price      CPI
## 1     1 2010-02-05      1643691            0       42.31      2.572 211.0964
## 2     1 2010-02-12      1641957            1       38.51      2.548 211.2422
## 3     1 2010-02-19      1611968            0       39.93      2.514 211.2891
## 4     1 2010-02-26      1409728            0       46.63      2.561 211.3196
## 5     1 2010-03-05      1554807            0       46.50      2.625 211.3501
## 6     1 2010-03-12      1439542            0       57.79      2.667 211.3806
##   Unemployment Month Day Year WeekYear WeekDay
## 1        8.106     2   5 2010        5       5
## 2        8.106     2  12 2010        6       5
## 3        8.106     2  19 2010        7       5
## 4        8.106     2  26 2010        8       5
## 5        8.106     3   5 2010        9       5
## 6        8.106     3  12 2010       10       5

Generar regresión lineal:

# Build the modelling table: keep the response (Weekly_Sales) and the
# predictors, dropping the unused columns explicitly by name.
# WeekDay is excluded as well: when it was included, lm() reported an NA
# coefficient for it, meaning the term was aliased with the other terms and
# contributed nothing to the fit (the previewed rows all have WeekDay = 5).
# Removing an aliased term leaves the remaining estimates unchanged.
df_ajustada <- df %>%
  select(-Store, -Date, -Fuel_Price, -Year, -Day, -WeekDay)
# Ordinary least-squares fit of Weekly_Sales on every remaining column.
regresion_ajustada <- lm(Weekly_Sales ~ ., data = df_ajustada)
# Print the call and the estimated coefficients.
regresion_ajustada
## 
## Call:
## lm(formula = Weekly_Sales ~ ., data = df_ajustada)
## 
## Coefficients:
##  (Intercept)  Holiday_Flag   Temperature           CPI  Unemployment  
##      1593232         48021         -1467         -1497        -39957  
##        Month      WeekYear  
##        57686         -9921
LS0tCnRpdGxlOiAid2FsbWFydCIKYXV0aG9yOiAiRW5yaXF1ZSBQYWJsb3MgQTAwODM1MDM3IgpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiCm91dHB1dDoKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgIHRoZW1lOiAic3BhY2VsYWIiCiAgICBoaWdobGlndGg6ICJrYXRlIgotLS0KCiFbXSgvVXNlcnMva2lrZXBhYmxvcy9Eb2N1bWVudHMvRGV2ZWxvcG1lbnQvZXNjdWVsYS9jb25jZW50cmFjaW9uX2FpL21vZHVsb182L2Fzc2V0cy93YWxtYXJ0LmpwZykKCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZWNobyA9IFRSVUUpCmBgYAoKIyBJbXBvcnRhciBiYXNlIGRlIGRhdG9zOgpgYGB7ciB3YXJuaW5nPUZBTFNFfQpsaWJyYXJ5KGRwbHlyKQpkZiA8LSByZWFkLmNzdigiL1VzZXJzL2tpa2VwYWJsb3MvRG9jdW1lbnRzL0RldmVsb3BtZW50L2VzY3VlbGEvY29uY2VudHJhY2lvbl9haS9tb2R1bG9fNi9kYXRhX3NvdXJjZXMvV2FsbWFydF9TdG9yZV9zYWxlcy5jc3YiKQpgYGAKIyBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zOgpgYGB7cn0Kc3VtbWFyeShkZikKc3RyKGRmKQpkZiREYXRlIDwtIGFzLkRhdGUoZGYkRGF0ZSwgZm9ybWF0ID0gIiVkLSVtLSVZIikKZGYkTW9udGggPC0gYXMuaW50ZWdlcihmb3JtYXQoZGYkRGF0ZSwgIiVtIikpCmRmJERheSA8LSBhcy5pbnRlZ2VyKGZvcm1hdChkZiREYXRlLCAiJWQiKSkKZGYkWWVhciA8LSBhcy5pbnRlZ2VyKGZvcm1hdChkZiREYXRlLCAiJVkiKSkKZGYkV2Vla1llYXIgPC0gYXMuaW50ZWdlcihmb3JtYXQoZGYkRGF0ZSwgIiVXIikpCmRmJFdlZWtEYXkgPC0gYXMuaW50ZWdlcihmb3JtYXQoZGYkRGF0ZSwgIiV1IikpCmhlYWQoZGYpCmBgYAojIEdlbmVyYXIgcmVncmVzacOzbiBsaW5lYWwKYGBge3J9CmRmX2FqdXN0YWRhIDwtIGRmICU+JSBzZWxlY3QoLVN0b3JlLCAtRGF0ZSwgLUZ1ZWxfUHJpY2UsIC1ZZWFyOi1EYXkpCnJlZ3Jlc2lvbl9hanVzdGFkYSA8LSBsbShXZWVrbHlfU2FsZXMgfi4sZGZfYWp1c3RhZGEpCnJlZ3Jlc2lvbl9hanVzdGFkYQpgYGAKCgo=