# WALMART SALES
# The Walmart Sales dataset provides relevant information about Walmart sales
# alongside several parameters, such as the temperature of the region, the
# fuel cost, the unemployment rate and other variables that help identify
# patterns that may influence sales.

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr) ##Con esta función podemos leer el archivo csv que acabamos de cargar
# Load the raw CSV as a tibble; read_csv() also reports the column types it
# guessed. Date is kept as a character column (day-month-year text).
Walmart_Sales <- readr::read_csv(file = "Walmart_Sales.csv")
## Rows: 6435 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (7): Store, Weekly_Sales, Holiday_Flag, Temperature, Fuel_Price, CPI, Un...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep the column specification attached to the tibble, then print it
# explicitly.
especificacion_columnas <- Walmart_Sales %>% spec()
print(especificacion_columnas)
## cols(
##   Store = col_double(),
##   Date = col_character(),
##   Weekly_Sales = col_double(),
##   Holiday_Flag = col_double(),
##   Temperature = col_double(),
##   Fuel_Price = col_double(),
##   CPI = col_double(),
##   Unemployment = col_double()
## )
# Plain function call: show the first five rows.
head(x = Walmart_Sales, n = 5)
## # A tibble: 5 × 8
##   Store Date       Weekly_Sales Holiday_Flag Temperature Fuel_Price   CPI
##   <dbl> <chr>             <dbl>        <dbl>       <dbl>      <dbl> <dbl>
## 1     1 05-02-2010     1643691.            0        42.3       2.57  211.
## 2     1 12-02-2010     1641957.            1        38.5       2.55  211.
## 3     1 19-02-2010     1611968.            0        39.9       2.51  211.
## 4     1 26-02-2010     1409728.            0        46.6       2.56  211.
## 5     1 05-03-2010     1554807.            0        46.5       2.62  211.
## # ℹ 1 more variable: Unemployment <dbl>
# Pipe form: the tibble on the left becomes head()'s first argument.
Walmart_Sales %>%
  head(n = 10)
## # A tibble: 10 × 8
##    Store Date       Weekly_Sales Holiday_Flag Temperature Fuel_Price   CPI
##    <dbl> <chr>             <dbl>        <dbl>       <dbl>      <dbl> <dbl>
##  1     1 05-02-2010     1643691.            0        42.3       2.57  211.
##  2     1 12-02-2010     1641957.            1        38.5       2.55  211.
##  3     1 19-02-2010     1611968.            0        39.9       2.51  211.
##  4     1 26-02-2010     1409728.            0        46.6       2.56  211.
##  5     1 05-03-2010     1554807.            0        46.5       2.62  211.
##  6     1 12-03-2010     1439542.            0        57.8       2.67  211.
##  7     1 19-03-2010     1472516.            0        54.6       2.72  211.
##  8     1 26-03-2010     1404430.            0        51.4       2.73  211.
##  9     1 02-04-2010     1594968.            0        62.3       2.72  211.
## 10     1 09-04-2010     1545419.            0        65.9       2.77  211.
## # ℹ 1 more variable: Unemployment <dbl>
# Placeholder form: `.` routes the left-hand value (10) into head()'s row
# count instead of its first argument.
10 %>% head(Walmart_Sales, n = .)
## # A tibble: 10 × 8
##    Store Date       Weekly_Sales Holiday_Flag Temperature Fuel_Price   CPI
##    <dbl> <chr>             <dbl>        <dbl>       <dbl>      <dbl> <dbl>
##  1     1 05-02-2010     1643691.            0        42.3       2.57  211.
##  2     1 12-02-2010     1641957.            1        38.5       2.55  211.
##  3     1 19-02-2010     1611968.            0        39.9       2.51  211.
##  4     1 26-02-2010     1409728.            0        46.6       2.56  211.
##  5     1 05-03-2010     1554807.            0        46.5       2.62  211.
##  6     1 12-03-2010     1439542.            0        57.8       2.67  211.
##  7     1 19-03-2010     1472516.            0        54.6       2.72  211.
##  8     1 26-03-2010     1404430.            0        51.4       2.73  211.
##  9     1 02-04-2010     1594968.            0        62.3       2.72  211.
## 10     1 09-04-2010     1545419.            0        65.9       2.77  211.
## # ℹ 1 more variable: Unemployment <dbl>
#SELECT
# Keep only these four columns, in the order listed.
select(Walmart_Sales, Date, Weekly_Sales, Temperature, Fuel_Price)
## # A tibble: 6,435 × 4
##    Date       Weekly_Sales Temperature Fuel_Price
##    <chr>             <dbl>       <dbl>      <dbl>
##  1 05-02-2010     1643691.        42.3       2.57
##  2 12-02-2010     1641957.        38.5       2.55
##  3 19-02-2010     1611968.        39.9       2.51
##  4 26-02-2010     1409728.        46.6       2.56
##  5 05-03-2010     1554807.        46.5       2.62
##  6 12-03-2010     1439542.        57.8       2.67
##  7 19-03-2010     1472516.        54.6       2.72
##  8 26-03-2010     1404430.        51.4       2.73
##  9 02-04-2010     1594968.        62.3       2.72
## 10 09-04-2010     1545419.        65.9       2.77
## # ℹ 6,425 more rows
# A leading minus drops the named column and keeps every other one.
Walmart_Sales %>%
  select(-Fuel_Price)
## # A tibble: 6,435 × 7
##    Store Date       Weekly_Sales Holiday_Flag Temperature   CPI Unemployment
##    <dbl> <chr>             <dbl>        <dbl>       <dbl> <dbl>        <dbl>
##  1     1 05-02-2010     1643691.            0        42.3  211.         8.11
##  2     1 12-02-2010     1641957.            1        38.5  211.         8.11
##  3     1 19-02-2010     1611968.            0        39.9  211.         8.11
##  4     1 26-02-2010     1409728.            0        46.6  211.         8.11
##  5     1 05-03-2010     1554807.            0        46.5  211.         8.11
##  6     1 12-03-2010     1439542.            0        57.8  211.         8.11
##  7     1 19-03-2010     1472516.            0        54.6  211.         8.11
##  8     1 26-03-2010     1404430.            0        51.4  211.         8.11
##  9     1 02-04-2010     1594968.            0        62.3  211.         7.81
## 10     1 09-04-2010     1545419.            0        65.9  211.         7.81
## # ℹ 6,425 more rows

#MUTATE
# This function adds new columns or modifies existing ones.

# Flag the rows that belong to store 1 and whose weekly sales exceed 20000.
# Store is compared numerically: grepl(1, Store) is a text match on the digit
# "1", so it would also flag any store whose number contains a 1 (10, 11, 21,
# ...), not only store 1.
Walmart_Sales %>%
  select(Date, Weekly_Sales, Temperature, Fuel_Price, Store) %>%
  # mutate() appends the new column last, so no further select() is needed.
  mutate(Sales_store1 = Store == 1 & Weekly_Sales > 20000)
## # A tibble: 6,435 × 6
##    Date       Weekly_Sales Temperature Fuel_Price Store Sales_store1
##    <chr>             <dbl>       <dbl>      <dbl> <dbl> <lgl>       
##  1 05-02-2010     1643691.        42.3       2.57     1 TRUE        
##  2 12-02-2010     1641957.        38.5       2.55     1 TRUE        
##  3 19-02-2010     1611968.        39.9       2.51     1 TRUE        
##  4 26-02-2010     1409728.        46.6       2.56     1 TRUE        
##  5 05-03-2010     1554807.        46.5       2.62     1 TRUE        
##  6 12-03-2010     1439542.        57.8       2.67     1 TRUE        
##  7 19-03-2010     1472516.        54.6       2.72     1 TRUE        
##  8 26-03-2010     1404430.        51.4       2.73     1 TRUE        
##  9 02-04-2010     1594968.        62.3       2.72     1 TRUE        
## 10 09-04-2010     1545419.        65.9       2.77     1 TRUE        
## # ℹ 6,425 more rows

#FILTER

# Rows for stores 1 and 3 whose weekly sales are above 25000.
Walmart_Sales %>%
  select(Date, Weekly_Sales, Temperature, Fuel_Price, Store) %>%
  filter(Store %in% c(1, 3) & Weekly_Sales > 25000)
## # A tibble: 286 × 5
##    Date       Weekly_Sales Temperature Fuel_Price Store
##    <chr>             <dbl>       <dbl>      <dbl> <dbl>
##  1 05-02-2010     1643691.        42.3       2.57     1
##  2 12-02-2010     1641957.        38.5       2.55     1
##  3 19-02-2010     1611968.        39.9       2.51     1
##  4 26-02-2010     1409728.        46.6       2.56     1
##  5 05-03-2010     1554807.        46.5       2.62     1
##  6 12-03-2010     1439542.        57.8       2.67     1
##  7 19-03-2010     1472516.        54.6       2.72     1
##  8 26-03-2010     1404430.        51.4       2.73     1
##  9 02-04-2010     1594968.        62.3       2.72     1
## 10 09-04-2010     1545419.        65.9       2.77     1
## # ℹ 276 more rows

#Distinct

# Unique dates on which store 1 has a record.
Walmart_Sales %>%
  filter(Store == 1) %>%
  distinct(Date)
## # A tibble: 143 × 1
##    Date      
##    <chr>     
##  1 05-02-2010
##  2 12-02-2010
##  3 19-02-2010
##  4 26-02-2010
##  5 05-03-2010
##  6 12-03-2010
##  7 19-03-2010
##  8 26-03-2010
##  9 02-04-2010
## 10 09-04-2010
## # ℹ 133 more rows

#Group_by & Summarise

# Sum store 1's weekly sales for each distinct temperature value.
# NOTE(review): Temperature is a continuous reading, so this produces as many
# groups (143) as store 1 has distinct dates -- confirm it is the intended key.
Walmart_Sales %>%
  filter(Store == 1) %>%
  select(Temperature, Weekly_Sales) %>%
  group_by(Temperature) %>%
  summarise(Weekly_Sales = sum(Weekly_Sales))
## # A tibble: 143 × 2
##    Temperature Weekly_Sales
##          <dbl>        <dbl>
##  1        35.4     1391014.
##  2        36.4     1649615.
##  3        38.5     1641957.
##  4        39.9     1611968.
##  5        42.3     1606630.
##  6        42.3     1643691.
##  7        43.8     1316899.
##  8        43.9     1799682.
##  9        44.0     1327405.
## 10        44.6     1497463.
## # ℹ 133 more rows

#Arrange

# Ten temperature groups of store 1 with the highest summed weekly sales:
# sort the per-temperature totals in descending order, then keep the top ten.
Walmart_Sales %>%
  filter(Store == 1) %>%
  select(Temperature, Weekly_Sales) %>%
  group_by(Temperature) %>%
  summarise(Weekly_Sales = sum(Weekly_Sales)) %>%
  arrange(desc(Weekly_Sales)) %>%
  head(n = 10)
## # A tibble: 10 × 2
##    Temperature Weekly_Sales
##          <dbl>        <dbl>
##  1        52.3     2387950.
##  2        48.0     2270189.
##  3        60.1     2033321.
##  4        64.5     1955624.
##  5        70.4     1899677.
##  6        49.8     1891035.
##  7        51.6     1881177.
##  8        45.3     1819870 
##  9        48.0     1802477.
## 10        43.9     1799682.

#COUNT

# Count rows per Fuel_Price value, keep the first ten rows of that table, and
# sort those ten by frequency.
# NOTE(review): head() runs before arrange(), so the ten rows come from the
# top of count()'s table rather than from the most frequent prices; sort first
# if a top-10 by frequency is what is wanted.
Walmart_Sales %>%
  count(Fuel_Price) %>%
  head(n = 10) %>%
  arrange(desc(n))
## # A tibble: 10 × 2
##    Fuel_Price     n
##         <dbl> <int>
##  1       2.51    14
##  2       2.55    14
##  3       2.54     2
##  4       2.47     1
##  5       2.51     1
##  6       2.52     1
##  7       2.53     1
##  8       2.54     1
##  9       2.54     1
## 10       2.54     1