# WALMART SALES ## El dataset Walmart Sales nos da información relevante sobre las ventas en Walmart según varios parámetros, como la temperatura de la región, el costo del combustible, la tasa de desempleo y otras variables que nos ayudan a identificar patrones que puedan influir en las ventas.
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr) ##Con esta función podemos leer el archivo csv que acabamos de cargar
# Load the Walmart sales CSV into a tibble; read_csv() also reports the
# column types it guessed while parsing.
Walmart_Sales <- read_csv(file = "Walmart_Sales.csv")
## Rows: 6435 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Date
## dbl (7): Store, Weekly_Sales, Holiday_Flag, Temperature, Fuel_Price, CPI, Un...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Retrieve the column specification readr used for the import and display it.
especificacion_columnas <- Walmart_Sales %>% spec()
print(especificacion_columnas)
## cols(
## Store = col_double(),
## Date = col_character(),
## Weekly_Sales = col_double(),
## Holiday_Flag = col_double(),
## Temperature = col_double(),
## Fuel_Price = col_double(),
## CPI = col_double(),
## Unemployment = col_double()
## )
# Preview the first five rows using a plain function call.
head(x = Walmart_Sales, n = 5)
## # A tibble: 5 × 8
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 05-02-2010 1643691. 0 42.3 2.57 211.
## 2 1 12-02-2010 1641957. 1 38.5 2.55 211.
## 3 1 19-02-2010 1611968. 0 39.9 2.51 211.
## 4 1 26-02-2010 1409728. 0 46.6 2.56 211.
## 5 1 05-03-2010 1554807. 0 46.5 2.62 211.
## # ℹ 1 more variable: Unemployment <dbl>
# Preview the first ten rows; the pipe passes the tibble as head()'s first
# argument.
Walmart_Sales %>%
  head(n = 10)
## # A tibble: 10 × 8
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 05-02-2010 1643691. 0 42.3 2.57 211.
## 2 1 12-02-2010 1641957. 1 38.5 2.55 211.
## 3 1 19-02-2010 1611968. 0 39.9 2.51 211.
## 4 1 26-02-2010 1409728. 0 46.6 2.56 211.
## 5 1 05-03-2010 1554807. 0 46.5 2.62 211.
## 6 1 12-03-2010 1439542. 0 57.8 2.67 211.
## 7 1 19-03-2010 1472516. 0 54.6 2.72 211.
## 8 1 26-03-2010 1404430. 0 51.4 2.73 211.
## 9 1 02-04-2010 1594968. 0 62.3 2.72 211.
## 10 1 09-04-2010 1545419. 0 65.9 2.77 211.
## # ℹ 1 more variable: Unemployment <dbl>
# Same ten-row preview, but piping the row count: the `.` placeholder routes
# the left-hand side into `n` instead of the first argument.
10 %>%
  head(x = Walmart_Sales, n = .)
## # A tibble: 10 × 8
## Store Date Weekly_Sales Holiday_Flag Temperature Fuel_Price CPI
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 05-02-2010 1643691. 0 42.3 2.57 211.
## 2 1 12-02-2010 1641957. 1 38.5 2.55 211.
## 3 1 19-02-2010 1611968. 0 39.9 2.51 211.
## 4 1 26-02-2010 1409728. 0 46.6 2.56 211.
## 5 1 05-03-2010 1554807. 0 46.5 2.62 211.
## 6 1 12-03-2010 1439542. 0 57.8 2.67 211.
## 7 1 19-03-2010 1472516. 0 54.6 2.72 211.
## 8 1 26-03-2010 1404430. 0 51.4 2.73 211.
## 9 1 02-04-2010 1594968. 0 62.3 2.72 211.
## 10 1 09-04-2010 1545419. 0 65.9 2.77 211.
## # ℹ 1 more variable: Unemployment <dbl>
# SELECT ----
# Keep only the date, weekly sales, temperature and fuel price columns.
Walmart_Sales %>%
  select(Date, Weekly_Sales, Temperature, Fuel_Price)
## # A tibble: 6,435 × 4
## Date Weekly_Sales Temperature Fuel_Price
## <chr> <dbl> <dbl> <dbl>
## 1 05-02-2010 1643691. 42.3 2.57
## 2 12-02-2010 1641957. 38.5 2.55
## 3 19-02-2010 1611968. 39.9 2.51
## 4 26-02-2010 1409728. 46.6 2.56
## 5 05-03-2010 1554807. 46.5 2.62
## 6 12-03-2010 1439542. 57.8 2.67
## 7 19-03-2010 1472516. 54.6 2.72
## 8 26-03-2010 1404430. 51.4 2.73
## 9 02-04-2010 1594968. 62.3 2.72
## 10 09-04-2010 1545419. 65.9 2.77
## # ℹ 6,425 more rows
# Drop the fuel price column and keep every other one.
Walmart_Sales %>%
  select(-Fuel_Price)
## # A tibble: 6,435 × 7
## Store Date Weekly_Sales Holiday_Flag Temperature CPI Unemployment
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 05-02-2010 1643691. 0 42.3 211. 8.11
## 2 1 12-02-2010 1641957. 1 38.5 211. 8.11
## 3 1 19-02-2010 1611968. 0 39.9 211. 8.11
## 4 1 26-02-2010 1409728. 0 46.6 211. 8.11
## 5 1 05-03-2010 1554807. 0 46.5 211. 8.11
## 6 1 12-03-2010 1439542. 0 57.8 211. 8.11
## 7 1 19-03-2010 1472516. 0 54.6 211. 8.11
## 8 1 26-03-2010 1404430. 0 51.4 211. 8.11
## 9 1 02-04-2010 1594968. 0 62.3 211. 7.81
## 10 1 09-04-2010 1545419. 0 65.9 211. 7.81
## # ℹ 6,425 more rows
# MUTATE ----
# mutate() adds new columns or modifies existing ones.
# Sales_store1 flags rows that belong to store 1 and whose weekly sales exceed
# 20000. Store is compared with `==` on purpose: grepl(1, Store) is a regex
# search for the digit "1", so it would also flag any store number that merely
# contains a 1 (e.g. 10 or 21).
# mutate() appends the new column after the selected ones, so no second
# select() is needed to obtain this column order.
Walmart_Sales %>%
  select(Date, Weekly_Sales, Temperature, Fuel_Price, Store) %>%
  mutate(Sales_store1 = Store == 1 & Weekly_Sales > 20000)
## # A tibble: 6,435 × 6
## Date Weekly_Sales Temperature Fuel_Price Store Sales_store1
## <chr> <dbl> <dbl> <dbl> <dbl> <lgl>
## 1 05-02-2010 1643691. 42.3 2.57 1 TRUE
## 2 12-02-2010 1641957. 38.5 2.55 1 TRUE
## 3 19-02-2010 1611968. 39.9 2.51 1 TRUE
## 4 26-02-2010 1409728. 46.6 2.56 1 TRUE
## 5 05-03-2010 1554807. 46.5 2.62 1 TRUE
## 6 12-03-2010 1439542. 57.8 2.67 1 TRUE
## 7 19-03-2010 1472516. 54.6 2.72 1 TRUE
## 8 26-03-2010 1404430. 51.4 2.73 1 TRUE
## 9 02-04-2010 1594968. 62.3 2.72 1 TRUE
## 10 09-04-2010 1545419. 65.9 2.77 1 TRUE
## # ℹ 6,425 more rows
# FILTER ----
# Rows from store 1 or store 3 whose weekly sales are above 25000.
Walmart_Sales %>%
  select(Date, Weekly_Sales, Temperature, Fuel_Price, Store) %>%
  filter(Store %in% c(1, 3), Weekly_Sales > 25000)
## # A tibble: 286 × 5
## Date Weekly_Sales Temperature Fuel_Price Store
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 05-02-2010 1643691. 42.3 2.57 1
## 2 12-02-2010 1641957. 38.5 2.55 1
## 3 19-02-2010 1611968. 39.9 2.51 1
## 4 26-02-2010 1409728. 46.6 2.56 1
## 5 05-03-2010 1554807. 46.5 2.62 1
## 6 12-03-2010 1439542. 57.8 2.67 1
## 7 19-03-2010 1472516. 54.6 2.72 1
## 8 26-03-2010 1404430. 51.4 2.73 1
## 9 02-04-2010 1594968. 62.3 2.72 1
## 10 09-04-2010 1545419. 65.9 2.77 1
## # ℹ 276 more rows
# DISTINCT ----
# Unique dates that appear in the rows of store 1.
Walmart_Sales %>%
  filter(Store == 1) %>%
  distinct(Date)
## # A tibble: 143 × 1
## Date
## <chr>
## 1 05-02-2010
## 2 12-02-2010
## 3 19-02-2010
## 4 26-02-2010
## 5 05-03-2010
## 6 12-03-2010
## 7 19-03-2010
## 8 26-03-2010
## 9 02-04-2010
## 10 09-04-2010
## # ℹ 133 more rows
# GROUP_BY & SUMMARISE ----
# Total weekly sales of store 1 for each distinct temperature value.
Walmart_Sales %>%
  filter(Store == 1) %>%
  select(Temperature, Weekly_Sales) %>%
  group_by(Temperature) %>%
  summarise(Weekly_Sales = sum(Weekly_Sales))
## # A tibble: 143 × 2
## Temperature Weekly_Sales
## <dbl> <dbl>
## 1 35.4 1391014.
## 2 36.4 1649615.
## 3 38.5 1641957.
## 4 39.9 1611968.
## 5 42.3 1606630.
## 6 42.3 1643691.
## 7 43.8 1316899.
## 8 43.9 1799682.
## 9 44.0 1327405.
## 10 44.6 1497463.
## # ℹ 133 more rows
# ARRANGE ----
# Per-temperature sales totals for store 1, sorted from highest to lowest,
# keeping only the first ten rows.
Walmart_Sales %>%
  filter(Store == 1) %>%
  select(Temperature, Weekly_Sales) %>%
  group_by(Temperature) %>%
  summarise(Weekly_Sales = sum(Weekly_Sales)) %>%
  arrange(desc(Weekly_Sales)) %>%
  head(n = 10)
## # A tibble: 10 × 2
## Temperature Weekly_Sales
## <dbl> <dbl>
## 1 52.3 2387950.
## 2 48.0 2270189.
## 3 60.1 2033321.
## 4 64.5 1955624.
## 5 70.4 1899677.
## 6 49.8 1891035.
## 7 51.6 1881177.
## 8 45.3 1819870
## 9 48.0 1802477.
## 10 43.9 1799682.
# COUNT ----
# Ten most frequent fuel prices. The counts are sorted before truncating:
# calling head(10) first would only rank the first ten rows of count(), which
# are ordered by Fuel_Price rather than by frequency.
# NOTE(review): the printed output recorded below was produced by the earlier
# head-then-arrange order and should be regenerated.
Walmart_Sales %>%
  count(Fuel_Price, sort = TRUE) %>%
  head(n = 10)
## # A tibble: 10 × 2
## Fuel_Price n
## <dbl> <int>
## 1 2.51 14
## 2 2.55 14
## 3 2.54 2
## 4 2.47 1
## 5 2.51 1
## 6 2.52 1
## 7 2.53 1
## 8 2.54 1
## 9 2.54 1
## 10 2.54 1