library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr) #Para leer archivo .csv
salesbike<-read_csv("Sales.csv")
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 113036 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): Date, Month, Age_Group, Customer_Gender, Country, State, Product_Ca...
## dbl (9): Day, Year, Customer_Age, Order_Quantity, Unit_Cost, Unit_Price, Pro...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(salesbike,10) #Muestra el encabezado de la data.frame por listas de diez filas
## # A tibble: 10 × 18
## Date Day Month Year Customer_Age Age_Group Customer_Gender Country State
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 2013-… 26 Nove… 2013 19 Youth (<… M Canada Brit…
## 2 2015-… 26 Nove… 2015 19 Youth (<… M Canada Brit…
## 3 2014-… 23 March 2014 49 Adults (… M Austra… New …
## 4 2016-… 23 March 2016 49 Adults (… M Austra… New …
## 5 2014-… 15 May 2014 47 Adults (… F Austra… New …
## 6 2016-… 15 May 2016 47 Adults (… F Austra… New …
## 7 2014-… 22 May 2014 47 Adults (… F Austra… Vict…
## 8 2016-… 22 May 2016 47 Adults (… F Austra… Vict…
## 9 2014-… 22 Febr… 2014 35 Adults (… M Austra… Vict…
## 10 2016-… 22 Febr… 2016 35 Adults (… M Austra… Vict…
## # ℹ 9 more variables: Product_Category <chr>, Sub_Category <chr>,
## # Product <chr>, Order_Quantity <dbl>, Unit_Cost <dbl>, Unit_Price <dbl>,
## # Profit <dbl>, Cost <dbl>, Revenue <dbl>
salesbike%>%head(10)
## # A tibble: 10 × 18
## Date Day Month Year Customer_Age Age_Group Customer_Gender Country State
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 2013-… 26 Nove… 2013 19 Youth (<… M Canada Brit…
## 2 2015-… 26 Nove… 2015 19 Youth (<… M Canada Brit…
## 3 2014-… 23 March 2014 49 Adults (… M Austra… New …
## 4 2016-… 23 March 2016 49 Adults (… M Austra… New …
## 5 2014-… 15 May 2014 47 Adults (… F Austra… New …
## 6 2016-… 15 May 2016 47 Adults (… F Austra… New …
## 7 2014-… 22 May 2014 47 Adults (… F Austra… Vict…
## 8 2016-… 22 May 2016 47 Adults (… F Austra… Vict…
## 9 2014-… 22 Febr… 2014 35 Adults (… M Austra… Vict…
## 10 2016-… 22 Febr… 2016 35 Adults (… M Austra… Vict…
## # ℹ 9 more variables: Product_Category <chr>, Sub_Category <chr>,
## # Product <chr>, Order_Quantity <dbl>, Unit_Cost <dbl>, Unit_Price <dbl>,
## # Profit <dbl>, Cost <dbl>, Revenue <dbl>
10%>%head(salesbike, .)
## # A tibble: 10 × 18
## Date Day Month Year Customer_Age Age_Group Customer_Gender Country State
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 2013-… 26 Nove… 2013 19 Youth (<… M Canada Brit…
## 2 2015-… 26 Nove… 2015 19 Youth (<… M Canada Brit…
## 3 2014-… 23 March 2014 49 Adults (… M Austra… New …
## 4 2016-… 23 March 2016 49 Adults (… M Austra… New …
## 5 2014-… 15 May 2014 47 Adults (… F Austra… New …
## 6 2016-… 15 May 2016 47 Adults (… F Austra… New …
## 7 2014-… 22 May 2014 47 Adults (… F Austra… Vict…
## 8 2016-… 22 May 2016 47 Adults (… F Austra… Vict…
## 9 2014-… 22 Febr… 2014 35 Adults (… M Austra… Vict…
## 10 2016-… 22 Febr… 2016 35 Adults (… M Austra… Vict…
## # ℹ 9 more variables: Product_Category <chr>, Sub_Category <chr>,
## # Product <chr>, Order_Quantity <dbl>, Unit_Cost <dbl>, Unit_Price <dbl>,
## # Profit <dbl>, Cost <dbl>, Revenue <dbl>
salesbike %>%
select(Date, Customer_Age, Customer_Gender, Country, 'Unit_Price')
## # A tibble: 113,036 × 5
## Date Customer_Age Customer_Gender Country Unit_Price
## <chr> <dbl> <chr> <chr> <dbl>
## 1 2013-11-26 19 M Canada 120
## 2 2015-11-26 19 M Canada 120
## 3 2014-03-23 49 M Australia 120
## 4 2016-03-23 49 M Australia 120
## 5 2014-05-15 47 F Australia 120
## 6 2016-05-15 47 F Australia 120
## 7 2014-05-22 47 F Australia 120
## 8 2016-05-22 47 F Australia 120
## 9 2014-02-22 35 M Australia 120
## 10 2016-02-22 35 M Australia 120
## # ℹ 113,026 more rows
salesbike %>%
select(Date:Country, State='Unit_Price')
## # A tibble: 113,036 × 9
## Date Day Month Year Customer_Age Age_Group Customer_Gender Country State
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr> <dbl>
## 1 2013-… 26 Nove… 2013 19 Youth (<… M Canada 120
## 2 2015-… 26 Nove… 2015 19 Youth (<… M Canada 120
## 3 2014-… 23 March 2014 49 Adults (… M Austra… 120
## 4 2016-… 23 March 2016 49 Adults (… M Austra… 120
## 5 2014-… 15 May 2014 47 Adults (… F Austra… 120
## 6 2016-… 15 May 2016 47 Adults (… F Austra… 120
## 7 2014-… 22 May 2014 47 Adults (… F Austra… 120
## 8 2016-… 22 May 2016 47 Adults (… F Austra… 120
## 9 2014-… 22 Febr… 2014 35 Adults (… M Austra… 120
## 10 2016-… 22 Febr… 2016 35 Adults (… M Austra… 120
## # ℹ 113,026 more rows
salesbike %>%
select(-'Unit_Cost', -'Unit_Price')
## # A tibble: 113,036 × 16
## Date Day Month Year Customer_Age Age_Group Customer_Gender Country State
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 2013-… 26 Nove… 2013 19 Youth (<… M Canada Brit…
## 2 2015-… 26 Nove… 2015 19 Youth (<… M Canada Brit…
## 3 2014-… 23 March 2014 49 Adults (… M Austra… New …
## 4 2016-… 23 March 2016 49 Adults (… M Austra… New …
## 5 2014-… 15 May 2014 47 Adults (… F Austra… New …
## 6 2016-… 15 May 2016 47 Adults (… F Austra… New …
## 7 2014-… 22 May 2014 47 Adults (… F Austra… Vict…
## 8 2016-… 22 May 2016 47 Adults (… F Austra… Vict…
## 9 2014-… 22 Febr… 2014 35 Adults (… M Austra… Vict…
## 10 2016-… 22 Febr… 2016 35 Adults (… M Austra… Vict…
## # ℹ 113,026 more rows
## # ℹ 7 more variables: Product_Category <chr>, Sub_Category <chr>,
## # Product <chr>, Order_Quantity <dbl>, Profit <dbl>, Cost <dbl>,
## # Revenue <dbl>
salesbike %>%
select(Date:Product, Unit_Price='Unit_Price') %>%
mutate(is_collab = grepl('Bike', Sub_Category) & grepl('Hitch Rack', Product)) %>%
select(Sub_Category,Product, is_collab, everything()) #La función grepl() se utiliza para buscar patrones en texto, y en este caso, está buscando la presencia de la palabra 'Featuring' en la columna "Product".
## # A tibble: 113,036 × 14
## Sub_Category Product is_collab Date Day Month Year Customer_Age Age_Group
## <chr> <chr> <lgl> <chr> <dbl> <chr> <dbl> <dbl> <chr>
## 1 Bike Racks Hitch … TRUE 2013… 26 Nove… 2013 19 Youth (<…
## 2 Bike Racks Hitch … TRUE 2015… 26 Nove… 2015 19 Youth (<…
## 3 Bike Racks Hitch … TRUE 2014… 23 March 2014 49 Adults (…
## 4 Bike Racks Hitch … TRUE 2016… 23 March 2016 49 Adults (…
## 5 Bike Racks Hitch … TRUE 2014… 15 May 2014 47 Adults (…
## 6 Bike Racks Hitch … TRUE 2016… 15 May 2016 47 Adults (…
## 7 Bike Racks Hitch … TRUE 2014… 22 May 2014 47 Adults (…
## 8 Bike Racks Hitch … TRUE 2016… 22 May 2016 47 Adults (…
## 9 Bike Racks Hitch … TRUE 2014… 22 Febr… 2014 35 Adults (…
## 10 Bike Racks Hitch … TRUE 2016… 22 Febr… 2016 35 Adults (…
## # ℹ 113,026 more rows
## # ℹ 5 more variables: Customer_Gender <chr>, Country <chr>, State <chr>,
## # Product_Category <chr>, Unit_Price <dbl>
salesbike %>%
select(Date, Customer_Age, Customer_Gender, Country, Unit_Price='Unit_Price') %>%
filter(Unit_Price >= 110, Customer_Age >= '30' & Customer_Gender == 'F')
## # A tibble: 372 × 5
## Date Customer_Age Customer_Gender Country Unit_Price
## <chr> <dbl> <chr> <chr> <dbl>
## 1 2014-05-15 47 F Australia 120
## 2 2016-05-15 47 F Australia 120
## 3 2014-05-22 47 F Australia 120
## 4 2016-05-22 47 F Australia 120
## 5 2013-07-30 32 F Australia 120
## 6 2015-07-30 32 F Australia 120
## 7 2014-01-02 48 F Canada 120
## 8 2016-01-02 48 F Canada 120
## 9 2014-03-13 48 F Canada 120
## 10 2016-03-13 48 F Canada 120
## # ℹ 362 more rows
salesbike %>%
select(Date:Customer_Gender, Unit_Price='Unit_Price') %>%
filter(Customer_Gender == 'M')
## # A tibble: 32,108 × 8
## Date Day Month Year Customer_Age Age_Group Customer_Gender Unit_Price
## <chr> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2013-11-… 26 Nove… 2013 19 Youth (<… M 120
## 2 2015-11-… 26 Nove… 2015 19 Youth (<… M 120
## 3 2014-03-… 23 March 2014 49 Adults (… M 120
## 4 2016-03-… 23 March 2016 49 Adults (… M 120
## 5 2014-02-… 22 Febr… 2014 35 Adults (… M 120
## 6 2016-02-… 22 Febr… 2016 35 Adults (… M 120
## 7 2013-07-… 15 July 2013 34 Young Ad… M 120
## 8 2015-07-… 15 July 2015 34 Young Ad… M 120
## 9 2013-08-… 2 Augu… 2013 29 Young Ad… M 120
## 10 2015-08-… 2 Augu… 2015 29 Young Ad… M 120
## # ℹ 32,098 more rows
distinct <- salesbike %>%
select(Date:Customer_Gender, Unit_Price='Unit_Price') %>%
filter(Customer_Gender == 'M') %>%
distinct(Customer_Age)
salesbike %>%
select(Date:Customer_Gender, Product, Unit_Price='Unit_Price') %>%
filter(Customer_Gender == 'M'| Customer_Gender =='F') %>%
group_by(Product) %>% #solo funciono con esta variable group_by
summarise(total_Unit_Price = mean(Unit_Price))
## # A tibble: 20 × 2
## Product total_Unit_Price
## <chr> <dbl>
## 1 AWC Logo Cap 9
## 2 All-Purpose Bike Stand 159
## 3 Bike Wash - Dissolver 8
## 4 Fender Set - Mountain 22
## 5 HL Mountain Tire 35
## 6 HL Road Tire 33
## 7 Hitch Rack - 4-Bike 120
## 8 Hydration Pack - 70 oz. 55
## 9 LL Mountain Tire 25
## 10 LL Road Tire 21
## 11 ML Mountain Tire 30
## 12 ML Road Tire 25
## 13 Mountain Bottle Cage 10
## 14 Mountain Tire Tube 5
## 15 Patch Kit/8 Patches 2
## 16 Road Bottle Cage 9
## 17 Road Tire Tube 4
## 18 Touring Tire 29
## 19 Touring Tire Tube 5
## 20 Water Bottle - 30 oz. 5
salesbike %>%
select(Date:Customer_Gender, Unit_Price='Unit_Price') %>%
filter(Customer_Gender == 'M') %>%
group_by(Year) %>%
summarise(total_Unit_Price = mean(Unit_Price)) %>%
arrange(desc(total_Unit_Price), Year) %>%
head(10)
## # A tibble: 4 × 2
## Year total_Unit_Price
## <dbl> <dbl>
## 1 2013 13.1
## 2 2015 13.1
## 3 2014 12.9
## 4 2016 12.9
salesbike %>%
select(Date:Customer_Gender, Unit_Price='Unit_Price') %>%
count(Customer_Gender) %>%
arrange(desc(n))
## # A tibble: 3 × 2
## Customer_Gender n
## <chr> <int>
## 1 <NA> 50714
## 2 M 32108
## 3 F 30214