hw_5

# Load the required library for reading Excel files
library(readxl)

# Read the data from the Excel file
bike_orderlines <- read_excel("bike_orderlines.xlsx")

# Find unique categories for category_1
unique_categories_1 <- unique(bike_orderlines$category_1)

# Find unique categories for category_2
unique_categories_2 <- unique(bike_orderlines$category_2)

# Find unique categories for frame_material
unique_frame_materials <- unique(bike_orderlines$frame_material)

# Print the unique categories
unique_categories_1

## [1] "Mountain" "Road"

unique_categories_2

## [1] "Over Mountain"      "Trail"              "Elite Road"        
## [4] "Endurance Road"     "Sport"              "Cross Country Race"
## [7] "Cyclocross"         "Triathalon"         "Fat Bike"

unique_frame_materials

## [1] "Carbon"   "Aluminum"

# Load the required library for reading Excel files
library(readxl)

# Read the data from the Excel file
bike_orderlines <- read_excel("bike_orderlines.xlsx")

# Calculate the sales for each product category
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Primary Category (category_1)
primary_category_sales <- bike_orderlines %>%
  group_by('Primary Category' = category_1) %>%
  summarise(Sales = sum(total_price))

# Secondary Category (category_2)
secondary_category_sales <- bike_orderlines %>%
  group_by('Secondary Category' = category_2) %>%
  summarise(Sales = sum(total_price))

# Frame Material (frame_material)
frame_material_sales <- bike_orderlines %>%
  group_by('Frame Material' = frame_material) %>%
  summarise(Sales = sum(total_price))

# Find the product categories with the most sales
primary_max_sales <- primary_category_sales %>%
  filter(Sales == max(Sales))

secondary_max_sales <- secondary_category_sales %>%
  filter(Sales == max(Sales))

frame_material_max_sales <- frame_material_sales %>%
  filter(Sales == max(Sales))

# Print the product categories with the most sales
primary_max_sales

## # A tibble: 1 × 2
##   `Primary Category`    Sales
##   <chr>                 <dbl>
## 1 Mountain           39154735

secondary_max_sales

## # A tibble: 1 × 2
##   `Secondary Category`    Sales
##   <chr>                   <dbl>
## 1 Cross Country Race   19224630

frame_material_max_sales

## # A tibble: 1 × 2
##   `Frame Material`    Sales
##   <chr>               <dbl>
## 1 Carbon           52940540

# Load the required libraries
library(readxl)
library(dplyr)

# Read the data from the Excel file
bike_orderlines <- read_excel("bike_orderlines.xlsx")

# Group the data by primary and secondary categories and summarize the frame materials
combinations_summary <- bike_orderlines %>%
  group_by('Primary Category' = category_1, 'Secondary Category' = category_2) %>%
  summarise(Aluminum = sum(ifelse(frame_material == 'Aluminum', total_price, 0)),
            Carbon = sum(ifelse(frame_material == 'Carbon', total_price, 0)),
            'Total Sales' = sum(total_price))

## `summarise()` has grouped output by 'Primary Category'. You can override using
## the `.groups` argument.

# Check if both Aluminum and Carbon are present in each combination
combinations_summary <- combinations_summary %>%
  mutate(Contains_Aluminum_and_Carbon = Aluminum > 0 & Carbon > 0)

# Print the summary
combinations_summary

## # A tibble: 9 × 6
## # Groups:   Primary Category [2]
##   `Primary Category` `Secondary Category` Aluminum   Carbon `Total Sales`
##   <chr>              <chr>                   <dbl>    <dbl>         <dbl>
## 1 Mountain           Cross Country Race    3318560 15906070      19224630
## 2 Mountain           Fat Bike              1052620        0       1052620
## 3 Mountain           Over Mountain               0  7571270       7571270
## 4 Mountain           Sport                 1932755        0       1932755
## 5 Mountain           Trail                 4537610  4835850       9373460
## 6 Road               Cyclocross                  0  2108120       2108120
## 7 Road               Elite Road            5637795  9696870      15334665
## 8 Road               Endurance Road        1612450  8768610      10381060
## 9 Road               Triathalon                  0  4053750       4053750
## # ℹ 1 more variable: Contains_Aluminum_and_Carbon <lgl>

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)

##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

hw_5

Baasanjav

2023-10-30

R Markdown

Including Plots