# Load the order export from the Excel workbook and inspect its structure.
# NOTE(review): the path is absolute and machine-specific, so the script only
# runs where this exact file location exists.
data <- read_excel(
  path = "C:\\Users\\sjtha\\OneDrive\\Desktop\\shopify\\Data.xlsx"
)
str(data)
## tibble [5,000 x 7] (S3: tbl_df/tbl/data.frame)
## $ order_id : num [1:5000] 16 61 521 1105 1363 ...
## $ shop_id : num [1:5000] 42 42 42 42 42 42 42 42 42 42 ...
## $ user_id : num [1:5000] 607 607 607 607 607 607 607 607 607 607 ...
## $ order_amount : num [1:5000] 704000 704000 704000 704000 704000 704000 704000 704000 704000 704000 ...
## $ total_items : num [1:5000] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ payment_method: chr [1:5000] "credit_card" "credit_card" "credit_card" "credit_card" ...
## $ created_at : POSIXct[1:5000], format: "2017-03-07 04:00:00" "2017-03-04 04:00:00" ...
# Treat the identifier columns and the payment method as categorical values
# instead of plain numbers/strings, then re-inspect the structure.
id_like_cols <- c("order_id", "shop_id", "user_id", "payment_method")
data[id_like_cols] <- lapply(data[id_like_cols], as.factor)
str(data)
## tibble [5,000 x 7] (S3: tbl_df/tbl/data.frame)
## $ order_id : Factor w/ 5000 levels "1","2","3","4",..: 16 61 521 1105 1363 1437 1563 1603 2154 2298 ...
## $ shop_id : Factor w/ 100 levels "1","2","3","4",..: 42 42 42 42 42 42 42 42 42 42 ...
## $ user_id : Factor w/ 301 levels "607","700","701",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ order_amount : num [1:5000] 704000 704000 704000 704000 704000 704000 704000 704000 704000 704000 ...
## $ total_items : num [1:5000] 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ payment_method: Factor w/ 3 levels "cash","credit_card",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ created_at : POSIXct[1:5000], format: "2017-03-07 04:00:00" "2017-03-04 04:00:00" ...
# Average order value (AOV): total revenue divided by the number of orders.
# The order count is taken from the data itself rather than a hard-coded 5000,
# so the figure stays correct if rows are added, removed, or filtered.
total_revenue <- sum(data$order_amount)
AOV <- total_revenue / nrow(data)
AOV
## [1] 3145.128
# Number of orders (n) for each distinct total_items value.
trial <- count(data, total_items)
trial
## # A tibble: 8 x 2
## total_items n
## <dbl> <int>
## 1 1 1830
## 2 2 1832
## 3 3 941
## 4 4 293
## 5 5 77
## 6 6 9
## 7 8 1
## 8 2000 17
# Make the item count discrete so every distinct value gets its own bar and
# fill colour, then draw one bar per value with height equal to its order count.
trial$total_items <- as.factor(trial$total_items)
ggplot(trial, aes(x = total_items, y = n, fill = total_items)) +
  geom_col()
# Median order amount across all rows of the data.
medi <- median(data[["order_amount"]])
medi
## [1] 284
First Question Solution Screenshot
Second Question Solution Screenshot
Third Question Solution Screenshot