Warning: package 'tidyverse' was built under R version 4.5.3
Warning: package 'ggplot2' was built under R version 4.5.3
Warning: package 'tibble' was built under R version 4.5.3
Warning: package 'tidyr' was built under R version 4.5.3
Warning: package 'readr' was built under R version 4.5.3
Warning: package 'purrr' was built under R version 4.5.3
Warning: package 'dplyr' was built under R version 4.5.3
Warning: package 'stringr' was built under R version 4.5.3
Warning: package 'forcats' was built under R version 4.5.3
Warning: package 'lubridate' was built under R version 4.5.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.2.0 ✔ readr 2.2.0
✔ forcats 1.0.1 ✔ stringr 1.6.0
✔ ggplot2 4.0.2 ✔ tibble 3.3.1
✔ lubridate 1.9.5 ✔ tidyr 1.3.2
✔ purrr 1.2.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)library(dplyr)library(readxl)
Warning: package 'readxl' was built under R version 4.5.3
# Load dataset (replace with correct path)superstore <-read_excel("superstore (1).xlsx")
Question 1
# Filter countries with >200 records# Keep rows with shipping cost above country average# Create profit/shipping ratio and rank top 15superstore %>%group_by(Country) %>%filter(n() >200) %>%mutate(avg_ship =mean(Shipping.Cost, na.rm=TRUE)) %>%filter(Shipping.Cost > avg_ship) %>%mutate(profit_per_ship = Profit / Shipping.Cost) %>%arrange(desc(profit_per_ship)) %>%select(Customer.Name, Product.Name, Country, Sales, Profit, Shipping.Cost, profit_per_ship) %>%head(15)
# A tibble: 15 × 7
# Groups: Country [9]
Customer.Name Product.Name Country Sales Profit Shipping.Cost profit_per_ship
<chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Tom Ashbrook Canon image… United… 11200 3920. 46.0 85.3
2 Pauline John… Barricks Tr… Ukraine 4487 1436. 50.4 28.5
3 Christopher … Canon PC106… United… 2800 945. 35.0 27.0
4 Tamara Chand Canon image… United… 17500 8400. 349. 24.1
5 Justin Hirsh KitchenAid … Iraq 3172 1237. 53.6 23.1
6 Maribeth Sch… Samsung Sma… China 2545 1069. 52.3 20.4
7 Michael Moore Cisco Smart… Germany 2617 1151. 57.0 20.2
8 Kelly Collis… Logitech P7… United… 3347 636. 32.2 19.7
9 Ben Peterman Dania Class… Austra… 2596 923. 47.8 19.3
10 Brad Thomas HP Fax Mach… Saudi … 1800 720. 38.5 18.7
11 Shirley Dani… Fellowes PB… United… 3813 1906. 103. 18.6
12 Katherine No… HP Fax and … United… 1553 776. 42.6 18.2
13 Robert Marley Office Star… Colomb… 930 456. 26.3 17.3
14 Adam Bellava… GBC DocuBin… United… 4355 1415. 82.7 17.1
15 Keith Herrera Sharp AL-15… United… 1200 435. 26.0 16.7
Question 2
# Market-year average profit per order (exclude zero/negative sales)# Keep groups with >=50 orders# Identify markets never below dataset median profit# Show top 10 by average salesmedian_profit <-median(superstore$Profit, na.rm=TRUE)superstore %>%filter(Sales >0) %>%group_by(Market, Year) %>%filter(n() >=50) %>%summarise(avg_profit =mean(Profit), avg_sales =mean(Sales), .groups="drop") %>%group_by(Market) %>%filter(all(avg_profit >= median_profit)) %>%arrange(desc(avg_sales)) %>%head(10)
# A tibble: 10 × 4
# Groups: Market [3]
Market Year avg_profit avg_sales
<chr> <dbl> <dbl> <dbl>
1 APAC 2013 42.6 337.
2 APAC 2011 42.8 329.
3 APAC 2012 37.6 321.
4 APAC 2014 37.0 319.
5 EU 2013 38.4 297.
6 EU 2014 36.5 295.
7 EU 2012 37.2 291.
8 EU 2011 37.2 289.
9 US 2011 24.9 243.
10 US 2013 31.7 236.
Question 3
# Typical discount per sub-category# Flag suspiciously high discounts# Label as loss-making vs non-loss-making# Count by market & segment, keep loss share >40%superstore %>%group_by(Sub.Category) %>%mutate(median_disc =median(Discount, na.rm=TRUE)) %>%filter(Discount > median_disc) %>%mutate(label =ifelse(Profit <0, "loss-making", "non-loss-making")) %>%group_by(Market, Segment, label) %>%summarise(count =n(), .groups="drop") %>%group_by(Market, Segment) %>%mutate(loss_share = count[label=="loss-making"] /sum(count)) %>%filter(loss_share >0.4) %>%arrange(desc(loss_share))
# A tibble: 30 × 5
# Groups: Market, Segment [15]
Market Segment label count loss_share
<chr> <chr> <chr> <int> <dbl>
1 EMEA Consumer loss-making 810 0.999
2 EMEA Consumer non-loss-making 1 0.999
3 EMEA Corporate loss-making 503 0.998
4 EMEA Corporate non-loss-making 1 0.998
5 EMEA Home Office loss-making 293 0.997
6 EMEA Home Office non-loss-making 1 0.997
7 Africa Home Office loss-making 191 0.979
8 Africa Home Office non-loss-making 4 0.979
9 Africa Corporate loss-making 307 0.975
10 Africa Corporate non-loss-making 8 0.975
# ℹ 20 more rows
Question 4
# Running total of profit, skip quantity <2# Stop when >25,000# Compare original order vs sorted by salesrunning_total <-0stop_row <-NAfor(i in1:nrow(superstore)){if(superstore$Quantity[i] >=2){ running_total <- running_total + superstore$Profit[i]if(running_total >25000){ stop_row <- ibreak } }}list(row=stop_row,order_id=superstore$Order.ID[stop_row],customer=superstore$Customer.Name[stop_row],total=running_total)