#Loading Library
library(tidyverse)
library(janitor)
library(skimr)
library(lubridate)
library(readxl)
library(treemapify)
#Importing and reading the data frame
superstore <- read_excel("Superstore.xls",
col_types = c("numeric", "text", "date",
"date", "text", "text", "text", "text",
"text", "text", "text", "numeric",
"text", "text", "text", "text", "text",
"numeric", "numeric", "numeric",
"numeric"))
#First six row of my data
head(superstore)
## # A tibble: 6 × 21
## `Row ID` `Order ID` `Order Date` `Ship Date` Ship …¹ Custo…²
## <dbl> <chr> <dttm> <dttm> <chr> <chr>
## 1 1 CA-2016-1521… 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125…
## 2 2 CA-2016-1521… 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125…
## 3 3 CA-2016-1386… 2016-06-12 00:00:00 2016-06-16 00:00:00 Second… DV-130…
## 4 4 US-2015-1089… 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203…
## 5 5 US-2015-1089… 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203…
## 6 6 CA-2014-1158… 2014-06-09 00:00:00 2014-06-14 00:00:00 Standa… BH-117…
## # … with 15 more variables: `Customer Name` <chr>, Segment <chr>,
## # Country <chr>, City <chr>, State <chr>, `Postal Code` <dbl>, Region <chr>,
## # `Product ID` <chr>, Category <chr>, `Sub-Category` <chr>,
## # `Product Name` <chr>, Sales <dbl>, Quantity <dbl>, Discount <dbl>,
## # Profit <dbl>, and abbreviated variable names ¹`Ship Mode`, ²`Customer ID`
#The structure of the data
glimpse(superstore)
## Rows: 9,994
## Columns: 21
## $ `Row ID` <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,…
## $ `Order ID` <chr> "CA-2016-152156", "CA-2016-152156", "CA-2016-138688", …
## $ `Order Date` <dttm> 2016-11-08, 2016-11-08, 2016-06-12, 2015-10-11, 2015-…
## $ `Ship Date` <dttm> 2016-11-11, 2016-11-11, 2016-06-16, 2015-10-18, 2015-…
## $ `Ship Mode` <chr> "Second Class", "Second Class", "Second Class", "Stand…
## $ `Customer ID` <chr> "CG-12520", "CG-12520", "DV-13045", "SO-20335", "SO-20…
## $ `Customer Name` <chr> "Claire Gute", "Claire Gute", "Darrin Van Huff", "Sean…
## $ Segment <chr> "Consumer", "Consumer", "Corporate", "Consumer", "Cons…
## $ Country <chr> "United States", "United States", "United States", "Un…
## $ City <chr> "Henderson", "Henderson", "Los Angeles", "Fort Lauderd…
## $ State <chr> "Kentucky", "Kentucky", "California", "Florida", "Flor…
## $ `Postal Code` <dbl> 42420, 42420, 90036, 33311, 33311, 90032, 90032, 90032…
## $ Region <chr> "South", "South", "West", "South", "South", "West", "W…
## $ `Product ID` <chr> "FUR-BO-10001798", "FUR-CH-10000454", "OFF-LA-10000240…
## $ Category <chr> "Furniture", "Furniture", "Office Supplies", "Furnitur…
## $ `Sub-Category` <chr> "Bookcases", "Chairs", "Labels", "Tables", "Storage", …
## $ `Product Name` <chr> "Bush Somerset Collection Bookcase", "Hon Deluxe Fabri…
## $ Sales <dbl> 261.9600, 731.9400, 14.6200, 957.5775, 22.3680, 48.860…
## $ Quantity <dbl> 2, 3, 2, 5, 2, 7, 4, 6, 3, 5, 9, 4, 3, 3, 5, 3, 6, 2, …
## $ Discount <dbl> 0.00, 0.00, 0.00, 0.45, 0.20, 0.00, 0.00, 0.20, 0.20, …
## $ Profit <dbl> 41.9136, 219.5820, 6.8714, -383.0310, 2.5164, 14.1694,…
#Cleaning up the variable names
superstore <- clean_names(superstore)
#Checking my variables or column name of the data frame after cleaning up
colnames(superstore)
## [1] "row_id" "order_id" "order_date" "ship_date"
## [5] "ship_mode" "customer_id" "customer_name" "segment"
## [9] "country" "city" "state" "postal_code"
## [13] "region" "product_id" "category" "sub_category"
## [17] "product_name" "sales" "quantity" "discount"
## [21] "profit"
#Checking out the dimension of the data frame
dim(superstore)
## [1] 9994 21
#Checking for missing value in the data frame
sum(is.na(superstore))
## [1] 0
#checking out the data type of the variables
sapply(superstore, class)
## $row_id
## [1] "numeric"
##
## $order_id
## [1] "character"
##
## $order_date
## [1] "POSIXct" "POSIXt"
##
## $ship_date
## [1] "POSIXct" "POSIXt"
##
## $ship_mode
## [1] "character"
##
## $customer_id
## [1] "character"
##
## $customer_name
## [1] "character"
##
## $segment
## [1] "character"
##
## $country
## [1] "character"
##
## $city
## [1] "character"
##
## $state
## [1] "character"
##
## $postal_code
## [1] "numeric"
##
## $region
## [1] "character"
##
## $product_id
## [1] "character"
##
## $category
## [1] "character"
##
## $sub_category
## [1] "character"
##
## $product_name
## [1] "character"
##
## $sales
## [1] "numeric"
##
## $quantity
## [1] "numeric"
##
## $discount
## [1] "numeric"
##
## $profit
## [1] "numeric"
#Getting the summary of the variables
summary(superstore)
## row_id order_id order_date
## Min. : 1 Length:9994 Min. :2014-01-03 00:00:00.00
## 1st Qu.:2499 Class :character 1st Qu.:2015-05-23 00:00:00.00
## Median :4998 Mode :character Median :2016-06-26 00:00:00.00
## Mean :4998 Mean :2016-04-30 00:07:12.25
## 3rd Qu.:7496 3rd Qu.:2017-05-14 00:00:00.00
## Max. :9994 Max. :2017-12-30 00:00:00.00
## ship_date ship_mode customer_id
## Min. :2014-01-07 00:00:00.00 Length:9994 Length:9994
## 1st Qu.:2015-05-27 00:00:00.00 Class :character Class :character
## Median :2016-06-29 00:00:00.00 Mode :character Mode :character
## Mean :2016-05-03 23:06:58.56
## 3rd Qu.:2017-05-18 00:00:00.00
## Max. :2018-01-05 00:00:00.00
## customer_name segment country city
## Length:9994 Length:9994 Length:9994 Length:9994
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## state postal_code region product_id
## Length:9994 Min. : 1040 Length:9994 Length:9994
## Class :character 1st Qu.:23223 Class :character Class :character
## Mode :character Median :56431 Mode :character Mode :character
## Mean :55190
## 3rd Qu.:90008
## Max. :99301
## category sub_category product_name sales
## Length:9994 Length:9994 Length:9994 Min. : 0.444
## Class :character Class :character Class :character 1st Qu.: 17.280
## Mode :character Mode :character Mode :character Median : 54.490
## Mean : 229.858
## 3rd Qu.: 209.940
## Max. :22638.480
## quantity discount profit
## Min. : 1.00 Min. :0.0000 Min. :-6599.978
## 1st Qu.: 2.00 1st Qu.:0.0000 1st Qu.: 1.729
## Median : 3.00 Median :0.2000 Median : 8.666
## Mean : 3.79 Mean :0.1562 Mean : 28.657
## 3rd Qu.: 5.00 3rd Qu.:0.2000 3rd Qu.: 29.364
## Max. :14.00 Max. :0.8000 Max. : 8399.976
#### We would not need the row_id and the country variable in our analysis. Now, we would go ahead to remove row_id and the country Columns(Variable) since the data frame is for United State
#Dropping row_id and country column(variables)
superstore <- within(superstore, rm(row_id, country))
#confirming the removal of row_id and country variable
superstore %>%
head(5)
## # A tibble: 5 × 19
## order_id order_date ship_date ship_…¹ custo…² custo…³
## <chr> <dttm> <dttm> <chr> <chr> <chr>
## 1 CA-2016-152156 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125… Claire…
## 2 CA-2016-152156 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125… Claire…
## 3 CA-2016-138688 2016-06-12 00:00:00 2016-06-16 00:00:00 Second… DV-130… Darrin…
## 4 US-2015-108966 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203… Sean O…
## 5 US-2015-108966 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203… Sean O…
## # … with 13 more variables: segment <chr>, city <chr>, state <chr>,
## # postal_code <dbl>, region <chr>, product_id <chr>, category <chr>,
## # sub_category <chr>, product_name <chr>, sales <dbl>, quantity <dbl>,
## # discount <dbl>, profit <dbl>, and abbreviated variable names ¹ship_mode,
## # ²customer_id, ³customer_name
#Let us look at the product category available
unique(superstore$category)
## [1] "Furniture" "Office Supplies" "Technology"
#Number of products in each category
superstore %>%
group_by(category) %>%
summarise(total=n()) %>%
arrange(desc(total))
## # A tibble: 3 × 2
## category total
## <chr> <int>
## 1 Office Supplies 6026
## 2 Furniture 2121
## 3 Technology 1847
#sub_category products are divided into
n_distinct(superstore$sub_category)
## [1] 17
#number of products in each sub-category
superstore %>%
group_by(sub_category) %>%
summarise(total=n()) %>%
arrange(desc(total))
## # A tibble: 17 × 2
## sub_category total
## <chr> <int>
## 1 Binders 1523
## 2 Paper 1370
## 3 Furnishings 957
## 4 Phones 889
## 5 Storage 846
## 6 Art 796
## 7 Accessories 775
## 8 Chairs 617
## 9 Appliances 466
## 10 Labels 364
## 11 Tables 319
## 12 Envelopes 254
## 13 Bookcases 228
## 14 Fasteners 217
## 15 Supplies 190
## 16 Machines 115
## 17 Copiers 68
#The distribution of subcategory
ggplot(superstore, aes(x= sub_category, y = category, fill=category)) +
geom_col(position = position_dodge(),show.legend = FALSE)+ coord_flip()+
theme_classic() + labs(title = "Category by Sub_Category",
subtitle ="Distribution of Subcategory")+
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
#Count by Category and Subcategory
ggplot(superstore, aes(x= sub_category, fill = category)) +
geom_bar() + theme_bw() + coord_flip() +
labs(title = "Count by Category and Sub_Category",
x = "Sub_Category", y = "Frequency")+
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
#Total Profit and Sale by Sub_Category
superstore %>%
group_by(sub_category) %>%
summarise( total_sales = sum(sales),total_profit =sum(profit)) %>%
pivot_longer(c("total_sales", "total_profit")) %>%
ggplot(aes(x=sub_category, y = value, fill = name)) +
geom_col(position = position_dodge()) + theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme(legend.title = element_blank()) +
scale_y_continuous(labels = scales::comma)+
labs(title = "Total Profit and Sales by Sub_Category") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
#The total number of products available in the store is
n_distinct(superstore$product_name)
## [1] 1850
#Distribution of Top 10 Product
superstore %>%
group_by(product_name) %>%
summarize(count_of_products=n()) %>%
arrange(desc(count_of_products)) %>%
slice_head(n=10) %>%
ggplot(superstore,
mapping = aes(x= product_name,
y= count_of_products))+
geom_segment(aes(x = reorder(product_name, count_of_products), xend = product_name,
y = 0, yend = count_of_products), size =1,)+
geom_point(size=10, color= "orange",
shape = 20, stroke = 2) + coord_flip() + theme_bw() +
labs(title = "Distribution of Top 10 Product",
x = NULL) +
theme(plot.title = element_text(size=15, face="bold", hjust = 0.5)) +
geom_text(aes(label= count_of_products),
color="black", size=4, vjust=0.5, fontface='bold')
#Count of Sub-Category by region
superstore %>%
ggplot(aes(x=sub_category, fill= region)) +
geom_bar(position = position_dodge(width = 0.7))+
theme_minimal()+ theme(axis.text.x = element_text(angle = 90))+
scale_y_continuous(expand = c(0,0)) +
labs(title = "Count of Sub-Category Product Sales by Region")+
theme(plot.title = element_text(size=15, face="bold",hjust = 0.5))
#Calculating Cost
superstore <- superstore %>%
mutate(cost = sales - profit)
#showing the first five rows of the new data frame
superstore %>%
head(5)
## # A tibble: 5 × 20
## order_id order_date ship_date ship_…¹ custo…² custo…³
## <chr> <dttm> <dttm> <chr> <chr> <chr>
## 1 CA-2016-152156 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125… Claire…
## 2 CA-2016-152156 2016-11-08 00:00:00 2016-11-11 00:00:00 Second… CG-125… Claire…
## 3 CA-2016-138688 2016-06-12 00:00:00 2016-06-16 00:00:00 Second… DV-130… Darrin…
## 4 US-2015-108966 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203… Sean O…
## 5 US-2015-108966 2015-10-11 00:00:00 2015-10-18 00:00:00 Standa… SO-203… Sean O…
## # … with 14 more variables: segment <chr>, city <chr>, state <chr>,
## # postal_code <dbl>, region <chr>, product_id <chr>, category <chr>,
## # sub_category <chr>, product_name <chr>, sales <dbl>, quantity <dbl>,
## # discount <dbl>, profit <dbl>, cost <dbl>, and abbreviated variable names
## # ¹ship_mode, ²customer_id, ³customer_name
#Top 10 product by Cost
superstore %>%
select(product_name, cost) %>%
slice(1:10) %>%
arrange(desc(cost))
## # A tibble: 10 × 2
## product_name cost
## <chr> <dbl>
## 1 Bretford CR4500 Series Slim Rectangular Table 1341.
## 2 Mitel 5320 IP Phone VoIP phone 816.
## 3 Hon Deluxe Fabric Upholstered Stacking Chairs, Rounded Back 512.
## 4 Bush Somerset Collection Bookcase 220.
## 5 Belkin F5C206VTEL 6 Outlet Surge 80.4
## 6 Eldon Expressions Wood and Plastic Desk Accessories, Cherry Wood 34.7
## 7 Eldon Fold 'N Roll Cart System 19.9
## 8 DXL Angle-View Binders with Locking Rings by Samsill 12.7
## 9 Self-Adhesive Address Labels for Typewriters by Universal 7.75
## 10 Newell 322 5.31
#Also calculating percentage profit
superstore <- superstore %>%
mutate(profit_percent = (profit/cost *100))
#Category and Products with 100% profits
superstore %>%
select(category, product_name, profit_percent) %>%
filter(profit_percent==100) %>%
arrange(category)
## # A tibble: 140 × 3
## category product_name profi…¹
## <chr> <chr> <dbl>
## 1 Furniture "GE 48\" Fluorescent Tube, Cool White Energy Saver, … 100
## 2 Furniture "GE 48\" Fluorescent Tube, Cool White Energy Saver, … 100
## 3 Furniture "GE 48\" Fluorescent Tube, Cool White Energy Saver, … 100
## 4 Furniture "GE 48\" Fluorescent Tube, Cool White Energy Saver, … 100
## 5 Furniture "GE 48\" Fluorescent Tube, Cool White Energy Saver, … 100
## 6 Office Supplies "Prang Dustless Chalk Sticks" 100
## 7 Office Supplies "Adams Telephone Message Book w/Frequently-Called Nu… 100
## 8 Office Supplies "OIC Binder Clips" 100
## 9 Office Supplies "Southworth Structures Collection" 100
## 10 Office Supplies "Alphabetical Labels for Top Tab Filing" 100
## # … with 130 more rows, and abbreviated variable name ¹profit_percent
#Products by Category with 100 percent Profit
superstore %>%
select(category, product_name, profit_percent) %>%
filter(profit_percent==100) %>%
arrange(category) %>%
ggplot(aes(x= category, fill = "Red")) +
geom_bar(show.legend = FALSE)+ theme_bw() +
labs(title = "Count of Category with 100% profit")+
theme(plot.title = element_text(size=20, face="bold",hjust = 0.5))
#Total Customer Count
n_distinct(superstore$customer_name)
## [1] 793
#Top Ten Customer who ordered frequently from the store
superstore %>%
group_by(customer_name) %>%
summarise(top_10_customer=n()) %>%
arrange(desc(top_10_customer)) %>%
slice_head(n=10)
## # A tibble: 10 × 2
## customer_name top_10_customer
## <chr> <int>
## 1 William Brown 37
## 2 John Lee 34
## 3 Matt Abelman 34
## 4 Paul Prost 34
## 5 Chloris Kastensmidt 32
## 6 Edward Hooks 32
## 7 Jonathan Doherty 32
## 8 Seth Vernon 32
## 9 Arthur Prichep 31
## 10 Emily Phan 31
#The most Common ship_mode and segment used by customers
superstore %>%
group_by(ship_mode, segment) %>%
summarise(total=n()) %>%
ggplot(aes(x=ship_mode, y = total, fill= segment)) +
geom_col(position=position_dodge()) + facet_wrap(~segment)+ theme_bw()+
theme(axis.text.x = element_text(angle = 45))+
labs(title = "Distribution by Segment and Ship Mode") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))+
geom_text(aes(ship_mode, label = total),vjust = -0.4)
## `summarise()` has grouped output by 'ship_mode'. You can override using the
## `.groups` argument.
#Top 20 Customers by Profit and by State
superstore %>%
mutate(summarise(superstore,
profit_2 = round(profit, digits = 0))) %>%
select(customer_name, state, profit_2) %>%
arrange(desc(profit_2)) %>%
slice(1:20) %>%
ggplot(aes(x= reorder(customer_name, profit_2),
y= profit_2, fill = reorder(state, -profit_2))) +
geom_col() + coord_flip() + theme_classic() +
labs(title = "Top 20 Customers by Profit and State",
x= "Customer Name", y = "Profit (Dollars)") +
theme(legend.title = element_blank()) +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5)) +
geom_text(aes(customer_name, label=profit_2),
size = 2.9,hjust = -0.01, fontface= "bold")
#TOP 10 CUSTOMER by Sale and by state
superstore %>%
mutate(summarise(superstore,
sales_2 = round(sales, digits = 0))) %>%
select(customer_name, state, sales_2) %>%
arrange(desc(sales_2)) %>%
slice(1:10) %>%
ggplot(aes(x=reorder(customer_name, -sales_2),
y= sales_2, fill= reorder(state, -sales_2)))+
geom_col() + theme_bw() +
theme(axis.text.x = element_text(angle = 45, hjust=1))+
labs(title = "Top 10 Customers by Sales and State",
x= "Customer Name", y = "Total Sales") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))+
theme(legend.title = element_blank()) +
geom_text(aes(label = sales_2),
vjust=0.1, alpha=1, size = 4, fontface= "bold")
#Sales by Region
superstore %>%
group_by(region) %>%
summarize(total_sales = round(sum(sales))) %>%
ggplot(aes(area = total_sales, fill = region,
label= paste0(region, "\n",prettyNum(total_sales, ",")))) +
geom_treemap() + geom_treemap_text(color= "white",
place="centre", fontface = "bold", size = 25) +
theme(legend.position = "none")+ labs(title = "Sales by Region") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
#Profit by Region
superstore %>%
group_by(region)%>%
summarize(total_profit = round(sum(profit))) %>%
ggplot(aes(area = total_profit , fill = region,
label= paste0(region, "\n",prettyNum(total_profit, ",")))) +
geom_treemap() +
geom_treemap_text(color= "white",
place="centre", fontface = "bold", size = 25) +
theme(legend.position = "none") + labs(title = "Profit by Region") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
#Calculating shipment duration
superstore <- superstore %>%
mutate(duration_to_ship =
as.Date(superstore$ship_date) - as.Date(superstore$order_date)) %>%
arrange(desc(duration_to_ship))
#Highest shipment duration and ship mode by top 20 product
superstore %>%
select(product_id, ship_mode,duration_to_ship) %>%
arrange(desc(duration_to_ship)) %>%
slice(1:20)
## # A tibble: 20 × 3
## product_id ship_mode duration_to_ship
## <chr> <chr> <drtn>
## 1 FUR-TA-10000577 Standard Class 7 days
## 2 OFF-ST-10000760 Standard Class 7 days
## 3 OFF-ST-10004186 Standard Class 7 days
## 4 TEC-AC-10001998 Standard Class 7 days
## 5 OFF-LA-10000134 Standard Class 7 days
## 6 OFF-AR-10000380 Standard Class 7 days
## 7 OFF-BI-10003981 Standard Class 7 days
## 8 OFF-BI-10003274 Standard Class 7 days
## 9 OFF-ST-10002974 Standard Class 7 days
## 10 OFF-ST-10001414 Standard Class 7 days
## 11 OFF-BI-10000343 Standard Class 7 days
## 12 OFF-PA-10002749 Standard Class 7 days
## 13 OFF-LA-10004544 Standard Class 7 days
## 14 FUR-BO-10004695 Standard Class 7 days
## 15 TEC-PH-10002844 Standard Class 7 days
## 16 TEC-MA-10000864 Standard Class 7 days
## 17 TEC-AC-10000109 Standard Class 7 days
## 18 OFF-AR-10001026 Standard Class 7 days
## 19 FUR-TA-10003748 Standard Class 7 days
## 20 OFF-ST-10002485 Standard Class 7 days
#Average shipping duration by ship mode
superstore %>%
group_by(ship_mode) %>%
summarise(avg_duration = round(mean(duration_to_ship))) %>%
ggplot(aes(x= ship_mode,
y= avg_duration, color = ship_mode ))+
geom_segment(aes(x = reorder(ship_mode, avg_duration),
xend = ship_mode, y = 0, yend = avg_duration), size =2,
show.legend = FALSE) +
geom_point( size=10, shape = 20, stroke = 2, show.legend = FALSE) + coord_flip() + theme_bw()+
labs(title = "Average shipping duration by ship mode",
x = "Ship Mode", y= "Average Duration (days)") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5)) +
geom_text(aes(label= avg_duration),
color="black", size=4, vjust=0.5, fontface='bold')
## Don't know how to automatically pick scale for object of type difftime. Defaulting to continuous.
#Extracting year from the order date
superstore <- superstore %>%
mutate(order_year = year(order_date))
#Profit and sales by year
superstore %>%
group_by(order_year) %>%
summarize(total_sales = round(sum(sales)),
total_profit= round(sum(profit))) %>%
pivot_longer(c("total_sales", "total_profit")) %>%
ggplot(aes(x= order_year, y= value, fill = name))+
geom_col(position = "dodge")+
scale_y_continuous(labels=scales::comma) + theme_bw()+
geom_text(aes(order_year, label = value), vjust =0.001,
size = 3, fontface= "bold",
position = position_dodge(width = 1))+
labs(title = "Profit and Sale by Year",
x= "Year", y = "Value (dollars)") +
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))+
theme(legend.title = element_blank())
#profit and sales by year for each region
superstore %>%
group_by(order_year,region) %>%
summarize(Total_Sales = round(sum(sales)),
Total_Profit= round(sum(profit))) %>%
pivot_longer(c("Total_Sales", "Total_Profit")) %>%
ggplot(aes(x= order_year, y= value, fill = name))+
geom_col(position = position_dodge(width = 1)) + facet_wrap(~region)+
theme_bw()+
geom_text(aes(order_year, label = value), vjust =0.001,
size = 2.5, fontface= "bold",
position = position_dodge(width = 1))+
labs(title = "Sales and Profit by Year for each Region",
x= "Year", y = "Value (dollars)")+
theme(plot.title = element_text(size=18, face="bold", hjust = 0.5))+
theme(legend.title = element_blank())
## `summarise()` has grouped output by 'order_year'. You can override using the
## `.groups` argument.
#Sales by Category for each year
superstore %>%
group_by(category, order_year) %>%
summarize(total_sales=sum(sales)) %>%
ggplot(aes(x=category, y= total_sales, fill = category)) +
geom_col() + facet_grid (~order_year) + theme_bw()+
scale_y_continuous(labels=scales::comma) +
theme(axis.text.x = element_blank()) +
labs(title = "Sales by Category for each Year",
x= NULL, y = "Sales (dollars)")+
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.
#Profit Trend by Category for each Year
superstore %>%
group_by(category, order_year) %>%
summarize(total_profit=sum(profit)) %>%
ggplot(aes(x= order_year, y=total_profit, color = category)) +
geom_point(size = 5, alpha=0.5) + geom_line(size=1) +
theme_bw() +
labs(title = "Profit Trend by Year for each Category",
x= "Year", y = "Profit (dollars)")+
theme(plot.title = element_text(size=20, face="bold", hjust = 0.5))
## `summarise()` has grouped output by 'category'. You can override using the
## `.groups` argument.
#Sales, Profit and Discount Relationship
superstore %>%
ggplot(aes(x=sales, y=profit, color=discount))+
geom_point(size=3) + geom_rug() + theme_bw() +
labs(title="Sales,Profit and Discount Relationship")+
theme(plot.title=element_text(size=20, face="bold", hjust=0.5))