library(ggplot2)
library(dplyr)
library(plotly)
#library(shiny)
library(tidyr)
# Load the supermarket sales extract and prepare the columns used by the
# summaries and charts below. read.csv() already returns a data.frame, so no
# extra data.frame() wrap is needed.
df <- read.csv('https://raw.githubusercontent.com/SubhalaxmiRout002/DATA-608/main/Final%20Project/Supermarket%20Sales%20-%20Stock.csv')
df <- df %>% rename(Tax = Tax.5.)
# Dates arrive as month/day/year text.
df$Date <- as.Date(df$Date, format = "%m/%d/%Y")
# Derive English month names from the month number instead of months(), whose
# output follows the session locale and would silently break the
# 'January' / 'March' filters used further down.
df$Month <- month.name[as.integer(format(df$Date, "%m"))]
df$Time <- as.factor(df$Time)
names(df)
## [1] "Invoice.ID" "Branch"
## [3] "City" "Customer.type"
## [5] "Gender" "Product.line"
## [7] "Unit.price" "Quantity"
## [9] "Tax" "Total"
## [11] "Date" "Time"
## [13] "Payment" "COGS"
## [15] "Gross.Margin.Precentage" "Gross.Income"
## [17] "Rating" "Month"
# Count how many transactions were recorded for each product line.
table(df[["Product.line"]])
##
## Electronic accessories Fashion accessories Food and beverages
## 170 178 174
## Health and beauty Home and lifestyle Sports and travel
## 152 160 166
# Gross income per month and branch for the 'Electronic accessories' line.
# Rows are filtered before aggregating so only the needed product line is
# summed, and the grouping left on the result is stated explicitly
# (Month, Branch) instead of relying on summarise()'s implicit default.
df2 <- df %>%
  filter(Product.line == 'Electronic accessories') %>%
  group_by(Month, Branch, Product.line) %>%
  summarise(Gross.Income = sum(Gross.Income), .groups = "drop_last")
df2
## # A tibble: 9 x 4
## # Groups: Month, Branch [9]
## Month Branch Product.line Gross.Income
## <chr> <chr> <chr> <dbl>
## 1 February A Electronic accessories 248.
## 2 February B Electronic accessories 318.
## 3 February C Electronic accessories 261.
## 4 January A Electronic accessories 305.
## 5 January B Electronic accessories 319.
## 6 January C Electronic accessories 273.
## 7 March A Electronic accessories 320.
## 8 March B Electronic accessories 175.
## 9 March C Electronic accessories 370.
#ggplot(df2, aes(x = Month, y = Gross.Income)) + geom_bar(aes(fill = factor(Branch)))
# Line chart of monthly gross income, one line per branch.
# Month holds month names, which a discrete x scale would otherwise order
# alphabetically (February, January, March). Mapping it through month.name
# levels keeps the trend line in calendar order; unused months are dropped by
# the discrete scale.
ggplot(
  data = df2,
  aes(x = factor(Month, levels = month.name), y = Gross.Income, group = Branch)
) +
  geom_line(aes(color = Branch)) +
  geom_point(aes(color = Branch)) +
  labs(title = "Gross Profit Trend", x = "Month", y = "Gross Profit") +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(hjust = 0.5, size = 20),
    axis.title.y = element_text(size = 20),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Net sales
# Total sales per month and city for the 'Electronic accessories' line.
# Filtering first avoids summing product lines that are discarded anyway.
df3 <- df %>%
  filter(Product.line == 'Electronic accessories') %>%
  group_by(Month, City, Product.line) %>%
  summarise(Total = sum(Total), .groups = "drop")
# Month holds month names; mapping it through month.name levels keeps the
# trend line in calendar order rather than alphabetical order.
ggplot(
  data = df3,
  aes(x = factor(Month, levels = month.name), y = Total, group = City)
) +
  geom_line(aes(color = City)) +
  geom_point(aes(color = City)) +
  scale_colour_manual(
    values = c(Mandalay = "#F4A460", Naypyitaw = "#A0522D", Yangon = "#A9A9A9")
  ) +
  labs(title = "Net Sales Trend", x = "Month", y = "Net Sales") +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(hjust = 0.5, size = 15),
    axis.title.y = element_text(size = 10),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

# Unit Sold
# Units sold per city and product line in January.
df4 <- df %>%
  filter(Month == 'January') %>%
  group_by(Month, City, Product.line) %>%
  summarise(Quantity = sum(Quantity), .groups = "drop")
# Horizontal dodged bars with the quantity printed at the end of each bar.
# Bars and labels share the same dodge width (0.9, the default bar width) so
# every label lines up with its own bar; a label width of 1 drifts off the bars.
ggplot(data = df4, aes(x = Product.line, y = Quantity, fill = City)) +
  geom_col(position = position_dodge(width = 0.9)) +
  coord_flip() +
  scale_fill_manual(
    values = c(Mandalay = "#F4A460", Naypyitaw = "#A0522D", Yangon = "#A9A9A9")
  ) +
  labs(title = "Units Sold", x = "Product Types") +
  geom_text(
    aes(label = Quantity),
    position = position_dodge(width = 0.9),
    size = 3,
    hjust = -0.10
  ) +
  theme(
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_text(hjust = 0.5, size = 15),
    axis.title.y = element_text(size = 10),
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

library(scales)
# Gender
# Share of transactions by gender within each month / city / product line,
# narrowed to the single slice shown in the pie chart.
df5 <- df %>%
  group_by(Month, City, Product.line, Gender) %>%
  summarise(gender_n = n(), .groups = "drop_last") %>%
  mutate(Percentage = round(gender_n / sum(gender_n) * 100, 2)) %>%
  ungroup() %>%
  filter(
    Product.line == 'Fashion accessories',
    Month == 'January',
    City == 'Mandalay'
  )
# Pie chart (stacked bar in polar coordinates).
# Labels use y = Percentage with position_stack(vjust = 0.5), which centres
# each label inside its own slice. Feeding a hand-computed cumulative offset
# into position_stack would stack the offsets a second time and misplace the
# labels.
ggplot(df5, aes(x = "", y = Percentage, fill = Gender)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y", start = 0) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  labs(title = "Gender") +
  geom_text(
    aes(y = Percentage, label = percent(Percentage / 100)),
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15),
    axis.text.x = element_blank()
  )

# Payment Type
# Percentage of transactions per payment method inside each
# month / city / product line, then the one slice that is charted.
df6 <- df %>%
  group_by(Month, City, Product.line, Payment) %>%
  summarise(payment_n = n()) %>%
  group_by(Month, City, Product.line) %>%
  mutate(Percentage = round(100 * payment_n / sum(payment_n), 2)) %>%
  filter(
    Product.line == 'Fashion accessories',
    Month == 'January',
    City == 'Mandalay'
  )
# Pie chart: one stacked column bent into polar coordinates, with each
# percentage label centred in its slice.
ggplot(df6, aes(x = "", y = Percentage, fill = Payment)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c("#E69F00", "#999999", "#A0522D")) +
  labs(title = "Payment Type") +
  geom_text(
    aes(y = Percentage, label = percent(Percentage / 100)),
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15),
    axis.text.x = element_blank()
  )

# Customer Type
# Percentage of transactions per customer type inside each
# month / city / product line, then the one slice that is charted.
df7 <- df %>%
  group_by(Month, City, Product.line, Customer.type) %>%
  summarise(cust_n = n()) %>%
  group_by(Month, City, Product.line) %>%
  mutate(Percentage = round(100 * cust_n / sum(cust_n), 2)) %>%
  filter(
    Product.line == 'Fashion accessories',
    Month == 'March',
    City == 'Mandalay'
  )
# Pie chart: one stacked column bent into polar coordinates, with each
# percentage label centred in its slice.
ggplot(df7, aes(x = "", y = Percentage, fill = Customer.type)) +
  geom_col(width = 1) +
  coord_polar(theta = "y", start = 0) +
  scale_fill_manual(values = c("#E69F00", "#999999")) +
  labs(title = "Customer Type") +
  geom_text(
    aes(y = Percentage, label = percent(Percentage / 100)),
    size = 4,
    position = position_stack(vjust = 0.5)
  ) +
  theme_minimal() +
  theme(
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.border = element_blank(),
    panel.grid = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(hjust = 0.5, size = 15),
    axis.text.x = element_blank()
  )

# animation
library(plotly)
library(lazyeval)
library(dplyr)
# Build cumulative animation frames from a data frame.
#
# For every level of `var` (factor levels, otherwise the sorted unique
# values) the rows of `dat` whose value is among the levels up to and
# including that one are copied and tagged with a `frame` column holding the
# level. The per-level slices are stacked into a single result.
#
# dat: a data frame.
# var: a one-sided formula evaluated against `dat`, e.g. ~Date.
# Returns the stacked rows with an added `frame` column.
accumulate_by <- function(dat, var) {
  var <- lazyeval::f_eval(var, dat)
  # Level ordering is spelled out here so the helper does not reach into
  # plotly's unexported namespace via `:::`. Intended to match plotly's
  # internal getLevels() - verify against the installed plotly version.
  lvls <- if (is.factor(var)) levels(var) else sort(unique(var))
  dats <- lapply(seq_along(lvls), function(x) {
    cbind(dat[var %in% lvls[seq_len(x)], ], frame = lvls[[x]])
  })
  dplyr::bind_rows(dats)
}
# Daily net sales by gender for one product line, month and city.
# The grouping left by summarise() (Month, City, Product.line, Gender) is
# kept as-is on the result.
df8 <- df %>%
  group_by(Month, City, Product.line, Gender, Date) %>%
  summarise(Total = sum(Total)) %>%
  filter(
    Product.line == 'Electronic accessories' &
      Month == 'January' &
      City == 'Mandalay'
  )
#df8$Date <- as.Date(df8$Date, format= "%Y-%m-%d")
#df8$Date <- as.numeric(df8$Date)
#df8$day <- as.numeric(format(df8$Date, "%d"))
# Expand into cumulative frames, one per distinct date, for the animation.
df8 <- accumulate_by(df8, ~Date)
df8
## # A tibble: 85 x 7
## # Groups: Month, City, Product.line, Gender [2]
## Month City Product.line Gender Date Total frame
## <chr> <chr> <chr> <chr> <date> <dbl> <date>
## 1 January Mandalay Electronic accessories Female 2019-01-01 471. 2019-01-01
## 2 January Mandalay Electronic accessories Female 2019-01-01 471. 2019-01-02
## 3 January Mandalay Electronic accessories Male 2019-01-02 139. 2019-01-02
## 4 January Mandalay Electronic accessories Female 2019-01-01 471. 2019-01-04
## 5 January Mandalay Electronic accessories Male 2019-01-02 139. 2019-01-04
## 6 January Mandalay Electronic accessories Male 2019-01-04 75.8 2019-01-04
## 7 January Mandalay Electronic accessories Female 2019-01-01 471. 2019-01-10
## 8 January Mandalay Electronic accessories Female 2019-01-10 57.9 2019-01-10
## 9 January Mandalay Electronic accessories Male 2019-01-02 139. 2019-01-10
## 10 January Mandalay Electronic accessories Male 2019-01-04 75.8 2019-01-10
## # … with 75 more rows
# Animated cumulative line chart of daily net sales, one trace per gender.
fig <- df8 %>%
  plot_ly(
    x = ~Date,
    y = ~Total,
    split = ~Gender,
    frame = ~frame,
    type = 'scatter',
    mode = 'lines',
    # plotly's line attribute is spelled `simplify`; the misspelled key
    # `simplyfy` is not a recognised attribute, so line simplification was
    # never actually switched off for the animation.
    line = list(simplify = FALSE)
  )
# Axis titles and types.
fig <- fig %>% layout(
  title = "Daily Net Sales",
  xaxis = list(
    type = "date",
    title = "Date",
    zeroline = FALSE
  ),
  yaxis = list(
    title = "Net Sales",
    zeroline = FALSE
  )
)
# Frame duration of 100 with no transition and no full redraw per frame.
fig <- fig %>% animation_opts(
  frame = 100,
  transition = 0,
  redraw = FALSE
)
fig <- fig %>% animation_slider(
  currentvalue = list(prefix = "Daily", font = list(color = "red"))
)
fig <- fig %>% animation_button(
  x = 1, xanchor = "left", y = 0, yanchor = "bottom"
)
fig
# plot_ly(data = df8,
# x = ~Date,
# y = ~Total,
# split = ~Gender,
# frame = ~frame,
# type = "scatter",
# mode = "lines",
# line = list(simplyfy = T)
# ) %>%
# layout(
# title = "Daily Sales",
# xaxis = list(
# zeroline = F,
# range= ~Date,
# title="Date"
# ),
# yaxis = list(title = 'Net Sales', zeroline = F) #
# ) %>%
# animation_opts(
# frame = 100, transition = 0, redraw = F
# ) %>%
# animation_button(
# x = 1, xanchor = "right", y = 0, yanchor = "bottom"
# ) %>%
# animation_slider(
# currentvalue = list(prefix = "Daily", font = list(color="red"))
# )
# if (interactive()) {
# p <- ggplot(df8, aes(Date, Total)) +
# geom_line(aes(group = Date), alpha = 0.3) +
# geom_smooth() +
# geom_line(aes(frame = Date, ids = day), color = "red") +
# facet_wrap(~ Gender)
#
# ggplotly(p, width = 600, height = 400) %>%
# animation_opts(1000)
# }
library(plotly)
library(quantmod)
# Download AAPL quotes; getSymbols() assigns the result to `AAPL`.
getSymbols("AAPL",src='yahoo')
## [1] "AAPL"
## [1] "AAPL"
# Flatten the series into a data frame with an explicit Date column and keep
# the last 30 rows.
df_app <- data.frame(Date = index(AAPL), coredata(AAPL))
df_app <- tail(df_app, 30)
# Sequential row id used as the animation key. seq_len() stays valid (empty)
# if no rows came back, whereas seq.int(0) would yield c(1, 0).
df_app$ID <- seq_len(nrow(df_app))
# Build cumulative animation frames from a data frame (re-declares the helper
# of the same name defined earlier in this script).
#
# For every level of `var` (factor levels, otherwise the sorted unique
# values) the rows of `dat` whose value is among the levels up to and
# including that one are copied and tagged with a `frame` column holding the
# level. The per-level slices are stacked into a single result.
#
# dat: a data frame.
# var: a one-sided formula evaluated against `dat`, e.g. ~ID.
# Returns the stacked rows with an added `frame` column.
accumulate_by <- function(dat, var) {
  var <- lazyeval::f_eval(var, dat)
  # Level ordering is spelled out here so the helper does not reach into
  # plotly's unexported namespace via `:::`. Intended to match plotly's
  # internal getLevels() - verify against the installed plotly version.
  lvls <- if (is.factor(var)) levels(var) else sort(unique(var))
  dats <- lapply(seq_along(lvls), function(x) {
    cbind(dat[var %in% lvls[seq_len(x)], ], frame = lvls[[x]])
  })
  dplyr::bind_rows(dats)
}
# Expand the AAPL rows into cumulative animation frames keyed by ID.
df_app <- accumulate_by(df_app, ~ID)
#df_app
# Take a working copy of the txhousing data set and display it.
df_tax <- txhousing
#df_tax$date <- as.Date(df_tax$date, format = '%Y-%m-%d')
#df_tax$date <- as.numeric(df_tax$date)
df_tax
## # A tibble: 8,602 x 9
## city year month sales volume median listings inventory date
## <chr> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Abilene 2000 1 72 5380000 71400 701 6.3 2000
## 2 Abilene 2000 2 98 6505000 58700 746 6.6 2000.
## 3 Abilene 2000 3 130 9285000 58100 784 6.8 2000.
## 4 Abilene 2000 4 98 9730000 68600 785 6.9 2000.
## 5 Abilene 2000 5 141 10590000 67300 794 6.8 2000.
## 6 Abilene 2000 6 156 13910000 66900 780 6.6 2000.
## 7 Abilene 2000 7 152 12635000 73500 742 6.2 2000.
## 8 Abilene 2000 8 131 10710000 75000 765 6.4 2001.
## 9 Abilene 2000 9 104 7615000 64500 771 6.5 2001.
## 10 Abilene 2000 10 101 7040000 59300 764 6.6 2001.
## # … with 8,592 more rows