Shark Tank data for US seasons 1 through 11 was obtained from Kaggle.com. The data is based on the US business reality television series in which contestants pitch their companies to prospective investors in the hope of high returns on high stakes.
From this dataset, the Season Number, Total Pitch Amount, Industry, Pitchers Gender, and Pitchers State fields were used to create visuals that analyse the data from financial and demographic standpoints.
Financial: It is incredibly interesting to see how low the pitched requests for businesses were in the earlier seasons; it wasn’t until roughly seasons 4 and 5 that the amounts started to climb. Breaking this information out by industry provides great insight into the range of businesses that were pitched.
Demographic: The US map, colored by total pitches per state, is a great representation of where the businesses being pitched are most likely to be located, with California showing the highest number of pitches. What is most interesting is that no pitches have been seen on Shark Tank from North Dakota, South Dakota, Wyoming, New Mexico, Maine, or West Virginia.
The visuals were created based on the following questions: 1. Which season had the highest deal amount? - Bar Chart 2. Which industry had the highest deal amount in each season? - Trellis Bar Chart 3. By industry, how does the total requested amount compare with the number of pitches? - Dual-Axis Chart
library(data.table)
library(dplyr)
library(ggplot2)
library(scales)
library(usmap)
library(RColorBrewer)
library(ggthemes)
library(plyr)
# Load the Shark Tank dataset ----
# The CSV is read through an explicit path rather than setwd(), so the
# session's working directory is left untouched.
data_dir <- "U:/R_datafiles"
filename <- "Shark Tank US dataset.csv"

# Read the literal text "NA" and empty fields as missing values.
df <- fread(file.path(data_dir, filename), na.strings = c("NA", ""))

# Replace every missing value with 0 so the sums and counts computed later
# never encounter NA.
# NOTE(review): in text columns this presumably leaves a "0" placeholder
# category -- confirm against the gender and state summaries.
df <- df %>% replace(is.na(.), 0)

# Store the numerically used columns as whole numbers held in doubles:
# as.integer() truncates any fractional part, as.numeric() widens the result.
df$SeasonNumber <- as.numeric(as.integer(df$SeasonNumber))
df$TotalDealAmount <- as.numeric(as.integer(df$TotalDealAmount))
df$GotDeal <- as.numeric(as.integer(df$GotDeal))
# Bar chart: deal amount by season ----
# Rows sharing a season are stacked into one bar, so each bar's height is the
# summed TotalDealAmount of that season. The y axis is limited to $0-$30M and
# the x axis shows one tick per season (1 to 11). The fill legend is hidden.
p <- ggplot(
  data = df,
  mapping = aes(x = SeasonNumber, y = TotalDealAmount, fill = SeasonNumber)
) +
  geom_col() +
  scale_x_continuous(breaks = 1:11, labels = as.character(1:11)) +
  scale_y_continuous(labels = dollar, limits = c(0, 3e7)) +
  labs(
    title = "Total Requested Deal Amounts by Season",
    x = "Season Number",
    y = "Total Requested Deal Amount",
    fill = "Season Number"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )
p
# Trellis bar chart: deal amount by industry, one panel per season ----
# Within each season panel, rows of the same industry are stacked into a
# single bar. Industry names are rotated 90 degrees so they do not overlap,
# and the fill legend is hidden.
t <- ggplot(df, aes(Industry, TotalDealAmount, fill = SeasonNumber)) +
  geom_col() +
  facet_wrap(~ SeasonNumber) +
  scale_y_continuous(labels = dollar) +
  labs(
    title = "Industry by Total Deal Amount per Season",
    x = "Industry",
    y = "Total Deal Amount"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.position = "none",
    axis.text.x = element_text(angle = 90)
  )
t
# Summary tables for the dual-axis chart ----

# Number of pitches (rows of df) for every Industry x Season combination.
# dplyr:: is spelled out because plyr is attached after dplyr and masks
# summarise().
new_df <- df %>%
  group_by(Industry, SeasonNumber) %>%
  dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
  data.frame()

# Season is turned into a factor so it acts as a discrete fill in the chart.
new_df$SeasonNumber <- as.factor(new_df$SeasonNumber)

# Total number of pitches per industry across all seasons.
agg_tot <- new_df %>%
  group_by(Industry) %>%
  dplyr::summarise(tot = sum(n), .groups = "drop") %>%
  data.frame()

# Summed TotalDealAmount per industry across all seasons.
total_amt <- df %>%
  group_by(Industry) %>%
  dplyr::summarise(totamount = sum(TotalDealAmount), .groups = "drop") %>%
  data.frame()

# Secondary-axis ticks: every 5 (in millions) from 0 up to the largest
# industry total, with matching "$<x>M" labels.
ylab <- seq(0, max(total_amt$totamount) / 1e6, 5)
my_labels <- paste0("$", ylab, "M")
# Dual-axis chart: pitch count (bars) and summed amount (line) per industry ----
# The amount series is divided by this factor so it can be drawn on the
# pitch-count axis; the secondary axis multiplies by the same factor to
# display the values in dollars again.
amount_per_count <- 60000

ggplot(
  data = new_df,
  mapping = aes(x = reorder(Industry, n, sum), y = n, fill = SeasonNumber)
) +
  # Stacked bars: one segment per season, industries ordered by total count.
  geom_col(position = "stack") +
  # Total pitch count printed just past the end of each bar.
  geom_text(
    data = agg_tot,
    mapping = aes(x = Industry, y = tot, label = tot),
    inherit.aes = FALSE,
    hjust = -0.1
  ) +
  # Rescaled amount per industry, drawn as a line with point markers.
  geom_line(
    data = total_amt,
    mapping = aes(
      x = Industry,
      y = totamount / amount_per_count,
      color = "Total Pitch",
      group = 1
    ),
    inherit.aes = FALSE,
    size = 1
  ) +
  geom_point(
    data = total_amt,
    mapping = aes(x = Industry, y = totamount / amount_per_count, group = 1),
    inherit.aes = FALSE,
    size = 3,
    shape = 21,
    fill = "dark green",
    color = "black"
  ) +
  scale_color_manual(name = NULL, values = "black") +
  scale_y_continuous(
    labels = comma,
    sec.axis = sec_axis(
      ~ . * amount_per_count,
      name = "Total Pitch Amount",
      labels = my_labels,
      breaks = ylab * 1e6
    )
  ) +
  # Flip so industries run down the vertical axis.
  coord_flip() +
  labs(
    title = "Industry Pitch Count and Total Requested Amount",
    x = "",
    y = "Pitch Count",
    fill = "Season Number"
  ) +
  theme_clean() +
  theme(plot.title = element_text(hjust = 0.5))
The visuals were created based on the following questions: 1. What is the percentage breakdown of pitches by gender? - Pie Chart 2. Which state had the highest number of pitches? - US Maps
# Pie chart: share of pitches by pitcher gender ----
# plyr::count() on a vector returns one row per distinct value with the
# columns `x` (the value) and `freq` (how often it occurs).
gender <- data.frame(plyr::count(df$PitchersGender))

# Percentage of all pitches per category. This must use `freq`: the table has
# no `n` column, so `gender$n` is NULL and the labels would carry no numbers.
pct <- round(100 * gender$freq / sum(gender$freq))

# NOTE(review): the label names are hard-coded and assume exactly three
# categories in this sorted order -- confirm against gender$x.
pie(gender$freq,
  labels = paste(c("Female", "Males", "Mixed Team"), pct, "%", sep = " "),
  col = c("palevioletred2", "seagreen3", "seashell1"),
  main = "Total Percentage of Pitches by Gender"
)
# US maps: number of pitches per state ----
# plyr::count() returns one row per distinct state value with the columns
# `x` (the value) and `freq` (number of pitches).
states3 <- data.frame(plyr::count(df$PitchersState))

# Remove the "0" placeholder by value rather than by row position, so a real
# state is never discarded when no placeholder row is present.
# NOTE(review): assumes missing states were turned into "0" when the data was
# loaded -- confirm.
states4 <- states3[!(states3$x %in% "0"), ]
states5 <- na.omit(states4)

# plot_usmap() matches the data to the map through a column named "state".
colnames(states5)[1] <- "state"

# Draw a pitch-count map shaded from white to dark green.
#   pitch_counts: data frame with `state` and `freq` columns.
#   map_title:    plot title.
#   include:      states to restrict the map to; empty shows every state.
plot_pitch_map <- function(pitch_counts, map_title, include = c()) {
  plot_usmap("states",
    data = pitch_counts, values = "freq", include = include,
    exclude = "Canada", labels = TRUE, color = "dark green"
  ) +
    scale_fill_continuous(
      low = "white", high = "dark green", name = "Pitches by State"
    ) +
    labs(title = map_title) +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(legend.position = "right")
}

# Whole country.
plot_pitch_map(states5, "Pitches Made by State")

# Close-up of the selected eastern states.
plot_pitch_map(
  states5,
  "North Eastern US States",
  include = c(
    "ME", "VT", "NH", "MA", "NY", "CT", "RI",
    "PA", "NJ", "MD", "DC", "DE", "WV", "VA"
  )
)
From this analysis, one can see that the Food and Beverage industry leads in both total deal amounts and number of pitches, coming in as the top industry in nearly every season. The significant number of pitches coming out of California is in line with the common understanding that the state has the highest number of start-ups in the US; appearing on Shark Tank is a great way to build a company's brand while gaining investors.