Basketball Analysis
Task 1 - Use the PlayersBBall.csv dataset to visually represent
(summarize) the number of players in each position.
# Load necessary libraries
library(dplyr)
library(ggplot2)
library(plotly)
library(ggthemes)
# Load the player table from the CSV file on disk.
bb <- read.csv("C:/Users/gerde/Downloads/PlayersBBall.csv")
# Drop rows whose position is blank, then count the remaining rows
# per position.
players_with_position <- bb[bb$position != "", ]
poscnt <- players_with_position %>%
  group_by(position) %>%
  summarise(count = n())
# Print the per-position counts.
poscnt
## # A tibble: 7 × 2
## position count
## <chr> <int>
## 1 C 502
## 2 C-F 219
## 3 F 1290
## 4 F-C 388
## 5 F-G 216
## 6 G 1574
## 7 G-F 360
# Bar chart of the per-position counts: one bar per position, bar height
# taken directly from the precomputed `count` column.
# The legend is hidden because the x axis already names each position.
p <- ggplot(poscnt, aes(x = position, y = count, fill = position)) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Players by Position") +
  xlab("Position") +
  ylab("Number of Players") +
  theme(legend.position = "none")
# Convert the ggplot object to an interactive plotly widget and display it.
ggplotly(p)
Task 2 - Visually investigate distribution of weight for Centers and
Forwards
# Weight distribution for players listed purely as "F" or "C":
# one histogram panel per position, bins 10 weight units wide.
bb %>%
  filter(position %in% c("F", "C")) %>%
  ggplot(aes(x = weight, fill = position)) +
  geom_histogram(position = "identity", binwidth = 10) +
  facet_wrap(~ position) +
  ggtitle("Histogram of Weight by Position") +
  xlab("Weight") +
  ylab("Count") +
  theme(legend.position = "none")

Task 3 - Visually investigate distribution of height for Centers and
Forwards
# Bar chart of height counts for players listed purely as "F" or "C",
# one panel per position.
#
# Heights are stored as text in "feet-inches" form (e.g. "6-10"). Left as
# plain text, the x axis is sorted alphabetically, which puts "6-10" and
# "6-11" before "6-2". To get a true shortest-to-tallest axis, the distinct
# height labels are converted to total inches and used, in that order, as
# the factor levels.
cf <- bb %>% filter(position %in% c("F", "C"))
cf_heights <- unique(as.character(cf$height))
cf_inches <- vapply(
  strsplit(cf_heights, "-", fixed = TRUE),
  function(parts) 12 * as.numeric(parts[1]) + as.numeric(parts[2]),
  numeric(1)
)
# order() places labels that cannot be parsed (NA inches) last, so no
# existing category is dropped from the plot.
cf$height <- factor(cf$height, levels = cf_heights[order(cf_inches)])
p1 <- ggplot(cf, aes(x = height, fill = position)) +
  geom_bar() +
  facet_wrap(~ position) +
  labs(title = "Histogram of Height by Position", x = "Height", y = "Count") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = "none")
# Display as an interactive plotly widget.
ggplotly(p1)
Task 4 - Visually investigate distribution of height for all
positions
# Bar chart of height counts for every non-blank position, one panel per
# position.
#
# Heights are stored as text in "feet-inches" form (e.g. "6-10"). Left as
# plain text, the x axis is sorted alphabetically, which puts "6-10" and
# "6-11" before "6-2". To get a true shortest-to-tallest axis, the distinct
# height labels are converted to total inches and used, in that order, as
# the factor levels.
all_pos <- bb[bb$position != "", ]
all_heights <- unique(as.character(all_pos$height))
all_inches <- vapply(
  strsplit(all_heights, "-", fixed = TRUE),
  function(parts) 12 * as.numeric(parts[1]) + as.numeric(parts[2]),
  numeric(1)
)
# order() places labels that cannot be parsed (NA inches) last, so no
# existing category is dropped from the plot.
all_pos$height <- factor(all_pos$height, levels = all_heights[order(all_inches)])
p2 <- ggplot(all_pos, aes(x = height, fill = position)) +
  geom_bar() +
  facet_wrap(~ position) +
  labs(title = "Histogram of Height by Position", x = "Height", y = "Count") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = "none")
# Display as an interactive plotly widget.
ggplotly(p2)
Task 5 - Investigate player height v. weight
# First attempt at height vs. weight: a jittered scatter plot coloured by
# position, using the height column exactly as it was read from the CSV.
p3 <- ggplot(
  bb[bb$position != "", ],
  mapping = aes(x = height, y = weight, color = position)
) +
  geom_point(position = "jitter") +
  ggtitle("Player Height v. Weight")
ggplotly(p3)
# The plot above is not correct: the heights on the x axis are not in
# ascending order. A manual ordering is needed to fix this.
# Height labels from shortest to tallest (the list has no "5-4" entry).
horder <- c(
  paste0("5-", c(3, 5:11)),
  paste0("6-", 0:11),
  paste0("7-", 0:7)
)
# Re-encode height as a factor with the levels above; any height label that
# is not listed in horder becomes NA.
bbo <- bb[bb$position != "", ] %>%
  mutate(height = factor(height, levels = horder))
# Same jittered scatter as before, now with the x axis in height order and
# the tick labels rotated so they do not overlap.
p4 <- ggplot(bbo, aes(x = height, y = weight, color = position)) +
  geom_point(position = "jitter") +
  ggtitle("Player Height v. Weight") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p4)
Task 6 - Investigate whether there is a difference in the height
vs. weight trend by position
# Split the height vs. weight scatter into one panel per position so each
# position can be seen on its own.
p5 <- ggplot(bbo, aes(x = height, y = weight, color = position)) +
  geom_point(position = "jitter") +
  facet_wrap(~position) +
  ggtitle("Player Height v. Weight") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p5)
Task 7 - Investigate whether there is a difference in the height by
year played.
# Box plots relating player height to year_start.
# The title previously read "Player Height v. Weight", copied from the
# scatter plots; weight is not part of this chart.
# NOTE(review): height in bbo is a factor and year_start is presumably
# numeric. In that case ggplot2 (>= 3.3) draws one horizontal box per height
# level summarising year_start, rather than one box per year. Confirm this
# is the intended view.
p6 <- ggplot(bbo, aes(y = height, x = year_start)) +
  geom_boxplot(color = "blue") +
  ggtitle("Player Height by Year Started") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p6)
Task 8 - Create a 3D plot of height vs. weight vs. year and color
code the points by position.
# 3D scatter of height v. weight v. year_start, with the points colour coded
# by position and each axis given a readable title.
p7 <- plot_ly(bbo, x = ~height, y = ~weight, z = ~year_start, color = ~position) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Player Height"),
      yaxis = list(title = "Player Weight"),
      zaxis = list(title = "Year Started")
    )
  )
# p7 is built with plot_ly(), so it is already a plotly object; printing it
# renders the plot. ggplotly() is only for converting ggplot objects.
p7
Task 9 - Choose a visualization from online resource and modify to
use basketball data
library(ggplot2)
# Make theme_classic() the default ggplot2 theme. This is a session-wide
# setting, so it also applies to every plot created after this point.
theme_set(theme_classic())
# Overlaid, semi-transparent density curves of player weight, one curve per
# position.
g <- ggplot(bbo, aes(weight))
p8 <- g +
  geom_density(aes(fill = factor(position)), alpha = 0.8) +
  ggtitle("Density plot", subtitle = "Weight by Position") +
  xlab("Player Weight") +
  labs(fill = "Position")
ggplotly(p8)
Task 10 - Separate data set analysis of income v. education
level.
# Load the education/income data from the CSV file on disk.
inc <- read.csv("C:/Users/gerde/Downloads/Education_Income.csv")
# Box plot of Income2005 for each Educ category, filled by category.
# The title previously read "Player Height v. Weight", copied from the
# basketball plots; it now describes what this chart shows.
# The y axis uses comma-separated labels so large values are easier to read.
inc2 <- ggplot(inc, aes(y = Income2005, x = Educ, fill = Educ)) +
  geom_boxplot(color = "blue") +
  ggtitle("Income v. Education Level") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_y_continuous(labels = scales::comma)
ggplotly(inc2)