DS_6306 FLS Work

Basketball Analysis

Task 1 - Use the PlayersBBall.csv dataset to visually represent (summarize) the number of players in each position.

# Load necessary libraries
library(dplyr)
library(ggplot2)
library(plotly)
library(ggthemes)
# Load the player data into a data frame (absolute Windows-style path).
bb <- read.csv(file = "C:/Users/gerde/Downloads/PlayersBBall.csv")

# Tally players per position, leaving out rows whose position is blank.
poscnt <- summarise(
  group_by(bb[bb$position != "", ], position),
  count = n()
)
poscnt
## # A tibble: 7 × 2
##   position count
##   <chr>    <int>
## 1 C          502
## 2 C-F        219
## 3 F         1290
## 4 F-C        388
## 5 F-G        216
## 6 G         1574
## 7 G-F        360
# Bar chart of the per-position tallies. The legend is hidden because the
# x axis already names every position.
p <- ggplot(
  data = poscnt,
  mapping = aes(x = position, y = count, fill = position)
) +
  geom_bar(stat = "identity") +
  ggtitle("Number of Players by Position") +
  xlab("Position") +
  ylab("Number of Players") +
  theme(legend.position = "none")

# Convert to an interactive plotly widget and display it
ggplotly(p)

Task 2 - Visually investigate distribution of weight for Centers and Forwards

# Weight histograms (10-unit bins) for Forwards ("F") and Centers ("C"),
# drawn side by side with one facet per position.
bb %>%
  filter(position %in% c("F", "C")) %>%
  ggplot(mapping = aes(x = weight, fill = position)) +
  geom_histogram(position = "identity", binwidth = 10) +
  facet_wrap(~ position) +
  ggtitle("Histogram of Weight by Position") +
  xlab("Weight") +
  ylab("Count") +
  theme(legend.position = "none")

Task 3 - Visually investigate distribution of height for Centers and Forwards

# Bar chart of player counts at each listed height for Forwards ("F") and
# Centers ("C"), one facet per position.
#
# Heights are "feet-inches" strings (e.g. "6-10"). Left as plain strings they
# sort alphabetically on the axis ("5-10" before "5-9", "6-10" before "6-2"),
# so they are converted to a factor whose levels are ordered by total inches.
fc_players <- bb %>% filter(position %in% c("F", "C"))

fc_heights <- unique(as.character(fc_players$height))
fc_inches <- vapply(
  strsplit(fc_heights, "-", fixed = TRUE),
  function(parts) 12 * as.numeric(parts[1]) + as.numeric(parts[2]),
  numeric(1)
)
# order() places heights that did not parse (NA inches) after all others
fc_players$height <- factor(
  as.character(fc_players$height),
  levels = fc_heights[order(fc_inches)]
)

p1 <- ggplot(fc_players, aes(x = height, fill = position)) +
  geom_bar() +
  facet_wrap(~ position) +
  labs(title = "Histogram of Height by Position", x = "Height", y = "Count") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = "none")
ggplotly(p1)

Task 4 - Visually investigate distribution of height for all positions

# Bar chart of player counts at each listed height, one facet per position,
# for every row that has a non-blank position.
#
# Heights are "feet-inches" strings (e.g. "6-10"). Left as plain strings they
# sort alphabetically on the axis ("5-10" before "5-9", "6-10" before "6-2"),
# so they are converted to a factor whose levels are ordered by total inches.
listed_players <- bb[bb$position != "", ]

listed_heights <- unique(as.character(listed_players$height))
listed_inches <- vapply(
  strsplit(listed_heights, "-", fixed = TRUE),
  function(parts) 12 * as.numeric(parts[1]) + as.numeric(parts[2]),
  numeric(1)
)
# order() places heights that did not parse (NA inches) after all others
listed_players$height <- factor(
  as.character(listed_players$height),
  levels = listed_heights[order(listed_inches)]
)

p2 <- ggplot(listed_players, aes(x = height, fill = position)) +
  geom_bar() +
  facet_wrap(~ position) +
  labs(title = "Histogram of Height by Position", x = "Height", y = "Count") +
  theme(axis.text.x = element_text(angle = 90)) +
  theme(legend.position = "none")
ggplotly(p2)

Task 5 - Investigate player height v. weight

# First attempt: jittered scatter of height against weight, coloured by
# position, using height exactly as it was read from the file.
p3 <- ggplot(
  data = bb[bb$position != "", ],
  mapping = aes(x = height, y = weight, color = position)
) +
  geom_point(position = "jitter") +
  labs(title = "Player Height v. Weight")
ggplotly(p3)

# The plot above puts the heights in the wrong order on the x axis, so the
# intended order is spelled out by hand here.
horder <- c(
  "5-3", "5-5", "5-6", "5-7", "5-8", "5-9", "5-10", "5-11",
  "6-0", "6-1", "6-2", "6-3", "6-4", "6-5", "6-6", "6-7",
  "6-8", "6-9", "6-10", "6-11",
  "7-0", "7-1", "7-2", "7-3", "7-4", "7-5", "7-6", "7-7"
)

# Rows with a position, with height recoded as a factor in the order above.
# Any height value that is not listed in horder becomes NA.
bbo <- mutate(
  bb[bb$position != "", ],
  height = factor(height, levels = horder)
)

# Same scatter as before, now with the heights in the intended order
p4 <- ggplot(
  data = bbo,
  mapping = aes(x = height, y = weight, color = position)
) +
  geom_point(position = "jitter") +
  labs(title = "Player Height v. Weight") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p4)

Task 6 - Investigate whether there is a difference in the height vs. weight trend by position

# Height v. weight scatter split into one panel per position, so each
# position's trend can be read on its own.
p5 <- ggplot(
  data = bbo,
  mapping = aes(x = height, y = weight, color = position)
) +
  geom_point(position = "jitter") +
  labs(title = "Player Height v. Weight") +
  theme(axis.text.x = element_text(angle = 90)) +
  facet_wrap(~ position)
ggplotly(p5)

Task 7 - Investigate whether there is a difference in the height by year played.

# Box plot relating player height (y, the ordered factor from bbo) to the
# year each player started (x, year_start).
# The title previously read "Player Height v. Weight", copied from the
# scatter plots; weight is not part of this plot.
# NOTE(review): height is a factor here, so recent ggplot2 versions appear to
# draw one box per height level summarising year_start, rather than one box
# per year summarising height -- confirm this is the intended view.
p6 <- ggplot(bbo, aes(y = height, x = year_start)) +
  geom_boxplot(color = "blue") +
  ggtitle("Player Height by Year Started") +
  theme(axis.text.x = element_text(angle = 90))
ggplotly(p6)

Task 8 - Create a 3D plot of height vs. weight vs. year and color code the points by position.

# 3D scatter of height (x) v. weight (y) v. starting year (z), with the
# markers coloured by position.
p7 <- plot_ly(
  bbo,
  x = ~height, y = ~weight, z = ~year_start,
  color = ~position
) %>%
  add_markers() %>%
  layout(
    scene = list(
      xaxis = list(title = "Player Height"),
      yaxis = list(title = "Player Weight"),
      zaxis = list(title = "Year Started")
    )
  )
# plot_ly() already returns a plotly object, so it is displayed directly;
# ggplotly() is only needed to convert ggplot objects.
p7

Task 9 - Choose a visualization from online resource and modify to use basketball data

library(ggplot2)
# From here on, every ggplot uses the classic theme by default
theme_set(theme_classic())

# Overlaid, semi-transparent density curves of player weight, one curve per
# position.
g <- ggplot(data = bbo, mapping = aes(weight))
p8 <- g +
  geom_density(mapping = aes(fill = factor(position)), alpha = 0.8) +
  ggtitle(label = "Density plot", subtitle = "Weight by Position") +
  xlab("Player Weight") +
  labs(fill = "Position")

ggplotly(p8)

Task 10 - Separate data set analysis of income v. education level.

# Load the education / income data (absolute Windows-style path).
inc <- read.csv("C:/Users/gerde/Downloads/Education_Income.csv")

# Box plot of Income2005 for each Educ level, with comma-formatted income
# labels on the y axis.
# The title previously read "Player Height v. Weight", copied from the
# basketball plots; it now describes the variables that are plotted.
inc2 <- ggplot(inc, aes(y = Income2005, x = Educ, fill = Educ)) +
  geom_boxplot(color = "blue") +
  ggtitle("Income v. Education Level") +
  theme(axis.text.x = element_text(angle = 90)) +
  scale_y_continuous(labels = scales::comma)
ggplotly(inc2)