# Environment setup: working directory and packages ----

# Print the current working directory, then switch to the folder that the
# script reads its data file from.
# NOTE(review): this path is machine-specific; change it (or switch to a
# project-relative path) before running the script on another machine.
getwd()
setwd("/Users/taylorbosse/Desktop")

# Install only the packages that are not available yet, so that re-running the
# script does not download and reinstall everything each time.
required_packages <- c(
  "tidyverse", "readr", "dplyr", "lubridate",
  "scales", "ggplot2", "ggthemes", "RColorBrewer"
)
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}

# Attach the packages in the same order as before (order affects masking).
library(tidyverse)
library(readr)
library(dplyr)
library(lubridate)
library(scales)
library(ggplot2)
library(ggthemes)
library(RColorBrewer)
# Load and inspect the free-throw data ----
library(data.table)

# Read the CSV (resolved against the current working directory) with fread().
filename <- "free_throws.csv"
df <- fread(filename)

# First look at the table: contents, column names, first and last rows,
# dimensions, per-column summaries and structure.
df
colnames(df)
head(df)
tail(df)
dim(df)
summary(df)
str(df)

# Missing-value audit: totals for the whole table, then per row and per
# column, each as a count of missing and of non-missing cells.
sum(is.na(df))
sum(!is.na(df))
rowSums(is.na(df))
rowSums(!is.na(df))
colSums(is.na(df))
colSums(!is.na(df))
# Plot 1: number of free throws (rows of `df`) per season ----
# `season` is a discrete label, so the bars are plain per-season counts.
# geom_bar() computes those counts directly; geom_histogram() with
# stat = "count" did the same thing but ignored `bins` with a warning.
one <- ggplot(df, aes(x = season)) +
  geom_bar(color = "darkblue", fill = "lightblue") +
  labs(
    title = "Histogram of Free Throws by Season",
    x = "Season",
    y = "# of Free Throws"
  ) +
  # Thousands separators on the count axis (scales::comma).
  scale_y_continuous(labels = comma)
one
# Free-throw attempts per player, sorted from most to fewest ----
# count() yields one row per player with the number of rows in column `n`.
playercount <- data.frame(count(df, player))
head(playercount)

# Sort descending by the count column.
playercount <- playercount[order(playercount$n, decreasing = TRUE), ]
head(playercount)
str(playercount)

# Store the counts as double instead of integer.
playercount$n <- as.numeric(playercount$n)
str(playercount)
# Plot 2: the ten players with the most free throws ----
head(playercount, 10)

# head() (rather than playercount[1:10, ]) avoids all-NA filler rows when the
# table has fewer than ten players. `playercount` is expected to be sorted
# descending by `n` already, so these are the top ten.
top_players <- head(playercount, 10)

# reorder(player, -n) orders the bars from the largest count to the smallest.
two <- ggplot(top_players, aes(x = reorder(player, -n), y = n)) +
  geom_col(color = "black", fill = "gray76") +
  labs(
    title = "Number of Free Throws by Player (Top 10) 2006-2016",
    x = "Player",
    y = "# of Free Throws"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
two
# Made vs. missed free throws per season, periods 1-4 only ----
# Keep only rows whose period is at most 4.
periods <- df[df$period <= 4, ]

periods_df <- periods %>%
  select(season, shot_made) %>%
  # Label each attempt; any value other than "1" or "0" gets the string "NA".
  mutate(
    ft_status = ifelse(
      shot_made == "1",
      "Made",
      ifelse(shot_made == "0", "Missed", "NA")
    )
  ) %>%
  # Count attempts per season and status.
  group_by(season, ft_status) %>%
  summarise(n = n(), .groups = "drop") %>%
  # Share of each status within its season, in percent with one decimal.
  group_by(season) %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

head(periods_df)
str(periods_df)
# Plot 3: pie charts of made vs. missed free throws, one per season ----
# A single bar (x = "") is stacked to proportions with position = "fill" and
# then wrapped into a circle by coord_polar(), giving one pie per facet.
three <- ggplot(data = periods_df, aes(x = "", y = n, fill = ft_status)) +
  geom_bar(stat = "identity", position = "fill") +
  coord_polar(theta = "y", start = 0) +
  labs(
    fill = "Shot",
    x = NULL,
    y = NULL,
    title = "Pie Chart: # of Made vs Missed Free Throws per Season",
    caption = "Only using Free Throw Statistics for Periods 1-4"
  ) +
  theme_light() +
  # Strip axis text, ticks and grid lines, which carry no meaning on a pie.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  # Fixed layout of 5 columns by 2 rows of season facets.
  facet_wrap(~season, ncol = 5, nrow = 2) +
  scale_fill_brewer(palette = "Blues") +
  # Percentage labels at x = 1.7, centred along each slice.
  geom_text(
    aes(x = 1.7, label = paste0(percent_of_total, "%")),
    size = 4,
    position = position_fill(vjust = 0.5)
  )
three
# Restrict the period-filtered data to the "2008 - 2009" season ----
periods_s3 <- periods[periods$season == "2008 - 2009", ]
head(periods_s3)
# Made vs. missed free throws per period for the selected season ----
periods_s3_df <- periods_s3 %>%
  select(period, season, shot_made) %>%
  # Write the label into a new `ft_status` column. Overwriting `shot_made`
  # instead would leave `ft_status` undefined for the group_by() below.
  # Any value other than "1" or "0" gets the string "NA".
  mutate(
    ft_status = ifelse(
      shot_made == "1",
      "Made",
      ifelse(shot_made == "0", "Missed", "NA")
    )
  ) %>%
  # Count attempts per period, season and status.
  group_by(period, season, ft_status) %>%
  summarise(n = n(), .groups = "drop") %>%
  # Share of each status within its period, in percent with one decimal.
  group_by(period) %>%
  mutate(percent_of_total = round(100 * n / sum(n), 1)) %>%
  ungroup() %>%
  data.frame()

head(periods_s3_df)
str(periods_s3_df)

# Store the period as character so it is treated as a label, not a number.
periods_s3_df$period <- as.character(periods_s3_df$period)
str(periods_s3_df)
# Plot 4: pie charts of made vs. missed free throws, one per period ----
# A single bar (x = "") is stacked to proportions with position = "fill" and
# then wrapped into a circle by coord_polar(), giving one pie per facet.
four <- ggplot(data = periods_s3_df, aes(x = "", y = n, fill = ft_status)) +
  geom_bar(stat = "identity", position = "fill") +
  coord_polar(theta = "y", start = 0) +
  labs(
    fill = "Shot",
    x = NULL,
    y = NULL,
    title = "Pie Chart: Made vs Missed Free Throws by Period for the 2008-2009 Season",
    caption = "Only using Free Throw Statistics for Periods 1-4"
  ) +
  theme_light() +
  # Strip axis text, ticks and grid lines, which carry no meaning on a pie.
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank()
  ) +
  # Fixed layout of 2 columns by 2 rows of period facets.
  facet_wrap(~period, ncol = 2, nrow = 2) +
  scale_fill_brewer(palette = "Blues") +
  # Percentage labels at x = 1.7, centred along each slice.
  geom_text(
    aes(x = 1.7, label = paste0(percent_of_total, "%")),
    size = 4,
    position = position_fill(vjust = 0.5)
  )
four
# Counts of free throws per period, season and outcome (heatmap input) ----
heatmap_periods_df <- periods %>%
  select(period, season, shot_made) %>%
  # Label each attempt; any value other than "1" or "0" gets the string "NA".
  mutate(
    ft_status = ifelse(
      shot_made == "1",
      "Made",
      ifelse(shot_made == "0", "Missed", "NA")
    )
  ) %>%
  group_by(period, season, ft_status) %>%
  # One row per combination with its row count; grouping is dropped before
  # the result is converted to a plain data frame.
  summarise(n = n(), .groups = "drop") %>%
  data.frame()

head(heatmap_periods_df)
# Keep only the "Made" rows of the per-period counts ----
madeftperiods <- heatmap_periods_df[heatmap_periods_df$ft_status == "Made", ]
str(madeftperiods)

# Store the period as character so it is treated as a label, not a number.
madeftperiods$period <- as.character(madeftperiods$period)
str(madeftperiods)
head(madeftperiods)
# Plot 5: heatmap of made free throws by period (rows) and season (columns) ----
five <- ggplot(madeftperiods, aes(x = season, y = period, fill = n)) +
  geom_tile(color = "black") +
  # Print the count inside each tile.
  geom_text(aes(label = n)) +
  # Square tiles.
  coord_equal(ratio = 1) +
  labs(
    title = "Heatmap: # of Made Free Throws by Period per Season",
    x = "Season",
    y = "Period",
    fill = "# of FT"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Reverse the period axis so the lowest period is at the top. The limits
  # come from the `period` values themselves: the previous lookup of
  # levels(madeftperiods$Season) referred to a non-existent column, evaluated
  # to NULL and therefore left the axis order unchanged.
  scale_y_discrete(limits = rev(sort(unique(madeftperiods$period)))) +
  scale_fill_continuous(low = "white", high = "red") +
  # Show the fill scale as a reversed, discrete-style legend with black key
  # outlines.
  guides(
    fill = guide_legend(reverse = TRUE, override.aes = list(colour = "black"))
  )
five
# Free throws per season split by game type ----
games <- df %>%
  select(playoffs, season, shot_made) %>%
  # Capitalised display labels for the two values tested here; any other
  # value of `playoffs` gets the string "NA".
  mutate(
    gametype = ifelse(
      playoffs == "regular",
      "Regular",
      ifelse(playoffs == "playoffs", "Playoffs", "NA")
    )
  ) %>%
  group_by(gametype, season) %>%
  # One row per game type and season with its row count.
  summarise(n = n(), .groups = "drop") %>%
  data.frame()

head(games)
# Inspect the summarised table. `gametype` is a column of `games`, not an
# object of its own, so str(gametype) would fail.
str(games)
# Plot 6: free throws per season, one line per game type ----
# `group = gametype` is needed because the x axis is discrete; without it
# ggplot would not connect points across seasons.
six <- ggplot(games, aes(x = season, y = n, group = gametype)) +
  # Line thickness is set with `linewidth` (`size` is deprecated for lines).
  geom_line(aes(color = gametype), linewidth = 3) +
  labs(
    title = "Multiple Line Plot: # of Free Throws per Season by Game Type",
    x = "Season",
    y = "# of Free Throws"
  ) +
  theme_light() +
  theme(plot.title = element_text(hjust = 0.5)) +
  # Point markers are sized with `size`; `linewidth` is not a point aesthetic
  # and was silently ignored.
  geom_point(shape = 21, size = 5, color = "black", fill = "white") +
  scale_color_brewer(palette = "Paired", name = "Game Type")
six