1. First I am going to create an object that is a tibble of the Master dataset. Then I am going to create a bar chart showing the frequencies of each batting stance.
#Convert Master to a tibble and keep only the rows with a recorded batting stance
myMaster <- Master %>%
  as_tibble() %>%
  drop_na(bats)
#Base plot object with batting stance on the x axis; layers are added later
batChart <- ggplot(myMaster, aes(x = bats))
#Add additional details
#NOTE(review): this call fails. The scale argument is spelled `lables`, and the
#error printed below reports it as an unused argument; the later attempt that
#spells it `labels` runs. Kept unchanged so the recorded error stays accurate.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch","Left","Right"))
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("Switch", "Left", "Right"))

Does this mean I have an extra row that I don’t know about that I didn’t name?

#Draw the chart without relabelling the x axis, to see how many bars it has
batChart +
  geom_bar(color = "red", fill = "orange") +
  labs(
    x = "Batting Stance",
    y = "Frequency",
    title = "Batting Stances for MLB Players"
  )

No, there are only three columns in the graph. Why can’t I rename them? Is it an error in my syntax? I’m reading the help page for scale_x_discrete and I’m not seeing anything wrong in my syntax. Maybe I had a typo. I will type it out again and see.

#Add additional details
#NOTE(review): retyped attempt; the argument is still spelled `lables`, so the
#same "unused argument" error is printed below.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch","Left","Right"))
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("Switch", "Left", "Right"))

Why is the argument unused? Maybe I should specify which labels go to which column.

#Add additional details
#Attempt with a named vector pairing each label with a stance code.
#NOTE(review): still fails because the argument is spelled `lables` (see the
#error below). If a named vector is wanted once the spelling is fixed, the names
#presumably need to be the data values (B, L, R) and the values the display
#text, i.e. the reverse of what is written here - confirm in the ggplot2 docs.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch"="B","Left"="L","Right"="R"))
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("B", "L", "R"))

No that didn’t work either. What happens if I take out the “c” in the code?

#Add additional details
#NOTE(review): this line does not parse. Without c(), the parenthesised
#comma-separated strings are not a valid expression, which is the
#"unexpected ','" reported below. The `lables` spelling is also still present.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=("Switch","Left","Right"))
## Error: <text>:2:158: unexpected ','
## 1: #Add additional details
## 2: batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=("Switch",
##                                                                                                                                                                 ^

That definitely didn’t work. I’m going to move on to question 2 and come back to this later.
I figured it out. It’s still giving a name to the NA column even though it doesn’t exist. That’s what it did in the legend for question 2. So I need to give the NA column a NULL name so that it doesn’t appear on the chart but doesn’t throw up an error either.

#Add additional details
#Attempt that adds na.value=FALSE to the x scale.
#NOTE(review): the error below is unchanged from the earlier attempts: it still
#complains about the misspelled `lables` argument, not about na.value.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch","Left","Right"),na.value=FALSE)
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("Switch", "Left", "Right"))

What if I use NULL instead of FALSE?

#Add additional details
#Same attempt with na.value=NULL instead of FALSE.
#NOTE(review): the error below still refers to the misspelled `lables`
#argument, so changing na.value has no bearing on the failure.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch","Left","Right"),na.value=NULL)
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("Switch", "Left", "Right"))
#Putting NULL in a different place to see if that works
#NOTE(review): no NULL appears anywhere in the call below; it is identical to
#the very first attempt, including the `lables` spelling, and fails the same way.
batChart+geom_bar(color="red",fill="orange")+labs(x="Batting Stance",y="Frequency",title="Batting Stances for MLB Players")+scale_x_discrete(lables=c("Switch","Left","Right"))
## Error in discrete_scale(c("x", "xmin", "xmax", "xend"), "position_d", : unused argument (lables = c("Switch", "Left", "Right"))

I’m really confused about why this isn’t working. It works fine in question 2. I’m going to use the piece of code from question 2 and see if it works instead.

#Add additional details
#The x-axis labels are given as a named vector (data value = display label).
#That ties each label to its own stance code (B, L, R) instead of relying on
#the order in which the levels happen to be drawn.
batChart +
  geom_bar(color = "red", fill = "orange") +
  labs(
    x = "Batting Stance",
    y = "Frequency",
    title = "Batting Stances for MLB Players"
  ) +
  scale_x_discrete(labels = c(B = "Switch", L = "Left", R = "Right"))

Well, that worked. I must have had a typo that I overlooked: the earlier attempts spelled the argument `lables` instead of `labels`, which is why R reported an unused argument. I guess the NA column wasn’t the problem after all.
2. Now I’m going to create a bar chart with the frequencies for batting stance and throwing hand.

#Keep only rows where both throwing hand and batting stance are recorded
myMaster <- myMaster %>%
  drop_na(throws, bats)
#Stack each batting-stance bar by throwing hand and title the legend
#NOTE(review): batChart was created from myMaster before the filter above, so
#it presumably still plots the rows as they were at that point - confirm.
batChart +
  geom_bar(aes(fill = as.factor(throws))) +
  scale_fill_discrete(name = "Throwing Hand")

#Same stacked chart with axis titles, a plot title, and relabelled x axis and legend
batChart +
  geom_bar(aes(fill = as.factor(throws))) +
  labs(
    x = "Batting Stance/Throwing Hand",
    y = "Frequency",
    title = "Frequency of Batting Stances and Throwing Hand for MLB Players"
  ) +
  scale_x_discrete(labels = c("Switch", "Left", "Right")) +
  scale_fill_discrete(
    name = "Throwing Hand",
    labels = c("Left", "Right", "Switch")
  )

Alright, that is working. I had to go back a few times and troubleshoot some syntax errors but other than that there were no problems. However, NA still shows up in the legend even though there are no NA values so I’m going to try and remove it after looking at the help function for scale_fill_discrete. I will use na.value to remove the NA box from the legend.

#Remove NA from legend
#NOTE(review): passing na.value=NULL to the fill scale produces the warning and
#the "replacement has length zero" error printed below, so this attempt draws
#nothing. Kept unchanged so the recorded output stays accurate.
batChart+geom_bar(aes(fill=as.factor(throws)))+labs(x="Batting Stance/Throwing Hand",y="Frequency",title="Frequency of Batting Stances and Throwing Hand for MLB Players")+scale_x_discrete(labels=c("Switch","Left","Right"))+scale_fill_discrete(name="Throwing Hand",labels=c("Left","Right","Switch"),na.value=NULL)
## Warning in rep(yes, length.out = len): 'x' is NULL so the result will be NULL
## Error in ans[ypos] <- rep(yes, length.out = len)[ypos]: replacement has length zero

I did the wrong thing. I think I need to use na.translate instead of na.value.

#Remove NA from legend
#batChart was created before drop_na(throws, bats) was applied to myMaster, so
#it still carries the rows with a missing throwing hand; that is where the NA
#legend entry comes from. The plot is therefore built from the cleaned
#myMaster here, so those rows are excluded from the data itself.
#na.translate=FALSE is kept so a missing value can never reach the legend.
#The x-axis labels are named by stance code (B, L, R) so they do not depend on
#level order.
ggplot(data = myMaster, aes(x = bats)) +
  geom_bar(aes(fill = as.factor(throws))) +
  labs(
    x = "Batting Stance/Throwing Hand",
    y = "Frequency",
    title = "Frequency of Batting Stances and Throwing Hand for MLB Players"
  ) +
  scale_x_discrete(labels = c(B = "Switch", L = "Left", R = "Right")) +
  scale_fill_discrete(
    name = "Throwing Hand",
    labels = c("Left", "Right", "Switch"),
    na.translate = FALSE
  )

  1. To start out I’m going to convert the Batting dataset into a tibble. Then I’m going to create a histogram of the games played variable.
#Convert Batting to a tibble and keep only rows where games played is recorded
myBatting <- Batting %>%
  as_tibble() %>%
  drop_na(G)
#Base plot object with games played on the x axis
gamesChart <- ggplot(myBatting, aes(x = G))
#Histogram in bins of 5 games, with axis titles and a plot title
gamesChart +
  geom_histogram(color = "blue", fill = "purple", size = 1, binwidth = 5) +
  labs(
    x = "Games Played",
    y = "Frequency",
    title = "Frequency of Games Played by Players"
  )

  1. Now I am going to create a box plot of the hits variable with the league variable as the grouping variable. I’m not entirely sure what I am doing here so this will probably require some experimentation.
#Keep only rows where hits is recorded
myBatting <- myBatting %>%
  drop_na(H)
#Count the rows for every combination of hits value and league
table1 <- myBatting %>%
  group_by(H, lgID) %>%
  summarize(Count = n())
## `summarise()` regrouping output by 'H' (override with `.groups` argument)
#Base plot: league on x, row count on y, one fill group per hits value
hitsChart <- ggplot(table1, aes(x = lgID, y = Count, fill = as.factor(H)))
#Add details
#NOTE(review): this call fails with "Can't find `stat` called 'identify'" (see
#below). Possibly "identity" was intended - confirm; kept unchanged so the
#recorded error stays accurate.
hitsChart+geom_boxplot(stat="identify",position="dodge")+labs(x="League",y="Number of Hits",title="Number of Hits Per League")
## Error: Can't find `stat` called 'identify'

I’m going to try dropping the stats and position and see what happens.

#Box plot with the default stat and position, plus axis titles and a plot title
hitsChart +
  geom_boxplot() +
  labs(
    x = "League",
    y = "Number of Hits",
    title = "Number of Hits Per League"
  )

Whoa. I have no idea what that is but I don’t think it’s right. Let’s see if I can clean it up a bit. I’ll try adding the position back.

#Same box plot with the dodge position stated explicitly
hitsChart +
  geom_boxplot(position = "dodge") +
  labs(
    x = "League",
    y = "Number of Hits",
    title = "Number of Hits Per League"
  )

That looks exactly the same. I think I messed up earlier in the code, by summarizing the Count instead of the hits.

#Edit box plot code
#The previous version grouped by H and lgID and then overwrote H with n(), so
#the y axis showed how many rows share a hits value, not the hits themselves.
#A box plot of hits grouped by league needs the raw H values, so the plot is
#built straight from myBatting (rows with missing H were already dropped).
#table1 is no longer re-summarised here and keeps its earlier contents.
hitsChart <- ggplot(data = myBatting, aes(x = lgID, y = H))
#Edit details
hitsChart +
  geom_boxplot(position = "dodge") +
  labs(
    x = "League",
    y = "Number of Hits",
    title = "Number of Hits Per League"
  )

That actually looks like a chart, so I think it is correct.