# Load the MLB player roster: one row per player with name, team, position,
# height (inches), weight (lbs) and age.
# NOTE(review): this is a machine-specific absolute path; point `data_path`
# at your own copy of mlb_players.csv before running the script elsewhere.
data_path <- "/Users/eunseokim/Desktop/mlb_players.csv"
data <- read.csv(data_path, header = TRUE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# NOTE(review): the warning above says the parser reached end-of-file while
# still inside a double-quoted field. Presumably there is an unbalanced quote
# near the end of the file -- TODO inspect the last lines of the CSV and
# confirm no records were merged or dropped.

# Team and Position are read with a leading space (e.g. " BAL", " Catcher").
# Trim every character column once here so grouping and axis labels are clean.
text_cols <- vapply(data, is.character, logical(1))
data[text_cols] <- lapply(data[text_cols], trimws)

# First rows, as a quick sanity check of the parsed columns.
head(data)
## Name Team Position Height.inches. Weight.lbs. Age
## 1 Adam Donachie BAL Catcher 74 180 22.99
## 2 Paul Bako BAL Catcher 74 215 34.69
## 3 Ramon Hernandez BAL Catcher 72 210 30.78
## 4 Kevin Millar BAL First Baseman 72 210 35.43
## 5 Chris Gomez BAL First Baseman 73 188 35.71
## 6 Brian Roberts BAL Second Baseman 69 176 29.39

# Column types and dimensions.
str(data)
## 'data.frame': 1034 obs. of 6 variables:
## $ Name : chr "Adam Donachie" "Paul Bako" "Ramon Hernandez" "Kevin Millar" ...
## $ Team : chr "BAL" "BAL" "BAL" "BAL" ...
## $ Position : chr "Catcher" "Catcher" "Catcher" "First Baseman" ...
## $ Height.inches.: int 74 74 72 72 73 69 69 71 76 71 ...
## $ Weight.lbs. : int 180 215 210 210 188 176 209 200 231 180 ...
## $ Age : num 23 34.7 30.8 35.4 35.7 ...

# Per-column summaries; note the single missing value in Weight.lbs.
summary(data)
## Name Team Position Height.inches.
## Length:1034 Length:1034 Length:1034 Min. :67.0
## Class :character Class :character Class :character 1st Qu.:72.0
## Mode :character Mode :character Mode :character Median :74.0
## Mean :73.7
## 3rd Qu.:75.0
## Max. :83.0
##
## Weight.lbs. Age
## Min. :150.0 Min. :20.90
## 1st Qu.:187.0 1st Qu.:25.44
## Median :200.0 Median :27.93
## Mean :201.7 Mean :28.74
## 3rd Qu.:215.0 3rd Qu.:31.23
## Max. :290.0 Max. :48.52
## NA's :1
# Histogram of player weight in 10-lb bins.
# The bin edges are derived from the observed range instead of being
# hard-coded, so the plot keeps working (and hist() does not fail on values
# outside the breaks) if the data changes. Missing weights are ignored.
weight_bin_width <- 10
weight_range <- range(data$Weight.lbs., na.rm = TRUE)
weight_lower <- floor(weight_range[1] / weight_bin_width) * weight_bin_width
weight_upper <- ceiling(weight_range[2] / weight_bin_width) * weight_bin_width
# Guarantee at least one bin when every weight is the same multiple of 10.
weight_upper <- max(weight_upper, weight_lower + weight_bin_width)
weight_breaks <- seq(weight_lower, weight_upper, by = weight_bin_width)

hist(data$Weight.lbs.,
     xlim = range(weight_breaks),
     breaks = weight_breaks,
     xaxt = "n",  # suppress the default axis; a custom one is drawn below
     xlab = "Weight (lbs)",
     main = "Histogram of Weight",
     col = "yellow")
# Label every bin edge so each bar's weight interval can be read directly.
axis(1, at = weight_breaks, labels = weight_breaks)

# Observed weight range that the bins above were built from.
weight_range
## [1] 150 290
# Answer 2:
# This histogram shows the distribution of Weight.lbs.
# Custom x-axis labels make it easier to interpret the weight ranges.
# Side-by-side box plots of weight, one box per player position.
# Position is trimmed inside the formula so that stray leading/trailing
# whitespace in the raw labels neither offsets the axis labels nor splits one
# position into several groups. `data = data` lets the formula refer to the
# columns by name.
boxplot(Weight.lbs. ~ trimws(Position),
        data = data,
        xlab = "Position",
        ylab = "Weight (lbs)",
        main = "Box Plot of Weight by Position",
        col = "lightblue")

# Answer 3:
# This box plot illustrates the weight distribution for each player position.
# It enables us to observe variations in the median and spread of weight among positions,
# while also identifying any outliers within each category.
# Scatter plot of weight (y) against height (x), one point per player.
with(
  data,
  plot(
    x = Height.inches.,
    y = Weight.lbs.,
    col = "purple",
    main = "Scatter Plot of Height vs Weight",
    xlab = "Height (inches)",
    ylab = "Weight (lbs)"
  )
)

# Answer 4: This scatter plot illustrates the relationship between height and weight.
# The upward trend indicates a positive correlation: taller players generally weigh more.
# We can also detect any anomalies or clusters within the data.