# Load the MLB player table (Name, Team, Position, height, weight, age).
# The location defaults to the original download path but can be overridden
# with the MLB_PLAYERS_CSV environment variable, so the script is not tied to
# a single machine.
csv_path <- Sys.getenv(
  "MLB_PLAYERS_CSV",
  unset = "/Users/jinvy/Downloads/mlb_players.csv"
)
# Fail early with a clear message instead of read.csv()'s generic
# "cannot open file" error.
if (!file.exists(csv_path)) {
  stop("Player data file not found: ", csv_path, call. = FALSE)
}
# NOTE(review): read.csv() reports "EOF within quoted string" for this file,
# which usually points to an unbalanced double quote in the CSV. Rows after
# such a quote can be merged silently, so confirm the row count against the
# file itself.
data <- read.csv(csv_path, header = TRUE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Preview the first six rows to check that the columns parsed as expected.
head(data, n = 6L)
##              Name Team        Position Height.inches. Weight.lbs.   Age
## 1   Adam Donachie  BAL         Catcher             74         180 22.99
## 2       Paul Bako  BAL         Catcher             74         215 34.69
## 3 Ramon Hernandez  BAL         Catcher             72         210 30.78
## 4    Kevin Millar  BAL   First Baseman             72         210 35.43
## 5     Chris Gomez  BAL   First Baseman             73         188 35.71
## 6   Brian Roberts  BAL  Second Baseman             69         176 29.39
# Show the type and first few values of every column.
str(object = data)
## 'data.frame':    1034 obs. of  6 variables:
##  $ Name          : chr  "Adam Donachie" "Paul Bako" "Ramon Hernandez" "Kevin Millar" ...
##  $ Team          : chr  " BAL" " BAL" " BAL" " BAL" ...
##  $ Position      : chr  " Catcher" " Catcher" " Catcher" " First Baseman" ...
##  $ Height.inches.: int  74 74 72 72 73 69 69 71 76 71 ...
##  $ Weight.lbs.   : int  180 215 210 210 188 176 209 200 231 180 ...
##  $ Age           : num  23 34.7 30.8 35.4 35.7 ...
# Per-column summary: quartiles and mean for numeric columns (plus a count
# of missing values), length and class for character columns.
summary(object = data)
##      Name               Team             Position         Height.inches.
##  Length:1034        Length:1034        Length:1034        Min.   :67.0  
##  Class :character   Class :character   Class :character   1st Qu.:72.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :74.0  
##                                                           Mean   :73.7  
##                                                           3rd Qu.:75.0  
##                                                           Max.   :83.0  
##                                                                         
##   Weight.lbs.         Age       
##  Min.   :150.0   Min.   :20.90  
##  1st Qu.:187.0   1st Qu.:25.44  
##  Median :200.0   Median :27.93  
##  Mean   :201.7   Mean   :28.74  
##  3rd Qu.:215.0   3rd Qu.:31.23  
##  Max.   :290.0   Max.   :48.52  
##  NA's   :1
# Histogram of player weight in 10-lb bins.
# The bin edges are derived from the observed range (rounded outward to
# multiples of the bin width) rather than hard-coded, because hist() stops
# with an error when any value falls outside the supplied breaks.
weight_range <- range(data$Weight.lbs., na.rm = TRUE)
bin_width <- 10
lower_edge <- floor(weight_range[1] / bin_width) * bin_width
upper_edge <- ceiling(weight_range[2] / bin_width) * bin_width
if (upper_edge == lower_edge) {
  # A single break value would be read by hist() as a bin count, so make
  # sure there is always at least one full bin.
  upper_edge <- lower_edge + bin_width
}
weight_breaks <- seq(lower_edge, upper_edge, by = bin_width)

hist(data$Weight.lbs.,
     xlim = range(weight_breaks),
     breaks = weight_breaks,
     xaxt = "n",  # suppress the default axis; a custom one is drawn below
     xlab = "Weight (lbs)",
     main = "Histogram of Weight",
     col = "yellow")

# Label every bin edge on the x-axis.
axis(1, at = weight_breaks, labels = weight_breaks)

# Print the observed weight range (missing values ignored).
weight_range
## [1] 150 290
# Answer 2:
# This histogram shows the distribution of player weight (Weight.lbs.)
# in 10-lb bins. The custom x-axis labels mark every bin edge, which makes
# the weight ranges easier to read.

# Box plot of weight for each playing position.
# The formula refers to columns through `data = data` instead of repeating
# `data$`, and trimws() drops the leading space that the Position values
# carry (visible in the str() output) so the group labels are clean.
boxplot(Weight.lbs. ~ trimws(Position),
        data = data,
        xlab = "Position",
        ylab = "Weight (lbs)",
        main = "Box Plot of Weight by Position",
        col = "lightblue")

# Answer 3:
# This box plot shows the weight distribution for each player position.
# It lets us compare the median and spread of weight across positions,
# while also identifying any outliers within each position.

# Scatter plot with height on the x-axis and weight on the y-axis,
# drawn with purple points.
plot(
  x = data[["Height.inches."]],
  y = data[["Weight.lbs."]],
  main = "Scatter Plot of Height vs Weight",
  xlab = "Height (inches)",
  ylab = "Weight (lbs)",
  col = "purple"
)

# Answer 4: This scatter plot shows the relationship between height and weight.
# A positive correlation indicates that taller players generally weigh more.
# We can also spot any outliers or clusters in the data.