LIBRARIES USED

ISLR

ggplot2

dplyr

library(ggplot2)
library(dplyr)
library(ISLR)
# List the datasets shipped with ISLR, then keep a working copy of Hitters.
data(package = "ISLR")
data1 <- ISLR::Hitters
# Per-column summary of the Hitters data set.
summary(Hitters)
     AtBat            Hits         HmRun            Runs       
 Min.   : 16.0   Min.   :  1   Min.   : 0.00   Min.   :  0.00  
 1st Qu.:255.2   1st Qu.: 64   1st Qu.: 4.00   1st Qu.: 30.25  
 Median :379.5   Median : 96   Median : 8.00   Median : 48.00  
 Mean   :380.9   Mean   :101   Mean   :10.77   Mean   : 50.91  
 3rd Qu.:512.0   3rd Qu.:137   3rd Qu.:16.00   3rd Qu.: 69.00  
 Max.   :687.0   Max.   :238   Max.   :40.00   Max.   :130.00  
                                                               
      RBI             Walks            Years            CAtBat       
 Min.   :  0.00   Min.   :  0.00   Min.   : 1.000   Min.   :   19.0  
 1st Qu.: 28.00   1st Qu.: 22.00   1st Qu.: 4.000   1st Qu.:  816.8  
 Median : 44.00   Median : 35.00   Median : 6.000   Median : 1928.0  
 Mean   : 48.03   Mean   : 38.74   Mean   : 7.444   Mean   : 2648.7  
 3rd Qu.: 64.75   3rd Qu.: 53.00   3rd Qu.:11.000   3rd Qu.: 3924.2  
 Max.   :121.00   Max.   :105.00   Max.   :24.000   Max.   :14053.0  
                                                                     
     CHits            CHmRun           CRuns             CRBI        
 Min.   :   4.0   Min.   :  0.00   Min.   :   1.0   Min.   :   0.00  
 1st Qu.: 209.0   1st Qu.: 14.00   1st Qu.: 100.2   1st Qu.:  88.75  
 Median : 508.0   Median : 37.50   Median : 247.0   Median : 220.50  
 Mean   : 717.6   Mean   : 69.49   Mean   : 358.8   Mean   : 330.12  
 3rd Qu.:1059.2   3rd Qu.: 90.00   3rd Qu.: 526.2   3rd Qu.: 426.25  
 Max.   :4256.0   Max.   :548.00   Max.   :2165.0   Max.   :1659.00  
                                                                     
     CWalks        League  Division    PutOuts          Assists     
 Min.   :   0.00   A:175   E:157    Min.   :   0.0   Min.   :  0.0  
 1st Qu.:  67.25   N:147   W:165    1st Qu.: 109.2   1st Qu.:  7.0  
 Median : 170.50                    Median : 212.0   Median : 39.5  
 Mean   : 260.24                    Mean   : 288.9   Mean   :106.9  
 3rd Qu.: 339.25                    3rd Qu.: 325.0   3rd Qu.:166.0  
 Max.   :1566.00                    Max.   :1378.0   Max.   :492.0  
                                                                    
     Errors          Salary       NewLeague
 Min.   : 0.00   Min.   :  67.5   A:176    
 1st Qu.: 3.00   1st Qu.: 190.0   N:146    
 Median : 6.00   Median : 425.0            
 Mean   : 8.04   Mean   : 535.9            
 3rd Qu.:11.00   3rd Qu.: 750.0            
 Max.   :32.00   Max.   :2460.0            
                 NA's   :59                

## Plots

Histogram

Histogram of Salary: Shows the distribution of salaries.

# Histogram of Salary in bins of width 100. Rows with a missing Salary are
# dropped by ggplot2, which reports them in a warning.
Hitters %>%
  ggplot(mapping = aes(x = Salary)) +
  geom_histogram(
    binwidth = 100,
    colour = "black",
    fill = "steelblue"
  ) +
  ggtitle("Salary Distribution") +
  xlab("Salary") +
  ylab("Count") +
  theme_minimal()
## Warning: Removed 59 rows containing non-finite outside the scale range
## (`stat_bin()`).

Line plot

Career At-Bats by Years: Shows how career at-bats change with a player's number of years in the league.

# Mean career at-bats for each value of Years.
# Many players share the same Years value, so connecting the raw rows with
# geom_line() draws a vertical zig-zag through every player instead of a
# trend. Averaging per Years value leaves exactly one point per x position.
ggplot(Hitters, aes(x = Years, y = CAtBat)) +
  stat_summary(fun = mean, geom = "line", color = "darkred") +
  labs(
    title = "Career At-Bats Over Years",
    x = "Years",
    y = "Mean Career At-Bats"
  ) +
  theme_minimal()

Box plot

Salary by League: Compares the salary distributions of the two leagues (A and N).

# One box per League showing the spread of Salary. Rows with a missing
# Salary are dropped by ggplot2, which reports them in a warning.
ggplot(data = Hitters) +
  geom_boxplot(mapping = aes(x = League, y = Salary), fill = "orange") +
  ggtitle("Salary Distribution by League") +
  xlab("League") +
  ylab("Salary") +
  theme_minimal()
## Warning: Removed 59 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

Bar plot

Count of Players by League and Division: Shows the number of players in each league, split by division.

# Side-by-side bars: number of players per League, one bar per Division.
ggplot(data = Hitters) +
  geom_bar(
    mapping = aes(x = League, fill = Division),
    position = "dodge"
  ) +
  labs(
    title = "Player Count by League and Division",
    x = "League",
    y = "Count",
    fill = "Division"
  ) +
  theme_minimal()

Donut

Distribution of League: Visualizes the proportion of players in each league.

# Share of players in each league.
league_data <- Hitters %>%
  count(League) %>%
  mutate(percentage = n / sum(n))

# Donut chart: a single stacked bar at x = 2 wrapped into polar coordinates;
# xlim() leaves empty space below the bar, which becomes the hole.
ggplot(league_data, aes(x = 2, y = percentage, fill = League)) +
  geom_col(color = "white") +
  # position_stack(vjust = 0.5) centres each label inside its own segment.
  # Hand-computed cumulative positions assume stacking in row order, but
  # ggplot2 stacks groups in reverse order, which put each percentage on the
  # other league's slice. The league code is part of the label because the
  # legend is hidden.
  geom_text(
    aes(label = paste0(League, "\n", scales::percent(percentage))),
    position = position_stack(vjust = 0.5),
    color = "white"
  ) +
  coord_polar("y", start = 0) +
  xlim(0.5, 2.5) +
  scale_fill_manual(values = c("A" = "skyblue", "N" = "coral")) +
  theme_void() +
  labs(title = "Distribution of League") +
  theme(legend.position = "none")

Scatter plot

Hits vs. At-Bats: Examines the relationship between hits and at-bats.

# Scatter plot of Hits against AtBat; semi-transparent points keep
# overlapping observations visible.
Hitters %>%
  ggplot(mapping = aes(x = AtBat, y = Hits)) +
  geom_point(alpha = 0.6, colour = "purple") +
  ggtitle("Hits vs. At-Bats") +
  xlab("At-Bats") +
  ylab("Hits") +
  theme_minimal()