# Attach the packages the rest of this script relies on:
#   dplyr   -> group_by(), summarise(), n(), arrange(), desc()
#   ggplot2 -> ggplot(), aes(), geom_*(), labs(), ggtitle(), theme_minimal()
library(dplyr)
library(ggplot2)

# Load the NBA game-level data into `data1` and preview the first six rows.
# NOTE(review): this is an absolute Windows path, so the script only runs on a
# machine where the file exists at exactly this location.
data1 <- read.csv("C:\\Statistics\\nba.csv")
head(data1)
##       bbrID       Date  Tm Opp TRB AST STL BLK PTS GmSc  Season Playoffs Year
## 1 abdelal01 1993-03-16 BOS GSW  10   2   0   0  25 22.7 1992-93    false 1993
## 2 abdulma02 1991-04-02 DEN DAL   2   6   4   0  30 29.7 1990-91    false 1991
## 3 abdulta01 1998-04-19 SAC VAN   2   3   1   0  31 26.4 1997-98    false 1998
## 4 abdursh01 2001-11-23 ATL DET  12   5   2   1  50 46.0 2001-02    false 2002
## 5 abrinal01 2018-11-01 OKC CHO   2   0   0   0  25 17.1 2018-19    false 2019
## 6 achiupr01 2021-01-12 MIA PHI  13   3   0   1  17 16.9 2020-21    false 2021
##   GameIndex GmScMovingZ GmScMovingZTop2Delta      Date2 GmSc2 GmScMovingZ2
## 1       181        4.13                 0.24 1991-12-04  18.6         3.89
## 2        64        3.82                 0.64 1995-12-07  40.1         3.18
## 3        58        4.11                 1.67 1998-01-14  16.9         2.44
## 4       386        4.06                 0.84 2003-11-28  34.3         3.22
## 5       160        3.37                 0.18 2018-11-30  16.6         3.19
## 6         8        2.58                 0.05 2021-02-28  16.8         2.53
# Frequency table for the categorical team column (`Tm`):
# one row per team, with `Count` holding the number of rows for that team.
summarise(group_by(data1, Tm), Count = n())
## # A tibble: 38 × 2
##    Tm    Count
##    <chr> <int>
##  1 ATL      62
##  2 BOS      61
##  3 BRK      24
##  4 CHA      17
##  5 CHH      20
##  6 CHI      50
##  7 CHO      14
##  8 CLE      68
##  9 DAL      53
## 10 DEN      57
## # ℹ 28 more rows
# Five-number summary plus mean for the numeric points column (`PTS`)
summary(data1[["PTS"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00   19.00   24.00   26.06   32.00   81.00
# Additional statistics: quartiles of `PTS` (minimum, 25%, median, 75%, maximum)
quantile(data1[["PTS"]], probs = c(0, 0.25, 0.5, 0.75, 1))
##   0%  25%  50%  75% 100% 
##    4   19   24   32   81

## Set of novel questions
## What are the top 5 teams with the highest average points per game?

## Is there a relationship between players’ height and their average points scored?

## How does the distribution of salaries vary across different positions?

# Top 5 teams with the highest average points
# Average `PTS` per team (missing values ignored), sorted from highest to
# lowest, keeping only the first five rows.
top_teams <- data1 |>
  group_by(Tm) |>
  summarise(AveragePoints = mean(PTS, na.rm = TRUE)) |>
  arrange(desc(AveragePoints)) |>
  head(5)

print(top_teams)

# Distribution visualization: histogram of points (`PTS`) in 5-point-wide bins
ggplot(data = data1, mapping = aes(x = PTS)) +
  geom_histogram(color = "black", fill = "blue", binwidth = 5) +
  ggtitle("Distribution of Points")

## The histogram of points provides the following insights:
## The middle half of the values falls between roughly 19 and 32 points (the first and third quartiles reported above).
## This range represents the typical performance level.
## Performance analysis: the histogram helps in understanding whether scoring is typically consistent or varies.
## Open questions:
## Do any specific player or team attributes correlate with high/low scoring ranges?
## Are the outliers associated with specific players, teams, or time periods?



# Scatter plot of points (`PTS`) against game score (`GmSc`) with a linear fit
ggplot(data = data1, mapping = aes(x = PTS, y = GmSc)) +
  # Semi-transparent markers so overlapping observations stay visible
  geom_point(color = "blue", alpha = 0.6) +
  # Straight-line (lm) trend drawn without the confidence band
  geom_smooth(se = FALSE, color = "red", method = "lm") +
  labs(
    x = "Points Scored",
    y = "Game Score",
    title = "Points vs. Game Score",
    caption = "Data Source: Your Dataset"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

## Scatter plot analysis: Points vs. Game Score
## The scatter plot highlights a strong correlation between points scored and overall game impact. Most values fall within 10–30 points, aligning with typical performance levels.
## Performance analysis: Higher points generally result in a higher game score. Outliers (e.g., 60+ points) may indicate exceptional performances.
## Key questions: Do specific players or teams consistently achieve high game scores? Are outliers linked to certain seasons or game conditions?
#library(tidyverse)