library(ggplot2)

Data Preparation

# Load the two project CSV files directly from GitHub; both URLs point at a
# specific commit of the repository.
df <- read.csv(
  "https://github.com/mianshariq/SPS/raw/dab24b98c3c1d48b96ea619c01caacfefa916386/Data%20606/Projects/NFL%20Data.csv"
)

# Second file from the same project ("NFL Data Min.csv").
df1 <- read.csv(
  "https://github.com/mianshariq/SPS/raw/4fe676d9723fea08abb22f2021d644177dc16698/Data%20606/Projects/NFL%20Data%20Min.csv"
)

Research question

You should phrase your research question in a way that matches up with the scope of inference your dataset allows for.

Why you should or shouldn't draft a running back in the first round of the NFL Draft.

Cases

What are the cases, and how many are there? 5 years of data covering all 7 rounds of the draft: 277 cases.

Data collection

Describe the method of data collection.

Data were extracted from the Pro Football Focus (PFF) website and DraftHistory.com and saved to a CSV file.

Type of study

What type of study is this (observational/experiment)?

Observational

Data Source

If you collected the data, state self-collected. If not, provide a citation/link.

http://www.drafthistory.com/index.php/positions/rb

Dependent Variable

What is the response variable? Is it quantitative or qualitative?

Yards per game (quantitative variable).

Independent Variable

You should have two independent variables, one quantitative and one qualitative.

Draft order (quantitative) and round (qualitative).

Relevant summary statistics

Provide summary statistics for each of the variables. Also include appropriate visualizations related to your research question (e.g. scatter plots, boxplots, etc.). This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.

# Per-column summary of df. Columns reported with Class "character" were not
# parsed as numeric by read.csv().
summary(df)
##       Year          Name             College              POS           
##  Min.   :2015   Length:277         Length:277         Length:277        
##  1st Qu.:2016   Class :character   Class :character   Class :character  
##  Median :2017   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :2017                                                           
##  3rd Qu.:2019                                                           
##  Max.   :2020                                                           
##                                                                         
##    Height_in       Weight_lbs     Hand_Size_in    Arm_Length_in  
##  Min.   :65.75   Min.   :170.0   Min.   : 8.250   Min.   :27.38  
##  1st Qu.:69.13   1st Qu.:203.0   1st Qu.: 8.880   1st Qu.:30.00  
##  Median :70.38   Median :213.0   Median : 9.250   Median :31.00  
##  Mean   :70.43   Mean   :212.1   Mean   : 9.237   Mean   :30.88  
##  3rd Qu.:71.75   3rd Qu.:222.0   3rd Qu.: 9.500   3rd Qu.:31.63  
##  Max.   :75.00   Max.   :259.0   Max.   :10.500   Max.   :33.75  
##                                  NA's   :1        NA's   :1      
##   X40_Yard_sec    Bench_Press     Vert_Leap_In   Broad_Jump_in  
##  Min.   :4.280   Min.   : 5.00   Min.   :27.00   Min.   :106.0  
##  1st Qu.:4.490   1st Qu.:16.00   1st Qu.:32.00   1st Qu.:116.0  
##  Median :4.560   Median :19.00   Median :34.50   Median :120.0  
##  Mean   :4.558   Mean   :18.79   Mean   :34.55   Mean   :119.7  
##  3rd Qu.:4.630   3rd Qu.:22.00   3rd Qu.:36.50   3rd Qu.:123.0  
##  Max.   :4.850   Max.   :34.00   Max.   :42.50   Max.   :135.0  
##  NA's   :14      NA's   :28      NA's   :16      NA's   :25     
##  Shuttle_Shuttle     X3Cone          Team              Round          
##  Min.   :3.900   Min.   :6.570   Length:277         Length:277        
##  1st Qu.:4.225   1st Qu.:6.980   Class :character   Class :character  
##  Median :4.320   Median :7.110   Mode  :character   Mode  :character  
##  Mean   :4.326   Mean   :7.101                                        
##  3rd Qu.:4.420   3rd Qu.:7.220                                        
##  Max.   :4.630   Max.   :7.680                                        
##  NA's   :46      NA's   :58                                           
##  Draft_Order        RushYard_Per_Game       GP            Total_Yrd_pg      
##  Length:277         Length:277         Length:277         Length:277        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##       TD           
##  Length:277        
##  Class :character  
##  Mode  :character  
##                    
##                    
##                    
## 
# Columns that read.csv() imported as character but that are analysed as numbers.
cols.num <- c("Draft_Order", "RushYard_Per_Game", "GP", "TD", "Total_Yrd_pg")
# lapply() always returns a list with one element per column, so this
# assignment is type-stable; sapply() would simplify its result to a vector or
# matrix depending on the shape of the input. Entries that cannot be parsed as
# numbers become NA and as.numeric() emits a coercion warning for that column.
df[cols.num] <- lapply(df[cols.num], as.numeric)
## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion

## Warning in lapply(X = X, FUN = FUN, ...): NAs introduced by coercion
# Print the class of every column to confirm the numeric conversion took effect.
sapply(df, class)
##              Year              Name           College               POS 
##         "integer"       "character"       "character"       "character" 
##         Height_in        Weight_lbs      Hand_Size_in     Arm_Length_in 
##         "numeric"         "integer"         "numeric"         "numeric" 
##      X40_Yard_sec       Bench_Press      Vert_Leap_In     Broad_Jump_in 
##         "numeric"         "integer"         "numeric"         "integer" 
##   Shuttle_Shuttle            X3Cone              Team             Round 
##         "numeric"         "numeric"       "character"       "character" 
##       Draft_Order RushYard_Per_Game                GP      Total_Yrd_pg 
##         "numeric"         "numeric"         "numeric"         "numeric" 
##                TD 
##         "numeric"
# Box plot of total yards per game by draft round with a linear trend overlaid.
# Round is a discrete variable, so by default ggplot2 puts each round in its
# own group; geom_smooth() then has a single x value per group and draws no
# line. aes(group = 1) on the smooth layer fits one regression across all
# rounds while leaving the per-round box plots unchanged.
chart <- ggplot(data = df, aes(x = Round, y = Total_Yrd_pg)) +
  geom_boxplot(color = "#69b3a2") +
  geom_smooth(method = "lm", aes(group = 1))
chart
## Warning: Removed 130 rows containing non-finite values (stat_boxplot).
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 130 rows containing non-finite values (stat_smooth).