In Class Activity 5 Frank Vega

Question 1 Now, you must complete the problem below which represents a similar case scenario. You may use the steps that we executed in Case-scenario 1 as a template for your solution.

This is the sixth season of outfielder Juan Soto in the majors. If during the first five seasons he received 79, 108, 41, 145, and 135 walks, how many does he need this season for his average number of walks per season to be at least 100?

## Build the inputs described in the question
# Walks received in each of the five completed seasons
Walks_current <- c(79, 108, 41, 145, 135)
# Total number of seasons once the current (sixth) one is included
n_seasons <- 6
# Target average number of walks per season
wanted_Walks <- 100
# Walks needed in season 6: the total required to average `wanted_Walks`
# over `n_seasons`, minus the walks already accumulated
Nwalks <- wanted_Walks * n_seasons - sum(Walks_current)
# Minimum number of walks Juan Soto needs this season
Nwalks
## [1] 92

Answer check of the previous result

# All six seasons, with the computed 92 walks appended as the sixth value
Rwalks <- c(79, 108, 41, 145, 135, 92)

# The six-season average should match the target of 100
mean(Rwalks)
## [1] 100

Question 2 The average salary of 7 basketball players is 102,000 dollars a week and the average salary of 9 NFL players is 91,000. Find the mean salary of all 16 professional players.

# Group sizes: 7 basketball players and 9 NFL players
n_1 <- 7
n_2 <- 9
# Average salary of each group, as given in the question
y_1 <- 102000
y_2 <- 91000
# Overall mean salary: the two group means weighted by their group sizes
salary_ave <- weighted.mean(c(y_1, y_2), w = c(n_1, n_2))
salary_ave
## [1] 95812.5

Question 3 Use the skills learned in case scenario number 3 on one of the following data sets. You may choose only one dataset. They are both available in Canvas.

doubles_hit.csv and triples_hit.csv

## Load the triples data set into a data frame; this data frame is used for
## the rest of the assignment
# NOTE(review): the path is absolute and machine-specific
CaseScenario3 <- read.csv(
  file = "C:\\Users\\Frank\\Desktop\\Special Topics Sports  Analytics\\R\\triples_hit.csv"
)
CaseScenario3
##     triples_hit
## 1             2
## 2             5
## 3             4
## 4             5
## 5             1
## 6             3
## 7             1
## 8             3
## 9             5
## 10            1
## 11            4
## 12            2
## 13            4
## 14            4
## 15            2
## 16            7
## 17            6
## 18            3
## 19            4
## 20            5
## 21            6
## 22            5
## 23            7
## 24            5
## 25            4
## 26            2
## 27            5
## 28            7
## 29            6
## 30            1
## 31            6
## 32            3
## 33            7
## 34            5
## 35            4
## 36            5
## 37            2
## 38            6
## 39            6
## 40            5
## 41            4
## 42            6
## 43            2
## 44            1
## 45            1
## 46            6
## 47            4
## 48            7
## 49            7
## 50            5
## 51            3
## 52            3
## 53            4
## 54            6
## 55            6
## 56            2
## 57            5
## 58            3
## 59            1
## 60            3
## 61            7
## 62            5
## 63            5
## 64            6
## 65            6
## 66            4
## 67            2
## 68            7
## 69            7
## 70            2
## 71            2
## 72            3
## 73            2
## 74            6
## 75            4
## 76            4
## 77            2
## 78            3
## 79            5
## 80            3
## 81            2
## 82            5
## 83            5
## 84            2
## 85            3
## 86            5
## 87            4
## 88           11
## 89           12
## 90           14
## 91           10
## 92            5
## 93            9
## 94           10
## 95           11
## 96           10
## 97           12
## 98           14
## 99            9
## 100          11

Part 2: extract the triples_hit column of the data frame as a vector

# Pull the `triples_hit` column out of the data frame as a plain vector
triples_hitting <- CaseScenario3[["triples_hit"]]

Mean and median of the data

## Mean number of triples hit
triples_mean <- mean(x = triples_hitting)
triples_mean
## [1] 4.96
## Median number of triples hit
triples_median <- median(x = triples_hitting)
triples_median
## [1] 5

## Sample size and standard deviation, needed to count the observations
## within 1, 2 and 3 standard deviations of the mean

# Number of observations
triples_n <- length(triples_hitting)

# Sample standard deviation, i.e. the square root of the sample variance
triples_sd <- sqrt(var(triples_hitting))

What percentage of the data lies within one standard deviation of the mean?

# Proportion of observations within one standard deviation of the mean.
# "Within" is two-sided, so the z-score is wrapped in abs(): without it every
# observation below the mean has a negative z-score and is counted no matter
# how far away it is.
triples_hit_Percentage1 <- sum(
  abs((triples_hitting - triples_mean) / triples_sd) < 1
) / triples_n
# Proportion of observations within one standard deviation of the mean
triples_hit_Percentage1
## [1] 0.67
## Compare with the empirical rule, which predicts about 68% of the
## observations within one standard deviation of the mean
## Difference from empirical
triples_hit_Percentage1 - 0.68
## [1] -0.01
What percentage of the data lies within two standard deviations of the mean?
# Proportion of observations within two standard deviations of the mean.
# abs() makes the check two-sided, so values far BELOW the mean are excluded
# as well as values far above it.
triples_hit_Percentage2 <- sum(
  abs((triples_hitting - triples_mean) / triples_sd) < 2
) / triples_n
triples_hit_Percentage2
## [1] 0.93

What percent of the data lies within three standard deviations of the mean?

## Proportion of observations within three standard deviations of the mean;
## the answer to this question is 0.98
# abs() makes the check two-sided, so values far BELOW the mean are excluded
# as well as values far above it.
triples_hit_Percentage3 <- sum(
  abs((triples_hitting - triples_mean) / triples_sd) < 3
) / triples_n
## Difference from the empirical rule (about 99.73% within three standard
## deviations of the mean)
triples_hit_Percentage3 - 0.9973
## [1] -0.0173
## Simple histogram of triples hit and their frequency
# The x-axis upper limit is taken from the data so that the largest
# observations are drawn instead of being clipped out of the plot region.
hist(
  triples_hitting,
  xlab = "Triples Hit",
  col = "green",
  border = "red",
  xlim = c(0, max(triples_hitting) + 1),
  ylim = c(0, 50),
  breaks = 5
)