# Assignment 5

# Case Scenario 1 ----

# Home runs hit in each of the three seasons played so far
HR_before <- c(11, 13, 12)
# Target average number of home runs per season
wanted_HR <- 20
# Number of seasons the average is taken over
n_seasons <- 4
# Home runs needed in season 4: the total required to hit the target average
# over all seasons, minus the home runs already on record
x_4 <- n_seasons * wanted_HR - sum(HR_before)
# Minimum number of home runs needed by Robert
x_4

## [1] 44

# Robert's performance over all four seasons. Built from HR_before and x_4
# rather than re-typed literals, so it cannot drift out of sync with them.
Robert_HRs <- c(HR_before, x_4)
# Mean
mean(Robert_HRs)

## [1] 20

# Standard deviation (sd() uses the sample, n - 1, denominator)
sd(Robert_HRs)

## [1] 16.02082

# Maximum number of home runs during the four-season period
max(Robert_HRs)

## [1] 44

# Minimum number of home runs during the four-season period
min(Robert_HRs)

## [1] 11

# Five-number summary plus the mean
summary(Robert_HRs)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   11.75   12.50   20.00   20.75   44.00

# Question 1 ----

# The five values already on record
sixth_season_stats <- c(79, 108, 41, 145, 135)
# Average to be reached once the sixth value is included
wanted_avg <- 100
# Number of values the average is taken over
pred_season <- 6
# Sixth value needed: required grand total minus what is already recorded
x_6 <- wanted_avg * pred_season - sum(sixth_season_stats)
x_6

## [1] 92

# Case Scenario 2 ----

# Counts used as weights
n_1 <- 10
n_2 <- 4
# Salary figure attached to each count
# (presumably a group average; confirm against the assignment text)
y_1 <- 72000
y_2 <- 84000
# Overall mean salary: the two salary figures weighted by their counts
salary_ave <- weighted.mean(x = c(y_1, y_2), w = c(n_1, n_2))
salary_ave

## [1] 75428.57

# Question 2 ----

# Counts used as weights (these overwrite any earlier n_1 / n_2)
n_1 <- 7
n_2 <- 9
# Salary figure attached to each count (these overwrite any earlier y_1 / y_2)
y_1 <- 102000
y_2 <- 91000
# Overall mean salary: the two salary figures weighted by their counts
salary_ave <- weighted.mean(x = c(y_1, y_2), w = c(n_1, n_2))
salary_ave

## [1] 95812.5

# Case Scenario 3 ----

# Frequency table: each entry of Years is paired with a player count
Years <- c(6, 5, 4, 3, 2, 1)
Number_of_players <- c(28, 72, 201, 109, 56, 34)

# Two-column data frame; the columns take their names from the vectors above
contracts_lengths <- data.frame(Years, Number_of_players)

# Load the contracts file (comma-separated, first row holds column names)
contract_length <- read.table(
  file = "allcontracts copy.csv",
  sep = ",",
  header = TRUE
)
# Pull out the `years` column as a plain vector
contract_years <- contract_length$years

# Mean
contracts_mean <- mean(contract_years)
contracts_mean

## [1] 3.458918

# Median
contracts_median <- median(contract_years)
contracts_median

## [1] 3

# Number of observations
contracts_n <- length(contract_years)
# Standard deviation
contracts_sd <- sd(contract_years)

# Distance of every observation from the mean, in standard deviations.
# abs() makes the checks below two-sided: without it, an observation far
# BELOW the mean has a negative z-score and is wrongly counted as lying
# "within k standard deviations".
contracts_abs_z <- abs(contract_years - contracts_mean) / contracts_sd

# NOTE: the console outputs previously pasted under the within-k-sd results
# came from the one-sided calculation and were removed; re-run to regenerate.

# Proportion of observations within one standard deviation of the mean
contracts_w1sd <- sum(contracts_abs_z < 1) / contracts_n
contracts_w1sd

# Difference from the empirical rule (68%)
contracts_w1sd - 0.68

# Proportion within two standard deviations
contracts_w2sd <- sum(contracts_abs_z < 2) / contracts_n
contracts_w2sd

# Difference from the empirical rule (95%)
contracts_w2sd - 0.95

# Proportion within three standard deviations
contracts_w3sd <- sum(contracts_abs_z < 3) / contracts_n
contracts_w3sd

# Difference from the empirical rule (99.73%)
contracts_w3sd - 0.9973

# Histogram of the contract years.
# NOTE(review): a single number for `breaks` is only a suggestion to hist();
# check that the resulting bins separate the values as intended.
hist(
  contract_years,
  xlab = "Years Left in Contract",
  col = "green",
  border = "red",
  xlim = c(0, 8),
  ylim = c(0, 225),
  breaks = 5
)

# Question 3 ----

# Load the doubles file (comma-separated, first row holds column names)
on_base <- read.table(
  file = "doubles_hit.csv",
  sep = ",",
  header = TRUE
)
# Pull out the `doubles` column as a plain vector
doubles_hits <- on_base$doubles
# Show the full table that was read in
on_base

# Max
doubles_hits_max <- max(doubles_hits)
doubles_hits_max

## [1] 49

# Mean
doubles_hits_mean <- mean(doubles_hits)
doubles_hits_mean

## [1] 23.55

# Median. Stored under its own name: it used to be assigned to
# doubles_hits_mean, which silently replaced the mean and centred every
# z-score below on the median instead.
doubles_hits_median <- median(doubles_hits)
doubles_hits_median

## [1] 23.5

# Number of observations
doubles_hits_n <- length(doubles_hits)
# Standard deviation
doubles_hits_sd <- sd(doubles_hits)

# Distance of every observation from the mean, in standard deviations.
# abs() makes the checks below two-sided: without it, an observation far
# BELOW the mean has a negative z-score and is wrongly counted as lying
# "within k standard deviations".
doubles_hits_abs_z <- abs(doubles_hits - doubles_hits_mean) / doubles_hits_sd

# NOTE: the console outputs previously pasted under the within-k-sd results
# came from the one-sided, median-centred calculation and were removed;
# re-run to regenerate.

# Proportion of observations within one standard deviation of the mean
doubles_hits_w1sd <- sum(doubles_hits_abs_z < 1) / doubles_hits_n
doubles_hits_w1sd

# Difference from the empirical rule (68%)
doubles_hits_w1sd - 0.68

# Proportion within two standard deviations
doubles_hits_w2sd <- sum(doubles_hits_abs_z < 2) / doubles_hits_n
doubles_hits_w2sd

# Difference from the empirical rule (95%)
doubles_hits_w2sd - 0.95

# Proportion within three standard deviations
doubles_hits_w3sd <- sum(doubles_hits_abs_z < 3) / doubles_hits_n
doubles_hits_w3sd

# Difference from the empirical rule (99.73%)
doubles_hits_w3sd - 0.9973

# Histogram of the doubles counts.
# NOTE(review): a single number for `breaks` is only a suggestion to hist();
# check that the resulting bins look as intended.
hist(
  doubles_hits,
  xlab = "Number of Doubles",
  col = "green",
  border = "red",
  xlim = c(0, 50),
  ylim = c(0, 30),
  breaks = 5
)