Module2_Activity2

# Robert's home-run totals across four seasons
Robert_HRs <- c(11, 13, 12, 44)

# Average number of home runs per season
mean(Robert_HRs)

## [1] 20

# Spread of the seasonal totals (sample standard deviation)
sd(Robert_HRs)

## [1] 16.02082

# Best season: the most home runs hit in any of the four seasons
max(Robert_HRs)

## [1] 44

# Worst season: the fewest home runs hit in any of the four seasons
min(Robert_HRs)

## [1] 11

# Minimum, quartiles, median, mean and maximum in a single call
summary(Robert_HRs)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   11.75   12.50   20.00   20.75   44.00

## Case Scenario 1

# This is outfielder Juan Soto's sixth season in the majors. During his first
# five seasons he received 79, 108, 41, 145, and 135 walks. How many walks does
# he need this season for his overall number of walks per season to be at
# least 100?

soto_walks <- c(79, 108, 41, 145, 135)
wanted_walks <- 100
number_seasons <- 6

# Walks needed in season 6: the total required to average `wanted_walks` over
# all `number_seasons` seasons, minus the walks already received.
walks_6 <- wanted_walks * number_seasons - sum(soto_walks)
walks_6

## [1] 92

## Case Scenario 2

# The average salary of 10 baseball players is 72,000 dollars a week and the
# average salary of 4 soccer players is 84,000. Find the mean salary of all 14
# professional players.

n_1 <- 10
n_2 <- 4
y_1 <- 72000
y_2 <- 84000

# Overall mean salary: the two group means weighted by group size
salary_ave <- weighted.mean(c(y_1, y_2), w = c(n_1, n_2))
salary_ave

## [1] 75428.57

# The average salary of 7 basketball players is 102,000 dollars a week and the
# average salary of 9 NFL players is 91,000. Find the mean salary of all 16
# professional players.
bp_1 <- 7
avg_salary <- 102000
bp_2 <- 9
avg_salary_1 <- 91000

# Same size-weighted combination of the two group means
mean_salary <- weighted.mean(c(avg_salary, avg_salary_1), w = c(bp_1, bp_2))
mean_salary

## [1] 95812.5

# Load the contracts data set from the imported CSV
allcontracts <- read.csv("./r_datasets/allcontracts.csv")
head(allcontracts)

##   years  X X.1 X.2
## 1     6 NA  NA   4
## 2     5 NA  NA  NA
## 3     3 NA  NA  NA
## 4     6 NA  NA  NA
## 5     5 NA  NA  NA
## 6     1 NA  NA  NA

# Only the `years` column is analysed; pull it out as a plain vector
contract_years <- allcontracts[["years"]]

# Mean of the contract years
contracts_mean <- mean(contract_years)
contracts_mean

## [1] 3.458918

# Median of the contract years
contracts_median <- median(contract_years)
contracts_median

## [1] 3

# Empirical-rule check for contract_years.
#
# "Within k standard deviations of the mean" is a two-sided condition:
# |x - mean| / sd < k. The earlier version tested only (x - mean) / sd < k,
# which also counted every observation far BELOW the mean and therefore
# overstated each proportion. The outputs previously recorded from that
# one-sided test (0.8416834, 1 and 1 for k = 1, 2, 3) are not reproduced here;
# re-run this chunk to record the corrected values.

# Find number of observations
contracts_n <- length(contract_years)
# Find standard deviation
contracts_sd <- sd(contract_years)
# Distance of each observation from the mean, in standard-deviation units
contracts_abs_z <- abs(contract_years - contracts_mean) / contracts_sd

# What proportion of the data lies within one standard deviation of the mean?
contracts_w1sd <- sum(contracts_abs_z < 1) / contracts_n
contracts_w1sd

# Difference from the empirical rule (about 68% within 1 sd)
contracts_w1sd - 0.68

# What proportion of the data lies within two standard deviations of the mean?
contracts_w2sd <- sum(contracts_abs_z < 2) / contracts_n
contracts_w2sd

# Difference from the empirical rule (about 95% within 2 sd)
contracts_w2sd - 0.95

# What proportion of the data lies within three standard deviations of the
# mean?
contracts_w3sd <- sum(contracts_abs_z < 3) / contracts_n
contracts_w3sd

# Difference from the empirical rule (about 99.73% within 3 sd)
contracts_w3sd - 0.9973

# Histogram of the contract years
hist(
  contract_years,
  breaks = 3,
  xlim = c(0, 6),
  ylim = c(0, 225),
  xlab = "Years Left in Contract",
  col = "green",
  border = "red"
)

# Box plot of the same values
boxplot(
  contract_years,
  main = "Contract Years Distribution",
  ylab = "Years Left in Contract",
  col = "lightblue",
  border = "darkblue"
)

# Bar chart with one bar per distinct value of contract_years
contract_counts <- table(contract_years)
barplot(
  contract_counts,
  main = "Frequency of Contract Years",
  xlab = "Years Left in Contract",
  ylab = "Count",
  col = "green",
  border = "red"
)

# Kernel density estimate: computed once, drawn as a line, then shaded with a
# semi-transparent blue fill
contract_density <- density(contract_years)
plot(
  contract_density,
  main = "Density Plot of Contract Years",
  xlab = "Years Left in Contract",
  col = "blue",
  lwd = 2
)
polygon(contract_density, col = rgb(0, 0, 1, 0.3), border = "blue")

## Question 3

# Load the doubles data set
doubles_hit <- read.csv("./r_datasets/doubles_hit.csv")
head(doubles_hit)

##   doubles_hit
## 1          37
## 2           4
## 3           6
## 4           7
## 5           9
## 6          25

# Replace the data frame with its `doubles_hit` column, so that from here on
# `doubles_hit` is a plain vector
doubles_hit <- doubles_hit[["doubles_hit"]]

# Mean number of doubles
doubles_hit_mean <- mean(doubles_hit)
doubles_hit_mean

## [1] 23.55

# Median number of doubles
doubles_hit_median <- median(doubles_hit)
doubles_hit_median

## [1] 23.5

# Empirical-rule check for doubles_hit.
#
# "Within k standard deviations of the mean" is a two-sided condition:
# |x - mean| / sd < k. The earlier version tested only (x - mean) / sd < k,
# which also counted every observation far BELOW the mean and therefore
# overstated each proportion. The outputs previously recorded from that
# one-sided test (0.79, 1 and 1 for k = 1, 2, 3) are not reproduced here;
# re-run this chunk to record the corrected values.

# Number of observations and sample standard deviation
doubles_hit_n <- length(doubles_hit)
doubles_hit_sd <- sd(doubles_hit)
# Distance of each observation from the mean, in standard-deviation units
doubles_abs_z <- abs(doubles_hit - doubles_hit_mean) / doubles_hit_sd

# What proportion of the data lies within one standard deviation of the mean?
doubles_w1sd <- sum(doubles_abs_z < 1) / doubles_hit_n
doubles_w1sd

# Difference from the empirical rule (about 68% within 1 sd)
doubles_w1sd - 0.68

# What proportion of the data lies within two standard deviations of the mean?
doubles_w2sd <- sum(doubles_abs_z < 2) / doubles_hit_n
doubles_w2sd

# Difference from the empirical rule (about 95% within 2 sd)
doubles_w2sd - 0.95

# What proportion of the data lies within three standard deviations of the
# mean?
doubles_w3sd <- sum(doubles_abs_z < 3) / doubles_hit_n
doubles_w3sd

# Difference from the empirical rule (about 99.73% within 3 sd)
doubles_w3sd - 0.9973

# Histogram of doubles hit
hist(
  doubles_hit,
  breaks = 10,
  xlim = c(0, 50),
  ylim = c(0, 30),
  xlab = "Doubles Hit",
  col = "orange",
  border = "red"
)

# Bar chart with one bar per distinct value of doubles_hit
doubles_counts <- table(doubles_hit)
barplot(
  doubles_counts,
  main = "Frequency of Doubles Hit",
  xlab = "Doubles Hit",
  ylab = "Count",
  col = "green",
  border = "red"
)

Module2_Activity2_Activity3

Kevin Alvarez

2025-03-08