This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
Subtraction
2-3
[1] -1
Division
2/3
[1] 0.6666667
Exponentiation
2^3
[1] 8
Square root
sqrt(2)
[1] 1.414214
Logarithms
log(2)
[1] 0.6931472
Question_1: Compute the log base 5 of 10 and the log of 10.
log(10, base = 5)
[1] 1.430677
log(10)
[1] 2.302585
Computing some offensive metrics in Baseball
# Batting Average = (No. of Hits) / (No. of At Bats)
# What is the batting average of a player with 29 hits in 112 at bats?
BA <- 29 / 112
BA
[1] 0.2589286
# Round the batting average to three decimal places.
Batting_Average <- round(BA, digits = 3)
Batting_Average
[1] 0.259
Question_2:What is the batting average of a player that bats 42 hits in 212 at bats?
# Batting average for 42 hits in 212 at bats.
hits <- 42
at_bats <- 212
Batting_Average <- hits / at_bats
Batting_Average
[1] 0.1981132
# On Base Percentage
# OBP = (H + BB + HBP) / (At Bats + BB + HBP + SF)
# Let us compute the OBP for a player with the following general stats:
# AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
OBP
[1] 0.4278689
# Round the OBP to three decimal places.
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
[1] 0.428
Question_3:Compute the OBP for a player with the following general stats: AB=565,H=156,BB=65,HBP=3,SF=7
AB <- 565 # At-bats
H <- 156 # Hits
BB <- 65 # Walks (Bases on Balls)
HBP <- 3 # Hit by Pitch
SF <- 7 # Sacrifice Flies
OBP <- (H + BB + HBP) / (AB + BB + HBP + SF)
OBP
[1] 0.35
3 == 8# Does 3 equal 8?
[1] FALSE
3 != 8# Is 3 different from 8?
[1] TRUE
3 <= 8# Is 3 less than or equal to 8?
[1] TRUE
3>4
[1] FALSE
# Logical Disjunction (or)
FALSE | FALSE # False OR False
[1] FALSE
# Logical Conjunction (and)
TRUE & FALSE #True AND False
[1] FALSE
# Negation
! FALSE # Not False
[1] TRUE
# Combination of statements
2 < 3 | 1 == 5 # 2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
Assigning Values to Variables
Total_Bases <- 6 + 5
Total_Bases*3
[1] 33
ls()
[1] "AB" "at_bats" "BA" "Batting_Average"
[5] "BB" "H" "HBP" "hits"
[9] "OBP" "On_Base_Percentage" "SF" "Total_Bases"
rm(Total_Bases)
Warning in rm(Total_Bases) : object 'Total_Bases' not found
Vectors
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
[1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
[1] 9 12 6 14 9
Question_4: Define two vectors,runs_per_9innings and hits_per_9innings, each with five elements.
runs_per_9innings <- c(4.5, 3.8, 5.2, 4.1, 3.9)
hits_per_9innings <- c(8.7, 7.9, 9.2, 8.3, 7.8)
runs_per_9innings
[1] 4.5 3.8 5.2 4.1 3.9
hits_per_9innings
[1] 8.7 7.9 9.2 8.3 7.8
# replicate function
rep(2, 5)
[1] 2 2 2 2 2
rep(1,4)
[1] 1 1 1 1
# consecutive numbers
1:5
[1] 1 2 3 4 5
2:10
[1] 2 3 4 5 6 7 8 9 10
seq(1, 10, by=2)
[1] 1 3 5 7 9
seq(2,13,by=3)
[1] 2 5 8 11
# add vectors
pitches_by_innings+strikes_by_innings
[1] 21 27 16 34 19
# compare vectors
pitches_by_innings == strikes_by_innings
[1] FALSE FALSE FALSE FALSE FALSE
# find length of vector
length(pitches_by_innings)
[1] 5
# find minimum value in vector
min(pitches_by_innings)
[1] 10
# find average value in vector
mean(pitches_by_innings)
[1] 13.4
pitches_by_innings
[1] 12 15 10 20 10
# If you want to get the first element:
pitches_by_innings[1]
[1] 12
Question_5: Get the first element of hits_per_9innings.
hits_per_9innings[1]
[1] 8.7
pitches_by_innings[length(pitches_by_innings)]
[1] 10
Question_6: Get the last element of hits_per_9innings.
# Index with length() so the last element is returned whatever the vector's size
hits_per_9innings[length(hits_per_9innings)]
[1] 7.8
pitches_by_innings[c(2, 3, 4)]
[1] 15 10 20
Data Frames
data.frame(bonus = c(2, 3, 1),#in millions
active_roster = c("yes", "no", "yes"),
salary = c(1.5, 2.5, 1))#in millions
How to Make a Random Sample
sample(1:10, size=5)
[1] 8 1 7 2 9
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Check data frame
bar
# Number of rows to draw
n <- 5
# Draw n distinct row indices. seq_len() gives an empty sequence for a
# zero-row data frame, whereas 1:nrow(bar) would give c(1, 0).
samplerows <- sample(seq_len(nrow(bar)), size = n)
# print sample rows
samplerows
[1] 8 2 7 10 5
# extract rows
barsample <- bar[samplerows, ]
# print sample
print(barsample)
# The same operation in one step; this draws a fresh random sample of n rows.
# seq_len() is used instead of 1:nrow(bar) so a zero-row data frame gives an
# empty index set rather than c(1, 0).
bar[sample(seq_len(nrow(bar)), n), ]
Using Tables
x <- c("Yes","No","No","Yes","Yes")
table(x)
x
No Yes
2 3
Numerical measures of center and spread
sals <- c(12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# the average
mean(sals)
[1] 8.565
# the variance
var(sals)
[1] 225.5145
# the standard deviation
sd(sals)
[1] 15.01714
# the median
median(sals)
[1] 3.5
# Tukey's five-number summary, useful for boxplots
# five numbers: min, lower hinge, median, upper hinge, max
fivenum(sals)
[1] 0.25 1.00 3.50 8.00 50.00
# summary statistics
summary(sals)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.250 1.250 3.500 8.565 7.250 50.000
How about the mode?
# Return the mode of x, i.e. its most frequent value.
# If several values share the highest count, the one that appears first
# in x is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Map every element of x to its position among the distinct values,
  # then count how many times each position occurs.
  counts <- tabulate(match(x, distinct_values))
  distinct_values[which.max(counts)]
}
# Most frequent value in pitches_by_innings
getMode(pitches_by_innings)
[1] 10
Question_7: Find the most frequent value of hits_per_9innings.
getMode(hits_per_9innings)
[1] 8.7
Question_8: Summarize the following survey with the `table()` command:
What is your favorite day of the week to watch baseball? A total of 10 fans
submitted this survey: Saturday, Saturday, Sunday, Monday, Saturday, Tuesday,
Sunday, Friday, Friday, Monday.
# Survey responses from the 10 fans, in the order listed in the question.
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
# Frequency of each answer
table(game_day)
game_day
Friday Monday Saturday Sunday Tuesday
2 2 3 2 1
Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.
getMode(game_day)
[1] "Saturday"