# Basic arithmetic.
# Console output is kept as `#>` comments so this file can be sourced as-is.

# Addition
2 + 2
#> [1] 4

# Subtraction
4 - 2
#> [1] 2

# Multiplication
9 * 6
#> [1] 54

# Division
25 / 5
#> [1] 5
25 / 2
#> [1] 12.5

# Exponentiation
2^2
#> [1] 4
5^3
#> [1] 125

# Square root
sqrt(25)
#> [1] 5

# Logarithms: log() is the natural logarithm (base e) unless a base is given
log(2)
#> [1] 0.6931472
# Question_1: Compute the log base 5 of 10 and the log of 10.
# log(x, base) takes the value first and the base second; log10(x) is the
# base-10 shortcut. Console output is kept as `#>` comments.
log10(5) # log of 5, base 10 -- note: this is NOT the log base 5 of 10
#> [1] 0.69897
log10(10) # log of 10, base 10
#> [1] 1
log(10, base = 5) # log of 10, base 5
#> [1] 1.430677
log(10, base = 10) # log of 10, base 10
#> [1] 1
log(100, base = 4) # log of 100, base 4
#> [1] 3.321928
log(10) # natural log of 10 (log() defaults to base e)
#> [1] 2.302585
# Batting Average = (No. of Hits) / (No. of At Bats)
# What is the batting average of a player who gets 129 hits in 412 at bats?
# Console output is kept as `#>` comments.
BA <- 129 / 412
BA
#> [1] 0.3131068

# Alternative solution: name the inputs first, then divide
N_Hits <- 129
At_Bats <- 412
BA <- N_Hits / At_Bats
BA
#> [1] 0.3131068

# Batting averages are conventionally reported to three decimal places
Batting_Average <- round(BA, digits = 3)
Batting_Average
#> [1] 0.313
# Question_2: What is the batting average of a player who gets 42 hits in 212 at bats?
# Answer
# Console output is kept as a `#>` comment.
N_Hits <- 42
At_Bats1 <- 212
# Batting_Average holds the unrounded ratio here; the value rounded to three
# decimals is stored under the separate name BattingAverage.
Batting_Average <- N_Hits / At_Bats1
BattingAverage <- round(Batting_Average, digits = 3)
BattingAverage
#> [1] 0.198
# On Base Percentage
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
# Let us compute the OBP for a player with the following general stats:
# AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
# Console output is kept as `#>` comments.
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
OBP
#> [1] 0.4278689
OBP_Adj <- round(OBP, digits = 3)
OBP_Adj
#> [1] 0.428
# Question_3: Compute the OBP for a player with the following general stats:
# AB = 565, H = 156, BB = 65, HBP = 3, SF = 7
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF): hits appear only in the
# numerator, so the denominator is 565 + 65 + 3 + 7 = 640 (H must not be
# added to it).
# Console output is kept as a `#>` comment.
OBP <- (156 + 65 + 3) / (565 + 65 + 3 + 7)
OBP_ad <- round(OBP, digits = 3)
OBP_ad
#> [1] 0.35
# Often you will want to test whether one value is less than, greater than, or equal to another.
# Comparison operators return TRUE or FALSE.
# Console output is kept as `#>` comments.

# Equal to
3 == 8
#> [1] FALSE
2 == 3
#> [1] FALSE
1 == 1
#> [1] TRUE

# Greater than or equal to
3 >= 1
#> [1] TRUE
3 >= 9
#> [1] FALSE

# Less than or equal to
7 <= 10
#> [1] TRUE
7 <= 6
#> [1] FALSE

# Not equal to
3 != 4
#> [1] TRUE
# The logical operators are & for logical AND, | for logical OR, and ! for NOT. Here are some examples:
# Console output is kept as `#>` comments.

# Logical disjunction (or)
FALSE | FALSE # False OR False
#> [1] FALSE
FALSE | TRUE # False OR True
#> [1] TRUE

# Logical conjunction (and)
TRUE & FALSE # True AND False
#> [1] FALSE

# Negation
!FALSE # Not False
#> [1] TRUE
!TRUE # Not True
#> [1] FALSE

# Combination of statements
2 < 3 | 1 == 5 # 2 < 3 is True, 1 == 5 is False, True OR False is True
#> [1] TRUE
2 < 1 | 2 == 3 # 2 < 1 is False, 2 == 3 is False, False OR False is False
#> [1] FALSE
# Assign a value to a name, then use it in a calculation.
# Console output is kept as `#>` comments.
total_bases <- 7 + 4
total_bases * 4
#> [1] 44

# ls() lists the objects in the workspace. The listings below were captured
# from the original session, so they also show objects that this file only
# creates further down (e.g. ceo_salaries, x).
ls()
#>  [1] "At_Bats"            "At_Bats1"           "BA"
#>  [4] "Batting_Average"    "BattingAverage"     "ceo_salaries"
#>  [7] "hits_per_innings"   "N_Hits"             "OBP"
#> [10] "OBP_ad"             "OBP_Adj"            "pitches_by_innings"
#> [13] "player_positions"   "runs_per_innings"   "strikes_by_innings"
#> [16] "total_bases"        "x"

# rm() removes an object; total_bases no longer appears in the listing.
rm(total_bases)
ls()
#>  [1] "At_Bats"            "At_Bats1"           "BA"
#>  [4] "Batting_Average"    "BattingAverage"     "ceo_salaries"
#>  [7] "hits_per_innings"   "N_Hits"             "OBP"
#> [10] "OBP_ad"             "OBP_Adj"            "pitches_by_innings"
#> [13] "player_positions"   "runs_per_innings"   "strikes_by_innings"
#> [16] "x"
# Vectors
# c() combines values into a vector.
# Console output is kept as `#>` comments.
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
#> [1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
#> [1]  9 12  6 14  9
# Question_4: Define two vectors, runs_per_innings and hits_per_innings, each with five elements.
# Answer: two five-element vectors.
# Console output is kept as `#>` comments.
hits_per_innings <- c(3, 3, 1, 4, 1)
hits_per_innings
#> [1] 3 3 1 4 1
runs_per_innings <- c(1, 1, 0, 2, 0)
runs_per_innings
#> [1] 1 1 0 2 0
# Console output is kept as `#>` comments.

# rep(x, n): repeat the value x n times
rep(2, 5)
#> [1] 2 2 2 2 2
rep(3, 3)
#> [1] 3 3 3

# Consecutive numbers with the colon operator
1:6
#> [1] 1 2 3 4 5 6
2:7
#> [1] 2 3 4 5 6 7

# Sequence from 1 to 10 with a step of 3
seq(1, 10, by = 3)
#> [1]  1  4  7 10

# Sequence from 2 to 13 with a step of 3 (stops at 11, the last value <= 13)
seq(2, 13, by = 3)
#> [1]  2  5  8 11
# Console output is kept as `#>` comments.

# Adding vectors: the result is the element-by-element sum
pitches_by_innings + strikes_by_innings # + operator
#> [1] 21 27 16 34 19

# Compare two vectors element by element
pitches_by_innings
#> [1] 12 15 10 20 10
strikes_by_innings
#> [1]  9 12  6 14  9
pitches_by_innings == strikes_by_innings
#> [1] FALSE FALSE FALSE FALSE FALSE

# Summary functions
length(pitches_by_innings)
#> [1] 5
min(pitches_by_innings)
#> [1] 10
mean(pitches_by_innings)
#> [1] 13.4

# First element (R indexing starts at 1)
pitches_by_innings[1]
#> [1] 12
hits_per_innings[1]
#> [1] 3

# Last element: index with the vector's length
pitches_by_innings[length(pitches_by_innings)]
#> [1] 10
hits_per_innings
#> [1] 3 3 1 4 1
hits_per_innings[length(hits_per_innings)]
#> [1] 1

# Several elements at once: 1:3 is already a vector, so no c() is needed
pitches_by_innings
#> [1] 12 15 10 20 10
pitches_by_innings[1:3]
#> [1] 12 15 10
hits_per_innings
#> [1] 3 3 1 4 1
hits_per_innings[1:4]
#> [1] 3 3 1 4
# A character vector: every element is a string.
# Console output is kept as a `#>` comment.
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
player_positions
#> [1] "catcher"     "pitcher"     "infielders"  "outfielders"
# Data Frames
# Build a three-row data frame, one named column per argument.
# NOTE(review): active_roster mixes "yes" and "Yes"; these are distinct values
# in R -- confirm whether the lower-case spelling is intentional.
data.frame(
  bonus = c(2, 3, 1),
  active_roster = c("yes", "No", "Yes"),
  salary = c(1.5, 2.5, 1)
)
# Using Tables
# table() counts how many times each distinct value occurs.
# Console output is kept as `#>` comments.
x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)
#> x
#>  No Yes
#>   2   3
# Numerical measures of center and spread
# Console output is kept as `#>` comments.
ceo_salaries <- c(12, .4, 2, 15, 8, 3, 1, 4, .25)

# Center: mean and median
mean(ceo_salaries)
#> [1] 5.072222
median(ceo_salaries)
#> [1] 3

# Spread: variance and standard deviation (sd is the square root of var)
var(ceo_salaries)
#> [1] 28.95944
sd(ceo_salaries)
#> [1] 5.381398

# Five-number summary: minimum, lower hinge, median, upper hinge, maximum
fivenum(ceo_salaries)
#> [1]  0.25  1.00  3.00  8.00 15.00

# Raw values:    12, .4, 2, 15, 8, 3, 1, 4, .25
# Sorted values: .25, .4, 1, 2, 3, 4, 8, 12, 15  (the middle value, 3, is the median)
# Return the most frequent value (the mode) of a vector.
#
# x: a vector of values (numeric or character).
# Returns a single element taken from x: the value that occurs most often.
# If several values tie for the highest count, the one that appears first in
# x wins, because unique() keeps first-occurrence order and which.max()
# returns the first maximum.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Map each element of x to its position in distinct_values, then count
  # how many elements landed on each position.
  occurrences <- tabulate(match(x, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Console output is kept as `#>` comments.
pitches_by_innings
#> [1] 12 15 10 20 10
getMode(pitches_by_innings)
#> [1] 10

# Question_7: Find the most frequent value of hits_per_innings and the most
# frequent value of strikes_by_innings.
hits_per_innings
#> [1] 3 3 1 4 1
# 3 and 1 both occur twice; getMode() reports 3.
getMode(hits_per_innings)
#> [1] 3
strikes_by_innings
#> [1]  9 12  6 14  9
getMode(strikes_by_innings)
#> [1] 9
# Question_8: Summarize the following survey with the table() command:
# Survey responses: one day name per respondent.
# Console output is kept as `#>` comments.
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Monday"
)
table(game_day)
#> game_day
#>   Friday   Monday Saturday   Sunday  Tuesday
#>        1        2        3        2        1

# The most frequent answer matches the largest count in the table above.
getMode(game_day)
#> [1] "Saturday"