1+1
7-8
[1] -1
(2+5i) + (3-1i)
[1] 5+4i
4/2
[1] 2
2^5
[1] 32
sqrt(25)
[1] 5
sqrt(16)
[1] 4
sqrt(144)
[1] 12
log(2)
log(10)
[1] 2.302585
log(2.72)
[1] 1.000632
log10(10)
[1] 1
Question 1: Compute the log base 5 of 10 and the log of 10.
log(10,5)
[1] 1.430677
log(10,10)
[1] 1
log(100,4)
[1] 3.321928
#Batting Average=(No. of Hits)/(No. of At Bats)
#What is the batting average of a player that bats 129 hits in 412 at bats?
BA=(129)/(412)
BA
[1] 0.3131068
#Alternative Solution
N_Hits =129
At_Bats = 412
BA<-N_Hits/At_Bats
BA
[1] 0.3131068
Batting_Average=round(BA,digits = 3)
Batting_Average
[1] 0.313
#Question_2:What is the batting average of a player that bats 42 hits in 212 at bats?
#Answer
N_Hits = 42
At_Bats1 = 212
Bat_Average<-N_Hits/At_Bats1
BattingAverage=round(Bat_Average,digits =3)
BattingAverage
[1] 0.198
#On Base Percentage
#OBP=(H+BB+HBP)/(At Bats+BB+HBP+SF)
#Let us compute the OBP for a player with the following general stats
#AB=515,H=172,BB=84,HBP=5,SF=6
OBP=(172+84+5)/(515+84+5+6)
OBP
[1] 0.4278689
OBP_Adj = round(OBP,digits =3)
OBP_Adj
[1] 0.428
#Question_3:Compute the OBP for a player with the following general stats:
#AB=565,H=156,BB=65,HBP=3,SF=7
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# Hits belong only in the numerator; the denominator is AB + BB + HBP + SF.
OBP = (156+65+3)/(565+65+3+7)
OBP_adj = round(OBP, digits = 3)
OBP_adj
[1] 0.35
3==8
[1] FALSE
2==3
[1] FALSE
1==1
[1] TRUE
3>=1
[1] TRUE
3>=9
[1] FALSE
7<=10
[1] TRUE
7<=6
[1] FALSE
Often you will want to test whether something is less than, greater than or equal to something.
# Logical Disjunction (or)
FALSE | FALSE # False OR False
[1] FALSE
# Logical Conjunction (and)
TRUE & FALSE #True AND False
[1] FALSE
# Logical Conjunction (and)
TRUE & FALSE #True AND False
[1] FALSE
2 < 3 | 1 == 5 # 2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
Total_Bases <- 7 + 4
Total_Bases*4
[1] 44
ls()
[1] "At_Bats" "At_Bats1" "BA"
[4] "Bat_Average" "Batting_Average" "BattingAverage"
[7] "N_Hits" "OBP" "OBP_adj"
[10] "OBP_Adj" "Total_Bases"
rm(Total_Bases)
ls()
[1] "At_Bats" "At_Bats1" "BA"
[4] "Bat_Average" "Batting_Average" "BattingAverage"
[7] "N_Hits" "OBP" "OBP_adj"
[10] "OBP_Adj"
Vectors
pitches_by_innings <-c(12,15,10,20,10)
pitches_by_innings
[1] 12 15 10 20 10
strikes_by_innings <- c(9,12,6,14,9)
strikes_by_innings
[1] 9 12 6 14 9
Question_4: Define two vectors,runs_per_9innings and hits_per_9innings, each with five elements.
runs_per_9innings <- c(2,1,0,0,1)
runs_per_9innings
[1] 2 1 0 0 1
hits_per_innings <- c(3,3,4,6,1)
hits_per_innings
[1] 3 3 4 6 1
rep(2, 5)
[1] 2 2 2 2 2
rep(1,4)
[1] 1 1 1 1
1:6
[1] 1 2 3 4 5 6
2:7
[1] 2 3 4 5 6 7
seq(1, 10, by=3)
[1] 1 4 7 10
seq(2,13,by=3)
[1] 2 5 8 11
pitches_by_innings+strikes_by_innings
[1] 21 27 16 34 19
#compare two vectors
pitches_by_innings
[1] 12 15 10 20 10
strikes_by_innings
[1] 9 12 6 14 9
pitches_by_innings == strikes_by_innings
[1] FALSE FALSE FALSE FALSE FALSE
length(pitches_by_innings)
[1] 5
min(pitches_by_innings)
[1] 10
mean(pitches_by_innings)
[1] 13.4
pitches_by_innings
[1] 12 15 10 20 10
pitches_by_innings[1]
[1] 12
Question_5: Get the first element of hits_per_9innings.
hits_per_innings
[1] 3 3 4 6 1
hits_per_innings[1]
[1] 3
pitches_by_innings[length(pitches_by_innings)]
[1] 10
#Question_6: Get the last element of hits_per_9innings.
hits_per_innings
[1] 3 3 4 6 1
hits_per_innings[length(hits_per_innings)]
[1] 1
pitches_by_innings
[1] 12 15 10 20 10
pitches_by_innings[c(2, 3, 4)]
[1] 15 10 20
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
player_positions
[1] "catcher" "pitcher" "infielders"
[4] "outfielders"
Data Frames
data.frame(bonus = c(2, 3, 1),#in millions
active_roster = c("yes", "no", "yes"),
salary = c(1.5, 2.5, 1))#in millions
How to make a Random Sample
sample(1:10, size=5)
[1] 10 9 6 1 3
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Check data frame
bar
n <- 5
samplerows <- sample(1:nrow(bar), size=n)
# print sample rows
samplerows
[1] 8 1 5 2 9
# extract rows
barsample <- bar[samplerows, ]
# print sample
print(barsample)
bar[sample(1:nrow(bar), n), ]
NA
Using Tables
x <- c("Yes","No","No","Yes","Yes")
table(x)
x
No Yes
2 3
Numerical Measures of Center and Spread
sals <- c(12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# the average
mean(sals)
[1] 8.565
var(sals)
[1] 225.5145
sd(sals)
[1] 15.01714
median(sals)
[1] 3.5
# Tukey's five number summary, useful for boxplots
# five numbers: min, lower hinge, median, upper hinge, max
fivenum(sals)
[1] 0.25 1.00 3.50 8.00 50.00
summary(sals)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.250 1.250 3.500 8.565 7.250 50.000
How about the Mode?
# Function to find the mode, i.e. most frequent value
# x: a vector of values (numeric, character, ...).
# Returns the single value of x that occurs most often. When several values
# share the highest count, the one that appears first in x is returned,
# because which.max() picks the first maximum.
getMode <- function(x) {
  # Distinct values, in order of first appearance.
  distinct_values <- unique(x)
  # Map every element to the index of its distinct value, then count per index.
  value_index <- match(x, distinct_values)
  occurrences <- tabulate(value_index)
  distinct_values[which.max(occurrences)]
}
getMode(pitches_by_innings)
[1] 10
#Question_7: Find the most frequent value of hits_per_9innings.
getMode(hits_per_innings)
[1] 3
#Question_8: Summarize the following survey with the table() command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday, Tuesday, Sunday, Friday, Friday, Monday
game_day<-c("Saturday", "Saturday", "Sunday", "Monday", "Saturday","Tuesday", "Sunday", "Friday", "Friday", "Monday")
table(game_day)
game_day
Friday Monday Saturday Sunday Tuesday
2 2 3 2 1
#Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.
getMode(game_day)
[1] "Saturday"