4+3
[1] 7
4-3
[1] 1
4==3
[1] FALSE
3^2
[1] 9
sqrt(4)
[1] 2
log(10) #ln, natural log, base e=2.72
[1] 2.302585
log10(10)
[1] 1
log10(100)
[1] 2
log10(1000)
[1] 3
log(10, base =5)
[1] 1.430677
log10(10)
[1] 1
#Batting Average=(No. of Hits)/(No. of At Bats)
#What is the batting average of a player that bats 29 hits in 112 at bats?
BA=(29)/(112)
BA
[1] 0.2589286
Batting_Average=round(BA,digits = 3)
Batting_Average
[1] 0.259
#Question_2:What is the batting average of a player that bats 42 hits in 212 at bats?
BA_1 = (42)/212
Batting_Average1=round(BA_1, digits = 3)
Batting_Average1
[1] 0.198
#On Base Percentage
#OBP=(H+BB+HBP)/(At Bats+BB+HBP+SF)
#Let us compute the OBP for a player with the following general stats:
#AB=515,H=172,BB=84,HBP=5,SF=6
OBP=(172+84+5)/(515+84+5+6)
OBP
[1] 0.4278689
On_Base_Percentage=round(OBP,digits = 3)
On_Base_Percentage
[1] 0.428
#Question_3: Compute the OBP for a player with the following general stats:
#AB=565,H=156,BB=65,HBP=3,SF=7
OBP=(156+65+3)/(565+65+3+7)
On_Base_Percentage2=round(OBP, digits = 3)
On_Base_Percentage2
[1] 0.35
# Logical Disjunction (or)
FALSE | FALSE # False OR False
[1] FALSE
# Logical Conjunction (and)
TRUE & FALSE #True AND False
[1] FALSE
# Negation
! FALSE # Not False
[1] TRUE
# Combination of statements
2 < 3 | 1 == 5 # 2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
Total_Bases <- 6 + 5
Total_Bases*3
[1] 33
#to see the variables defined as a list use ls
ls()
[1] "BA" "BA_1" "Batting_Average" "Batting_Average1"
[5] "hits_per_innings" "OBP" "On_Base_Percentage" "On_Base_Percentage2"
[9] "pitches_by_innings" "runs_per_innings" "strikes_by_innings" "Total_Bases"
[13] "Wins_Season"
#to delete a variable use rm
rm(Total_Bases)
#the following is a vector, use c() means concatenate
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
[1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
[1] 9 12 6 14 9
TRUE | FALSE
[1] TRUE
FALSE | FALSE
[1] FALSE
! FALSE
[1] TRUE
2<3 | 1== 5 #2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
#Question_4: Define two vectors, runs_per_9innings and hits_per_9innings, each with five elements.
# Each vector holds exactly five values, as the question requires. The first
# value is 1 and the last is 9, and every value is distinct.
runs_per_9innings <- c(1, 3, 5, 7, 9)
hits_per_9innings <- c(1, 3, 5, 7, 9)
# replicate function
rep(2, 5)
[1] 2 2 2 2 2
rep(1,4)
[1] 1 1 1 1
# consecutive numbers
1:5
[1] 1 2 3 4 5
2:10
[1] 2 3 4 5 6 7 8 9 10
# sequence from 1 to 10 with a step of 2
seq(1, 10, by=2)
[1] 1 3 5 7 9
seq(2,13,by=3)
[1] 2 5 8 11
# add vectors
pitches_by_innings+strikes_by_innings
[1] 21 27 16 34 19
# compare vectors
pitches_by_innings == strikes_by_innings
[1] FALSE FALSE FALSE FALSE FALSE
# find length of vector
length(pitches_by_innings)
[1] 5
# find minimum value in vector
min(pitches_by_innings)
[1] 10
# find average value in vector
mean(pitches_by_innings)
[1] 13.4
Wins_Season <- c(94,88,96,87,79)
Wins_Season
[1] 94 88 96 87 79
# You can access parts of a vector by using [. Recall the value of the vector pitches_by_innings.
pitches_by_innings
[1] 12 15 10 20 10
# If you want to get the first element:
pitches_by_innings[1]
[1] 12
#Question_5: Get the first element of hits_per_9innings.
hits_per_9innings[1]
[1] 1
#Question_6: Get the last element of hits_per_9innings.
hits_per_9innings[length(hits_per_9innings)]
[1] 9
# Find the mode of a vector, i.e. its most frequent value.
#
# x: an atomic vector (e.g. numeric or character).
# Returns a length-one vector containing the value that occurs most often in
# x. If several values share the highest count, the one that appears first in
# x is returned.
getMode <- function(x) {
  distinct_vals <- unique(x)
  # Map every element of x to its position among the distinct values.
  positions <- match(x, distinct_vals)
  # Count the occurrences of each distinct value, in first-appearance order.
  counts <- tabulate(positions)
  distinct_vals[which.max(counts)]
}
#Question_7: Find the most frequent value of hits_per_9innings.
getMode(hits_per_9innings)
[1] 1
sals <- c(12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# the average
mean(sals)
[1] 8.565
# the variance
var(sals)
[1] 225.5145
# the standard deviation
sd(sals)
[1] 15.01714
# the median
median(sals)
[1] 3.5
# Tukey's five number summary, useful for boxplots
# five numbers: min, lower hinge, median, upper hinge, max
fivenum(sals)
[1] 0.25 1.00 3.50 8.00 50.00
# summary statistics
summary(sals)
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.250 1.250 3.500 8.565 7.250 50.000
#Question_8: Summarize the following survey with the table() command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday, Tuesday, Sunday, Friday, Friday, Monday
game_day<-c("Saturday", "Saturday", "Sunday", "Monday", "Saturday","Tuesday", "Sunday", "Friday", "Friday", "Monday")
table(game_day)
game_day
Friday Monday Saturday Sunday Tuesday
2 2 3 2 1
#Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.
getMode(game_day)
[1] "Saturday"