This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Plot the two columns of the built-in `cars` data set against each other.
plot(cars)
# Average of each column.
mean(cars[["speed"]])
## [1] 15.4
mean(cars[["dist"]])
## [1] 42.98
# Largest value in each column.
max(cars[["dist"]])
## [1] 120
max(cars[["speed"]])
## [1] 25
# Basic arithmetic: addition, subtraction, powers and square roots
4 + 1
## [1] 5
5 - 2
## [1] 3
2^2
## [1] 4
sqrt(25)
## [1] 5
2^5
## [1] 32
# log() with a single argument is the natural logarithm
log(2.72)
## [1] 1.000632
# log10() is the base-ten logarithm
log10(5)
## [1] 0.69897
log10(10)
## [1] 1
log10(100)
## [1] 2
# log() also takes an arbitrary base through its `base` argument
log(10, base = 5)
## [1] 1.430677
log(10, base = 2)
## [1] 3.321928
log(1000, base = 10)
## [1] 3
# Question_1: Compute the log base 5 of 10 and the log of 10.
log(10, base = 5)
## [1] 1.430677
log(10, base = 10)
## [1] 1
Computing Offensive Metrics in Baseball
# Batting average (BA) as a ratio of two counts -- presumably hits / at-bats
# for one player; the source does not label the literals.
BA <- 29 / 112
BA
## [1] 0.2589286
# Report the average to three decimal places.
Batting_Average <- round(BA, digits = 3)
Batting_Average
## [1] 0.259
# Same calculation for a second pair of counts; BA and Batting_Average are
# overwritten.
BA <- 42 / 212
BA
## [1] 0.1981132
Batting_Average <- round(BA, digits = 3) # keep three decimal places
Batting_Average
## [1] 0.198
# On-base percentage: OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# At-bats (AB) already include hits (H), so H must not be added to the
# denominator a second time.
# Literals below -- by analogy with Question_3, presumably AB=515, H=172,
# BB=84, HBP=5, SF=6.
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
OBP
## [1] 0.4278689
# Report the percentage to three decimal places.
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
## [1] 0.428
#Question_3:Compute the OBP for a player with the following general stats: #AB=565,H=156,BB=65,HBP=3,SF=7
OBP <- (156 + 65 + 3) / (565 + 65 + 3 + 7)
OBP
## [1] 0.35
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
## [1] 0.35
# Comparison operators evaluate to TRUE or FALSE
3 == 8
## [1] FALSE
3 != 8
## [1] TRUE
3 <= 8
## [1] TRUE
3 > 4
## [1] FALSE
# Logical operators: | (or), & (and), ! (not)
FALSE | FALSE
## [1] FALSE
TRUE & FALSE
## [1] FALSE
!FALSE
## [1] TRUE
# Comparisons are evaluated before |, made explicit here with parentheses
(2 < 3) | (1 == 5)
## [1] TRUE
# Assigning values to variables
TotalBases <- 6 + 5
TotalBases * 3
## [1] 33
# ls() lists the objects currently defined in the workspace
ls()
## [1] "BA" "Batting_Average" "OBP"
## [4] "On_Base_Percentage" "TotalBases"
# rm() removes a variable from the workspace
rm(TotalBases)
# Vectors
# c() combines values into a vector
pitches_by_inning <- c(12, 15, 10, 20, 10)
pitches_by_inning
## [1] 12 15 10 20 10
strikes_by_inning <- c(9, 12, 6, 14, 9)
strikes_by_inning
## [1] 9 12 6 14 9
# Question_4: Define two vectors, runs_per_9innings and hits_per_9innings, each with five elements.
runs_per_9innings <- c(3, 7, 1, 2, 4)
hits_per_9innings <- c(8, 2, 5, 2, 7)
runs_per_9innings
## [1] 3 7 1 2 4
hits_per_9innings
## [1] 8 2 5 2 7
# rep(value, times) repeats a value
rep(2, times = 5)
## [1] 2 2 2 2 2
rep(1, times = 4)
## [1] 1 1 1 1
# from:to gives consecutive integers
1:5
## [1] 1 2 3 4 5
2:10
## [1] 2 3 4 5 6 7 8 9 10
# seq(from, to, by) steps from `from` towards `to` in increments of `by`
seq(1, 10, by = 2)
## [1] 1 3 5 7 9
seq(2, 13, by = 3)
## [1] 2 5 8 11
# Arithmetic and comparisons on vectors work element by element
pitches_by_inning + strikes_by_inning
## [1] 21 27 16 34 19
pitches_by_inning == strikes_by_inning
## [1] FALSE FALSE FALSE FALSE FALSE
# Summaries of a vector: number of elements, smallest value, average
length(pitches_by_inning)
## [1] 5
min(pitches_by_inning)
## [1] 10
mean(pitches_by_inning)
## [1] 13.4
pitches_by_inning
## [1] 12 15 10 20 10
# Square brackets extract elements by position; [1] is the first element
pitches_by_inning[1]
## [1] 12
# Question_5: Get the first element of hits_per_9innings.
hits_per_9innings[1]
## [1] 8
# Indexing with length() reaches the last element whatever the vector size
pitches_by_inning[length(pitches_by_inning)]
## [1] 10
# Question_6: Get the last element of hits_per_9innings.
hits_per_9innings[length(hits_per_9innings)]
## [1] 7
# A vector of positions extracts several elements: here the 2nd through 4th
pitches_by_inning[2:4]
## [1] 15 10 20
# Character vector of position labels
player_positions <- c("catchers", "pitcher", "infielders", "outfielders")
# Data frames are similar to a spreadsheet: one named column per variable.
# This one is printed rather than stored in a variable.
data.frame(
  bonus = c(2, 3, 1), # in millions
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1) # in millions
)
# How to make a random sample
# No seed is set, so the echoed values below are one possible draw and will
# differ from run to run.
# sample(x, size): `x` is the pool of values to draw from, `size` is how many
# values to draw.
sample(1:10, size = 5)
## [1] 3 5 2 7 10
# Data frame to draw a random sample of rows from
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
bar
n <- 5 # sample size
# seq_len(nrow(bar)) enumerates the row indices without hard-coding the row
# count; unlike 1:nrow(bar) it is empty (not c(1, 0)) for a zero-row frame.
samplerows <- sample(seq_len(nrow(bar)), size = n)
samplerows
## [1] 4 3 10 5 2
# Extract the sampled rows into a new data frame
barsample <- bar[samplerows, ]
print(barsample)
## var1 var2
## 4 D 4
## 3 C 3
## 10 J 10
## 5 E 5
## 2 B 2
# The same row sampling written as a single expression
bar[sample(seq_len(nrow(bar)), size = n), ]
# Using tables
# Fan survey responses
x <- c("Yes", "No", "No", "Yes", "Yes")
# table() counts how many times each distinct value occurs in x
table(x)
## x
## No Yes
## 2 3
# Numerical measures of center and spread
# Yearly salaries
sals <- c(12, 0.4, 5, 2, 50, 8, 3, 1, 4, 0.25)
mean(sals)
## [1] 8.565
# Variance
var(sals)
## [1] 225.5145
# Standard deviation
sd(sals)
## [1] 15.01714
median(sals)
## [1] 3.5
# fivenum() gives min, lower hinge, median, upper hinge and max
fivenum(sals)
## [1] 0.25 1.00 3.50 8.00 50.00
# summary() reports quartiles and the mean; its quartiles can differ from the
# hinges above
summary(sals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.250 1.250 3.500 8.565 7.250 50.000
#How About the Mode
# Return the most frequent value of the vector `x`.
# When several values tie for most frequent, the one that appears first in
# `x` is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Map each element of x to its position in distinct_values, then count how
  # often each position occurs.
  occurrences <- tabulate(match(x, distinct_values))
  distinct_values[which.max(occurrences)]
}
# Most frequent pitch count across innings
getMode(x = pitches_by_inning)
## [1] 10
# Question_7: Find the most frequent value of hits_per_9innings.
getMode(x = hits_per_9innings)
## [1] 2
#Question_8: Summarize the following survey with the table() command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday, Tuesday, Sunday, Friday, Friday, Monday
# Survey responses, in the order they were submitted
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
# Count the responses per day
table(game_day)
## game_day
## Friday Monday Saturday Sunday Tuesday
## 2 2 3 2 1
# Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.
getMode(x = game_day)
## [1] "Saturday"