# Addition
2 + 3
## [1] 5
# Subtraction
5 - 3
## [1] 2
# Multiplication
2 * 3
## [1] 6
# Division
2 / 3
## [1] 0.6666667
# Exponentiation
2^3
## [1] 8
# Square root
sqrt(2)
## [1] 1.414214
# Natural logarithm (base e)
log(2)
## [1] 0.6931472
# Logarithm base 10
log10(2)
## [1] 0.30103

**Question_1: Compute the log base 5 of 10 and the natural log of 10.**

# Question_1: Compute the log base 5 of 10 and the log of 10.
# Log base 5 of 10: the base is passed explicitly
log(x = 10, base = 5)
## [1] 1.430677
# Natural log of 10: log() defaults to base e
log(x = 10)
## [1] 2.302585

**Computing some offensive metrics in baseball**

# Batting average = hits / at bats
# Compute the batting average of a player with 29 hits in 112 at bats
BA <- 29 / 112
BA
## [1] 0.2589286
# Batting averages are conventionally reported to three decimals
ba_avg <- round(BA, digits = 3)
ba_avg
## [1] 0.259

**Question_2: What is the batting average of a player with 42 hits in 212 at bats?**

# Number of hits
Number_of_hits <- 42
# Number of at bats
Number_of_at_bats <- 212
# Batting average = hits / at bats, rounded to three decimals
Batting_average <- round(Number_of_hits / Number_of_at_bats, digits = 3)
Batting_average
## [1] 0.198
# On Base Percentage
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
# Let us compute the OBP for a player with the following general stats:
# AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
paste(OBP, " is the OBP of the player")
## [1] "0.427868852459016  is the OBP of the player"
# Same value rounded to three decimals
OBP_Adj <- round(OBP, digits = 3)
paste(OBP_Adj, " is the OBP_Adj of the player")
## [1] "0.428  is the OBP_Adj of the player"

#Question_3: Compute the OBP for a player with the following general stats: AB=565, H=156, BB=65, HBP=3, SF=7

# AB = 565, H = 156, BB = 65, HBP = 3, SF = 7
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
# Hits appear only in the numerator; the denominator is AB + BB + HBP + SF.
OBP <- (156 + 65 + 3) / (565 + 65 + 3 + 7)
# Round to three decimals for reporting
OBP_Adj <- round(OBP, 3)
paste(OBP_Adj, " is the OBP of the player")
## [1] "0.35  is the OBP of the player"

Often you will want to test whether something is less than, greater than or equal to something.

3 == 8 # Is 3 equal to 8?
## [1] FALSE
3 != 8 # Is 3 different from 8?
## [1] TRUE
3 <= 8 # Is 3 less than or equal to 8?
## [1] TRUE
3 > 4 # Is 3 greater than 4?
## [1] FALSE

Logical operators can be used to combine multiple logical statements: and (`&&` for single values, `&` element-wise), or (`||` for single values, `|` element-wise), and not (`!`).

# Logical disjunction (or)
(3 == 3) || (3 == 8)
## [1] TRUE
FALSE || FALSE
## [1] FALSE
# Logical conjunction (and)
TRUE & FALSE # TRUE AND FALSE
## [1] FALSE
# Negation
!FALSE # NOT FALSE
## [1] TRUE
# Combination of statements
(2 < 3) | (1 == 5) # 2 < 3 is TRUE, 1 == 5 is FALSE, TRUE OR FALSE is TRUE
## [1] TRUE

** Assigning Values to Variables**

# Combination of statements: each comparison is evaluated first, then combined with OR
(2 < 3) | (1 == 5) # 2 < 3 is TRUE, 1 == 5 is FALSE, TRUE OR FALSE is TRUE
## [1] TRUE

**Assigning Values to Variables** In R, you can assign values to variables using the assignment operator `<-`.

# Store the sum in a variable, then print the variable by typing its name
Total_Bases <- 6 + 5
Total_Bases
## [1] 11

**To see the variables that are currently defined, use `ls` (as in "list")**

# List the names of the objects currently defined in the workspace
ls()
## [1] "BA"                "ba_avg"            "Batting_average"  
## [4] "Number_of_at_bats" "Number_of_hits"    "OBP"              
## [7] "OBP_Adj"           "Total_Bases"

**To delete a variable, use the `rm` function**

# Remove the Total_Bases variable from the workspace
rm(Total_Bases)

Vectors

# Combine values into a numeric vector with c()
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
## [1] 12 15 10 20 10
# A second vector of the same length
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
## [1]  9 12  6 14  9

# Question_4: Define two vectors, runs_per_9innings and hits_per_9innings, each with 5 elements.

runs_per_9innings <- c(2, 3, 1, 4, 2)
hits_per_9innings <- c(5, 6, 3, 7, 5)

Replicate

# Replicate the number 2 five times
rep(2, times = 5)
## [1] 2 2 2 2 2
# Replicate the number 3 three times
rep(3, times = 3)
## [1] 3 3 3

return a range of numbers

# Return the consecutive integers from 2 to 7 (both ends included) with the `:` operator
2:7
## [1] 2 3 4 5 6 7

Sequences

# Return the sequence of numbers from 1 to 10 in steps of 3
seq(from = 1, to = 10, by = 3)
## [1]  1  4  7 10
# Return the sequence of numbers from 2 to 13 in steps of 3
# (it stops at 11, the last value that does not exceed 13)
seq(from = 2, to = 13, by = 3)
## [1]  2  5  8 11
# Add two vectors element by element
pitches_by_innings + strikes_by_innings
## [1] 21 27 16 34 19
# Compare two vectors element by element
pitches_by_innings == strikes_by_innings
## [1] FALSE FALSE FALSE FALSE FALSE
# Get the length of a vector
length(pitches_by_innings)
## [1] 5
# Find the minimum value in a vector
min(pitches_by_innings)
## [1] 10
# Find the mean value in a vector
mean(pitches_by_innings)
## [1] 13.4

#Question_5: Get the first element of hits_per_9innings.

# Get the first element of a vector (R indexes from 1)
pitches_by_innings[1]
## [1] 12
# Get the first element of the hits_per_9innings vector
hits_per_9innings[1]
## [1] 5
# Get the last element of a vector by using its length as the index
pitches_by_innings[length(pitches_by_innings)]
## [1] 10

#Question_6: Get the last element of hits_per_9innings.

# Get the last element of the hits_per_9innings vector
hits_per_9innings[length(hits_per_9innings)]
## [1] 5
# Get the second, third and fourth elements of a vector
pitches_by_innings
## [1] 12 15 10 20 10
pitches_by_innings[2:4]
## [1] 15 10 20
# or, listing the positions explicitly
pitches_by_innings[c(2, 3, 4)]
## [1] 15 10 20
# Character vector of player positions
player_positions <- c(
  "Catcher", "Pitcher", "First Base", "Second Base", "Third Base",
  "Shortstop", "Left Field", "Center Field", "Right Field", "Outfielder"
)
player_positions
##  [1] "Catcher"      "Pitcher"      "First Base"   "Second Base"  "Third Base"  
##  [6] "Shortstop"    "Left Field"   "Center Field" "Right Field"  "Outfielder"

Data Frames

# Create a data frame with three columns: bonus, active_roster and salary
data_frame <- data.frame(
  bonus = c(2, 3, 1),
  active_roster = c("Yes", "No", "Yes"),
  salary = c(1.5, 2.5, 1)
)
data_frame
##   bonus active_roster salary
## 1     2           Yes    1.5
## 2     3            No    2.5
## 3     1           Yes    1.0

Using Tables

# Using tables
# Create a character vector X
X <- c("Yes", "No", "No", "Yes", "Yes")
# Tabulate X: count how many times each category occurs
table(X)
## X
##  No Yes 
##   2   3

Measures of Central Tendency (numerical measures of center and spread)

# Create a vector of salaries
ceo_salaries <- c(12, 0.4, 2, 15, 8, 3, 1, 4, 0.25)

# Get the mean of the salaries
mean(ceo_salaries)
## [1] 5.072222
# Get the variance (a measure of spread) of the salaries
var(ceo_salaries)
## [1] 28.95944
# Get the standard deviation of the salaries
sd(ceo_salaries)
## [1] 5.381398
# Get the median of the salaries
median(ceo_salaries)
## [1] 3
# Get the five-number summary of the salaries:
# minimum, lower hinge, median, upper hinge, maximum
# 12, .4, 2, 15, 8, 3, 1, 4, .25 (raw data points)
# .25, .4, 1, 2, 3, 4, 8, 12, 15 (ordered)
fivenum(ceo_salaries)
## [1]  0.25  1.00  3.00  8.00 15.00
# Return the mode (most frequent value) of a vector.
#
# v: a vector of any atomic type (numeric, character, ...).
# Returns a single value of the same type as v. When several values are tied
# for the highest count, the one that appears first in v is returned.
getmode <- function(v) {
  # Distinct values, in order of first appearance
  distinct_values <- unique(v)
  # Number of occurrences of each distinct value
  counts <- tabulate(match(v, distinct_values))
  # which.max() picks the first position holding the highest count
  distinct_values[which.max(counts)]
}
# Get the mode of the salaries
# Every salary occurs exactly once, so there is no real mode here; getmode()
# returns the first element (12) because which.max() picks the first maximum.
getmode(ceo_salaries)
## [1] 12
# Get the interquartile range (IQR) of the salaries
IQR(ceo_salaries)
## [1] 7

#Question_7:

# Get the most frequent value in hits_per_9innings
getmode(hits_per_9innings)
## [1] 5

#Question_8:

game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
# Tabulate the game_day vector (count the entries for each day)
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday 
##        2        2        3        2        1

#Question_9:

# Get the mode of the game_day vector (the most frequent day)
getmode(game_day)
## [1] "Saturday"