# Subtraction
2 - 3
## [1] -1
# Division
2 / 3
## [1] 0.6666667
# Exponentiation
2^3
## [1] 8
# Square root
sqrt(2)
## [1] 1.414214
# Logarithm (log() uses base e unless a base is supplied)
log(2)
## [1] 0.6931472

#Question_1: Compute the log base 5 of 10 and the log of 10.

# Batting Average = (No. of Hits) / (No. of At Bats)
# What is the batting average of a player that bats 29 hits in 112 at bats?
# Use `<-` for assignment, consistent with the rest of this script.
BA <- 29 / 112
BA
## [1] 0.2589286
# Round the raw ratio to three decimal places.
Batting_Average <- round(BA, digits = 3)
Batting_Average
## [1] 0.259

#Question_2:What is the batting average of a player that bats 42 hits in 212 at bats?

# On-Base Percentage
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
# Hits are already counted as at bats, so H must not be added to the
# denominator a second time.
# Let us compute the OBP for a player with the following general stats:
# AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
OBP
## [1] 0.4278689
# Round the raw ratio to three decimal places.
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
## [1] 0.428

#Question_3: Compute the OBP for a player with the following general stats: AB=565, H=156, BB=65, HBP=3, SF=7

3 == 8 # Is 3 equal to 8?
## [1] FALSE
3 != 8 # Is 3 different from 8?
## [1] TRUE
3 <= 8 # Is 3 less than or equal to 8?
## [1] TRUE
# Logical disjunction (or)
FALSE | FALSE # FALSE OR FALSE
## [1] FALSE
# Logical conjunction (and)
TRUE & FALSE # TRUE AND FALSE
## [1] FALSE
# Negation
!FALSE # NOT FALSE
## [1] TRUE
# Combination of statements
2 < 3 | 1 == 5 # 2 < 3 is TRUE, 1 == 5 is FALSE, and TRUE OR FALSE is TRUE
## [1] TRUE

# Assigning Values to Variables
# (This heading must be a comment: as bare text it is a parse error.)

Total_Bases <- 6 + 5
Total_Bases * 3
## [1] 33
# To see the variables that are currently defined, use ls (as in "list")
ls()
## [1] "BA"                 "Batting_Average"    "OBP"               
## [4] "On_Base_Percentage" "Total_Bases"
# To delete a variable, use rm (as in "remove")
rm(Total_Bases)

# Vectors

# c() combines its arguments into a vector
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
## [1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
## [1]  9 12  6 14  9
# rep() repeats a value: here, the number 2 five times
rep(2, times = 5)
## [1] 2 2 2 2 2
# the colon operator produces consecutive numbers
1:5
## [1] 1 2 3 4 5
# seq() builds a sequence: from 1 to 10 in steps of 2
seq(from = 1, to = 10, by = 2)
## [1] 1 3 5 7 9
# arithmetic on vectors works element by element
pitches_by_innings + strikes_by_innings
## [1] 21 27 16 34 19
# comparisons on vectors also work element by element
pitches_by_innings == strikes_by_innings
## [1] FALSE FALSE FALSE FALSE FALSE
# number of elements in the vector
length(pitches_by_innings)
## [1] 5
# smallest value in the vector
min(pitches_by_innings)
## [1] 10
# arithmetic mean of the vector
mean(pitches_by_innings)
## [1] 13.4

# Square brackets, [ ], access parts of a vector. As a reminder, this is the
# current value of the vector pitches_by_innings.

pitches_by_innings
## [1] 12 15 10 20 10
# To get the first element, index with position 1:
pitches_by_innings[1]
## [1] 12

#Question_5: Get the first element of hits_per_9innings.

# To get the last element, index with the length of the vector:
pitches_by_innings[length(pitches_by_innings)]
## [1] 10

#Question_6: Get the last element of hits_per_9innings.

# To get several elements at once, index with a vector of positions:
pitches_by_innings[2:4]
## [1] 15 10 20
# A character vector
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
# A data frame holds equal-length columns that may have different types
data.frame(
  bonus = c(2, 3, 1), # in millions
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1) # in millions
)
# Draw 5 of the numbers 1 to 10 without replacement. No seed is set, so the
# values printed below change from run to run.
sample(1:10, size = 5)
## [1] 5 2 7 8 9
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Check data frame
bar
n <- 5
# Pick n row numbers at random. seq_len() yields an empty sequence for a
# zero-row data frame, whereas 1:nrow(bar) would yield c(1, 0).
samplerows <- sample(seq_len(nrow(bar)), size = n)
# print sample rows
samplerows
## [1] 3 8 7 5 9
# extract rows
barsample <- bar[samplerows, ]
# print sample
print(barsample)
##   var1 var2
## 3    C    3
## 8    H    8
## 7    G    7
## 5    E    5
## 9    I    9
# The same sampling and extraction in a single step
bar[sample(seq_len(nrow(bar)), n), ]
# table() counts how often each distinct value occurs
x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)
## x
##  No Yes 
##   2   3

# Numerical measures of center and spread

sals <- c(12, 0.4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# arithmetic mean
mean(sals)
## [1] 8.565
# sample variance
var(sals)
## [1] 225.5145
# sample standard deviation
sd(sals)
## [1] 15.01714
# median
median(sals)
## [1] 3.5
# Tukey's five-number summary, useful for boxplots:
# minimum, lower hinge, median, upper hinge, maximum
fivenum(sals)
## [1]  0.25  1.00  3.50  8.00 50.00
# summary statistics: extremes, quartiles, median and mean
summary(sals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   1.250   3.500   8.565   7.250  50.000
# Find the mode of a vector, i.e. its most frequent value.
#
# x: the vector to examine.
# Returns the element of x that occurs most often. When several values are
# tied, the one that appears first in x is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Position of every element of x within the distinct values
  positions <- match(x, distinct_values)
  # Number of occurrences of each distinct value, in order of first appearance
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}
# Most frequent value in pitches_by_innings
getMode(pitches_by_innings)
## [1] 10
#Question_7: Find the most frequent value of hits_per_9innings.
#getMode(hits_per_9innings)
#Question_8: Summarize the following survey with the `table()` command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday, Tuesday, Sunday, Friday, Friday, Monday
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
game_day
##  [1] "Saturday" "Saturday" "Sunday"   "Monday"   "Saturday" "Tuesday" 
##  [7] "Sunday"   "Friday"   "Friday"   "Monday"
# Count how many fans chose each day
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday 
##        2        2        3        2        1
#Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.
getMode(game_day)
## [1] "Saturday"