# Plot the built-in `cars` data set (columns `speed` and `dist`).
plot(cars)

# Report the largest speed and the mean distance. The trailing "\n" keeps
# each message on its own line when the file is run as a plain script.
cat("Max value of speed is:", max(cars$speed), "\n")
## Max value of speed is: 25
cat("Distance Average is:", mean(cars$dist), "\n")
## Distance Average is: 42.98

Question 1: Compute the log base 5 of 10 and the log base 10 of 10

# Logarithm of 10 in base 5, using the general `base` argument.
log(10, base = 5)
## [1] 1.430677
# Logarithm of 10 in base 10, using the dedicated base-10 function.
log10(10)
## [1] 1

Computing Offensive metrics in baseball

# Batting average = (number of hits) / (number of at bats).
# Example: a player with 29 hits in 112 at bats.
BA <- 29 / 112
BA
## [1] 0.2589286

Question_2: What is the batting average of a player that bats 42 hits in 212 at bats?

# Question 2 answer: 42 hits in 212 at bats, rounded to three decimals.
round(42 / 212, digits = 3)
## [1] 0.198
# Rounded batting average for the earlier 29-hits-in-112-at-bats example.
Batting_Average <- round(BA, digits = 3)
Batting_Average
## [1] 0.259
# On Base Percentage
# OBP = (H + BB + HBP) / (At Bats + H + BB + HBP + SF)
# NOTE(review): the commonly published OBP formula divides by
# (AB + BB + HBP + SF) without adding H again -- confirm which definition
# this exercise intends before changing the numbers below.
# Let us compute the OBP for a player with the following general stats:
# AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
OBP <- (172 + 84 + 5) / (515 + 172 + 84 + 5 + 6)
OBP
## [1] 0.3337596
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
## [1] 0.334

Does 3 = 8?

# `==` tests equality and yields a logical value.
3 == 8
## [1] FALSE

Is 3 different from 8?

# Inequality test: TRUE because the two numbers differ.
3 != 8 # Is 3 different from 8?
## [1] TRUE
# Strict greater-than comparison.
3 > 4
## [1] FALSE

Logical Disjunction (or)

# `|` is the element-wise OR: the result is FALSE only when both operands are.
FALSE | FALSE # False OR False
## [1] FALSE

Logical Conjunction (and)

# `&` is the element-wise AND: the result is TRUE only when both operands are.
TRUE & FALSE
## [1] FALSE

Negation

# `!` flips a logical value.
!FALSE
## [1] TRUE

Combination of statements

# Comparisons are evaluated before `|`; the parentheses make that grouping
# explicit. The left-hand comparison is TRUE, so the whole statement is TRUE.
(2 < 3) | (1 == 5)
## [1] TRUE

Assigning Values to Variables

# Store the sum under the name Total_Bases, then reuse the stored value in a
# second expression (which is printed but not saved).
Total_Bases <- 6 + 5
Total_Bases * 3
## [1] 33

Using ls()

# ls() returns the names of the objects defined so far in the workspace.
ls()
## [1] "BA"                 "Batting_Average"    "OBP"               
## [4] "On_Base_Percentage" "Total_Bases"

Using rm() to remove a variable

# Drop Total_Bases from the workspace, naming the object as a string.
rm(list = "Total_Bases")

Vectors

# c() combines its arguments into a vector; typing the variable name on its
# own line prints the stored values.
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
## [1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
## [1]  9 12  6 14  9

Question_4: Define two vectors, runs_per_9innings and hits_per_9innings, each with five elements.

# Question 4 asks for vectors named runs_per_9innings and hits_per_9innings,
# each holding five values.
runs_per_9innings <- c(4, 7, 8, 2, 4)
runs_per_9innings
## [1] 4 7 8 2 4
hits_per_9innings <- c(5, 8, 1, 9, 3)
hits_per_9innings
## [1] 5 8 1 9 3

Replicate Function

# Repeat the value 2 five times.
rep(2, times = 5)
## [1] 2 2 2 2 2
# Repeat the value 1 four times.
rep(1, times = 4)
## [1] 1 1 1 1

Consecutive numbers

# The colon operator builds the sequence of consecutive integers from 1 to 5.
1:5
## [1] 1 2 3 4 5

sequence from 1 to 10 with a step of 2

# Start at 1 and add 2 repeatedly; 10 is an upper limit that is not reached.
seq(from = 1, to = 10, by = 2)
## [1] 1 3 5 7 9

sequence from 2 to 13 with a step of 3

# Start at 2 and add 3 repeatedly; the sequence stops at 11, the last value
# that does not exceed 13.
seq(from = 2, to = 13, by = 3)
## [1]  2  5  8 11

Adding vectors

# Vector arithmetic is element-wise: each pitch count is added to the strike
# count at the same position.
pitches_by_innings + strikes_by_innings
## [1] 21 27 16 34 19

compare vectors

# `==` compares position by position and returns one logical per element.
pitches_by_innings == strikes_by_innings
## [1] FALSE FALSE FALSE FALSE FALSE

find length of vector

# Number of elements stored in the vector.
length(pitches_by_innings)
## [1] 5

Minimal value in vector

# Smallest value in the vector.
min(pitches_by_innings)
## [1] 10

find average value in vector

# Arithmetic mean of the vector's values.
mean(pitches_by_innings)
## [1] 13.4

Getting first value

# R vectors are indexed from 1, so [1] selects the first element.
pitches_by_innings[1]
## [1] 12

Question_5: Get the first element of hits_per_9innings.

# First element of hits_per_9innings.
hits_per_9innings[1]
## [1] 5
# Indexing with length() selects the last element, whatever the vector's size.
pitches_by_innings[length(pitches_by_innings)]
## [1] 10

Question_6: Get the last element of hits_per_9innings.

# length() gives the position of the final element, so this selects it.
hits_per_9innings[length(hits_per_9innings)]
## [1] 3

extract multiple values from a vector.

# Select the second through fourth elements with a range of positions.
pitches_by_innings[2:4]
## [1] 15 10 20

Vectors can also be strings or logical values

# A character vector of four strings; it is assigned here but not printed.
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")

Data Frames

# Three-row example data frame; bonus and salary are expressed in millions.
data.frame(
  bonus = c(2, 3, 1),
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1)
)
##   bonus active_roster salary
## 1     2           yes    1.5
## 2     3            no    2.5
## 3     1           yes    1.0

How to Make a Random Sample

# Draw five distinct integers from 1 to 10; the result changes on every run.
sample(1:10, size = 5)
## [1]  5  4 10  7  3
# Ten-row data frame pairing the first ten capital letters with 1 to 10.
bar <- data.frame(
  var1 = LETTERS[1:10],
  var2 = 1:10
)
bar
##    var1 var2
## 1     A    1
## 2     B    2
## 3     C    3
## 4     D    4
## 5     E    5
## 6     F    6
## 7     G    7
## 8     H    8
## 9     I    9
## 10    J   10

Example

# Draw n distinct row positions of `bar` (the result changes on every run).
# seq_len() is used instead of 1:nrow(bar) so that a data frame with zero
# rows yields an empty index set rather than the sequence c(1, 0).
n <- 5
samplerows <- sample(seq_len(nrow(bar)), size = n)
samplerows
## [1] 2 1 4 8 9

extract rows from samplerows

# Keep only the sampled rows of `bar`; the empty slot after the comma keeps
# every column. Rows appear in the order the indices were drawn.
barsample <- bar[samplerows, ]
print(barsample)
##   var1 var2
## 2    B    2
## 1    A    1
## 4    D    4
## 8    H    8
## 9    I    9

In a single line of code:

# Sample n row positions and subset in one expression. seq_len() avoids the
# c(1, 0) sequence that 1:nrow(bar) would produce for a zero-row data frame.
bar[sample(seq_len(nrow(bar)), n), ]
##    var1 var2
## 4     D    4
## 10    J   10
## 3     C    3
## 8     H    8
## 2     B    2

Using Tables

# Five yes/no answers; table() counts how often each distinct answer occurs.
x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)
## x
##  No Yes 
##   2   3

Numerical measures of center and spread

mean

# Ten sample values, followed by their arithmetic mean.
sals <- c(12, 0.4, 5, 2, 50, 8, 3, 1, 4, 0.25)
mean(sals)
## [1] 8.565

variance

# Sample variance of sals (var() divides by n - 1).
var(sals)
## [1] 225.5145

standard deviation

# Sample standard deviation: the square root of var(sals).
sd(sals)
## [1] 15.01714

median

# Middle of the sorted values; with ten observations this is the average of
# the fifth and sixth (3 and 4).
median(sals)
## [1] 3.5

fivenum

# Tukey's five-number summary, useful for boxplots.
# The five numbers are: min, lower hinge, median, upper hinge, max.
fivenum(sals)
## [1]  0.25  1.00  3.50  8.00 50.00

summary statistics

# Summary statistics: minimum, quartiles, median, mean and maximum.
# The quartiles come from quantile(), so they can differ from the hinges
# that fivenum() reports (1.25 vs 1.00 and 7.25 vs 8.00 here).
summary(sals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   1.250   3.500   8.565   7.250  50.000

Function to find the mode, i.e. most frequent value

# Return the most frequent value of `x`.
#
# x: a vector (numeric, character, ...).
# Returns a length-one vector of the same type as `x`. When several values
# share the highest count, the one that appears first in `x` is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Replace every element by the position of its distinct value, then count
  # how many times each position occurs.
  positions <- match(x, distinct_values)
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}

find the most frequent value of the number of pitches_by_innings

# 10 is the only pitch count that occurs twice, so it is the mode.
getMode(pitches_by_innings)
## [1] 10

Question_7: Find the most frequent value of hits per 9 innings.

# Every value in hits_per_9innings occurs exactly once, so there is no unique
# mode; getMode() returns the first value because which.max() picks the
# first maximum.
getMode(hits_per_9innings)
## [1] 5

Question_8: Summarize the following survey with the table command:

# Ten survey answers, then a count of how often each day was named.
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday 
##        2        2        3        2        1

Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results.

# "Saturday" was recorded three times, more often than any other day.
getMode(game_day)
## [1] "Saturday"