# First Steps with R
# Basic calculations
# Lines starting with `##` show the console output of the preceding expression.
# Subtraction
2 - 3
## [1] -1
# Division
2 / 3
## [1] 0.6666667
# Exponentiation
2^3
## [1] 8
# Square root
sqrt(2)
## [1] 1.414214
# Logarithms: log() returns the natural logarithm unless `base` is supplied
log(2)
## [1] 0.6931472
# Question_1: Compute the log base 5 of 10 and the natural log of 10.
log(10, base = 5)
## [1] 1.430677
log(10)
## [1] 2.302585
# Computing some offensive metrics in Baseball
# Batting Average = (No. of Hits) / (No. of At Bats)
# Example: 29 hits in 112 at bats
BA <- 29 / 112
BA
## [1] 0.2589286
# Round the raw ratio to three decimal places
Batting_Average <- round(BA, digits = 3)
Batting_Average
## [1] 0.259
# Question_2: What is the batting average of a player that bats 42 hits in 212 at bats?
BA2 <- 42 / 212
# Same rounding as above, applied to the Question_2 ratio
Batting_Average2 <- round(BA2, digits = 3)
Batting_Average2
## [1] 0.198
# On Base Percentage
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF)
# H = hits, BB = walks, HBP = hit by pitch, AB = at bats, SF = sacrifice flies.
# Hits are already counted within at bats, so H must not be added to the
# denominator a second time.
# Example: AB = 515, H = 172, BB = 84, HBP = 5, SF = 6
OBP <- (172 + 84 + 5) / (515 + 84 + 5 + 6)
OBP
## [1] 0.4278689
# Round the raw ratio to three decimal places
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
## [1] 0.428
# Question_3: Compute the OBP for a player with the following general stats:
# AB = 565, H = 156, BB = 65, HBP = 3, SF = 7
OBP3 <- (156 + 65 + 3) / (565 + 65 + 3 + 7)
OBP3
## [1] 0.35
# Comparison (relational) operators: each expression evaluates to TRUE or FALSE
3 == 8 # Does 3 equal 8?
## [1] FALSE
3 != 8 # Is 3 different from 8?
## [1] TRUE
3 <= 8 # Is 3 less than or equal to 8?
## [1] TRUE
3 > 4 # Is 3 greater than 4?
## [1] FALSE
# Logical operators: AND (&), OR (|), NOT (!)
# `&` and `|` are the elementwise forms; they also work on whole vectors.
FALSE | FALSE # False OR False
## [1] FALSE
TRUE & FALSE # True AND False
## [1] FALSE
!FALSE # Not False
## [1] TRUE
# Comparisons are evaluated before `|`, so this reads (2 < 3) | (1 == 5)
2 < 3 | 1 == 5 # True OR False
## [1] TRUE
# Assigning Values to Variables
Total_Bases <- 6 + 5
# Using a variable in an expression prints the result; Total_Bases itself is unchanged
Total_Bases * 3
## [1] 33
# To see the variables that are currently defined
ls()
## [1] "BA"                 "BA2"                "Batting_Average"
## [4] "Batting_Average2"   "OBP"                "OBP3"
## [7] "On_Base_Percentage" "Total_Bases"
# Deleting a variable
rm(Total_Bases)
# Assigning values: `<-` is used throughout this script (`=` is also accepted
# for top-level assignment)
x <- 10
# Vectors
# c() combines its arguments into a single vector
pitches_by_innings <- c(12, 15, 10, 20, 10)
strikes_by_innings <- c(9, 12, 6, 14, 9)
# Question_4: Define two vectors, runs_per_9innings and hits_per_9innings
runs_per_9innings <- c(5, 7, 8, 6, 7)
hits_per_9innings <- c(9, 10, 8, 11, 7)
# Repeating a value with rep(): here the value 2, five times
rep(2, 5)
## [1] 2 2 2 2 2
# Consecutive numbers
1:5
## [1] 1 2 3 4 5
# Sequence from 1 up to at most 10 in steps of 2
seq(1, 10, by = 2)
## [1] 1 3 5 7 9
# Adding and comparing vectors (both operations work element by element)
pitches_by_innings + strikes_by_innings
## [1] 21 27 16 34 19
pitches_by_innings == strikes_by_innings
## [1] FALSE FALSE FALSE FALSE FALSE
# Find length, minimum, and average
length(pitches_by_innings)
## [1] 5
min(pitches_by_innings)
## [1] 10
mean(pitches_by_innings)
## [1] 13.4
# Accessing parts of a vector (indexing starts at 1)
pitches_by_innings[1]
## [1] 12
# Question_5: Get the first element of hits_per_9innings
hits_per_9innings[1]
## [1] 9
# Get the last element of a vector: its index equals the vector's length
pitches_by_innings[length(pitches_by_innings)]
## [1] 10
# Question_6: Get the last element of hits_per_9innings
hits_per_9innings[length(hits_per_9innings)]
## [1] 7
# Extracting multiple values by passing a vector of indices
pitches_by_innings[c(2, 3, 4)]
## [1] 15 10 20
# Vectors can also be strings or logical values
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
# Data Frames
# Each named argument becomes one column; all columns have three rows.
# The result is not assigned, so it is only printed.
data.frame(
  bonus = c(2, 3, 1),
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1)
)
# Random sampling from a vector
# sample() draws without replacement by default; no seed is set, so the
# output shown below is one example and differs from run to run.
sample(1:10, size = 5)
## [1] 7 5 10 8 2
# Taking a random sample from a data frame
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
n <- 5
# Draw n distinct row indices; seq_len() is the safe way to enumerate rows
# because it yields an empty sequence (not c(1, 0)) for a zero-row data frame.
samplerows <- sample(seq_len(nrow(bar)), size = n)
# Keep only the sampled rows (all columns)
barsample <- bar[samplerows, ]
print(barsample)
##   var1 var2
## 2    B    2
## 8    H    8
## 1    A    1
## 5    E    5
## 7    G    7
# Using table() to summarize categorical data
game_day <- c("Saturday", "Saturday", "Sunday", "Monday", "Saturday",
"Tuesday", "Sunday", "Friday", "Friday", "Monday")
# table() counts how many times each distinct value occurs
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday
##        2        2        3        2        1
# Question_7: Find the most frequent value of hits_per_9innings
# Return the most frequent value of a vector.
#
# x: a vector (numeric, character, ...).
# Returns a length-one subset of the distinct values of x. When several
# values share the highest count, the one that appears first in x wins,
# because which.max() reports the first maximum.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Map every element to the position of its distinct value, then count
  # how often each position occurs.
  positions <- match(x, distinct_values)
  occurrences <- tabulate(positions)
  distinct_values[which.max(occurrences)]
}
# Every value in hits_per_9innings occurs exactly once, so this is a tie and
# getMode() returns the first element.
getMode(hits_per_9innings)
## [1] 9
# Question_8: Summarize the survey
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday
##        2        2        3        2        1
# Question_9: What is the most frequent answer recorded in the survey?
getMode(game_day)
## [1] "Saturday"