# First Steps with R activity
# Basic calculations
# You can use R for the basic computations you would perform on a calculator.

# Basic arithmetic. Each value is stored in `result` and then printed;
# lines beginning with `##` show the console output.

# Subtraction
result <- 2 - 3
print(result)
## [1] -1

# Division
result <- 2 / 3
print(result)
## [1] 0.6666667

# Exponentiation
result <- 2^3
print(result)
## [1] 8

# Square root
result <- sqrt(2)
print(result)
## [1] 1.414214

# Natural logarithm
result <- log(2)
print(result)
## [1] 0.6931472
# Logarithms: log() returns the natural logarithm unless `base` is supplied
log_base_5_of_10 <- log(10, base = 5)
natural_log_of_10 <- log(10)

# Show both values next to a descriptive label
print(paste("Log base 5 of 10:", log_base_5_of_10))
## [1] "Log base 5 of 10: 1.43067655807339"
print(paste("Natural log of 10:", natural_log_of_10))
## [1] "Natural log of 10: 2.30258509299405"
# Q-1: Compute a player's batting average (BA = hits / at bats)

hits <- 29
at_bats <- 112

# Unrounded batting average
BA <- hits / at_bats
print(BA)
## [1] 0.2589286

# The same value rounded to three decimal places
BA <- round(BA, digits = 3)
print(BA)
## [1] 0.259
# Q-2: Batting average (BA) and on-base percentage (OBP)

# BA = hits / at bats
hits_q2 <- 42
at_bats_q2 <- 212
BA_q2 <- hits_q2 / at_bats_q2

# OBP inputs
AB <- 515   # at bats
H <- 172    # hits
BB <- 84    # bases on balls (walks)
HBP <- 5    # hit by pitch
SF <- 6     # sacrifice flies

# OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# Hits are already counted within at bats, so H must not be added to the
# denominator a second time.
OBP <- (H + BB + HBP) / (AB + BB + HBP + SF)

# Print the results
print(paste("BA for Q-2:", BA_q2))
## [1] "BA for Q-2: 0.19811320754717"
print(paste("On Base Percentage:", OBP))
## [1] "On Base Percentage: 0.427868852459016"

# Report OBP to three decimal places
OBP <- round(OBP, digits = 3)
print(paste("On Base Percentage:", OBP))
## [1] "On Base Percentage: 0.428"
# Q-3: Batting average (BA) and on-base percentage (OBP)

# NOTE(review): these BA inputs are the same 42 / 212 used for Q-2 and do not
# match H_q3 / AB_q3 below -- confirm which figures Q-3 is meant to use.
hits_q3 <- 42
at_bats_q3 <- 212
BA_q3 <- hits_q3 / at_bats_q3

# OBP inputs
AB_q3 <- 565   # at bats
H_q3 <- 156    # hits
BB_q3 <- 65    # bases on balls (walks)
HBP_q3 <- 3    # hit by pitch
SF_q3 <- 7     # sacrifice flies

# OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# Hits are already counted within at bats, so H must not be added to the
# denominator a second time.
OBP_q3 <- (H_q3 + BB_q3 + HBP_q3) / (AB_q3 + BB_q3 + HBP_q3 + SF_q3)

# Print the results
print(paste("BA for Q-3:", BA_q3))
## [1] "BA for Q-3: 0.19811320754717"
print(paste("On Base Percentage:", OBP_q3))
## [1] "On Base Percentage: 0.35"

# Round the Q-3 value itself (not the Q-2 `OBP`) to three decimal places
OBP_q3 <- round(OBP_q3, digits = 3)
print(paste("On Base Percentage:", OBP_q3))
## [1] "On Base Percentage: 0.35"
# Comparison operators return a logical value (TRUE or FALSE)

# Is 3 equal to 8?
result <- 3 == 8
print(result)
## [1] FALSE

# Is 3 different from 8?
result <- 3 != 8
print(result)
## [1] TRUE

# Is 3 less than or equal to 8?
result <- 3 <= 8
print(result)
## [1] TRUE

# Is 3 greater than 4?
result <- 3 > 4
print(result)
## [1] FALSE

# Logical operators combine or invert logical values

# OR (disjunction)
result <- FALSE | FALSE
print(result)
## [1] FALSE

# AND (conjunction)
result <- TRUE & FALSE
print(result)
## [1] FALSE

# NOT (negation)
result <- !FALSE
print(result)
## [1] TRUE

# Compound statement: (2 < 3) OR (1 == 5)
result <- (2 < 3) | (1 == 5)
print(result)
## [1] TRUE
# Variables: store a value under a name and reuse it in later expressions
Total_Bases <- 6 + 5
result <- 3 * Total_Bases
print(result)
## [1] 33

# List the names of the objects currently defined in the workspace
ls()
##  [1] "AB"                "AB_q3"             "at_bats"          
##  [4] "at_bats_q2"        "at_bats_q3"        "BA"               
##  [7] "BA_q2"             "BA_q3"             "BB"               
## [10] "BB_q3"             "H"                 "H_q3"             
## [13] "HBP"               "HBP_q3"            "hits"             
## [16] "hits_q2"           "hits_q3"           "log_base_5_of_10" 
## [19] "natural_log_of_10" "OBP"               "OBP_q3"           
## [22] "result"            "SF"                "SF_q3"            
## [25] "Total_Bases"

# Delete a variable that is no longer needed
rm(Total_Bases)
# c() ("concatenate") combines individual values into a vector

# Pitches thrown in each inning
pitches_by_innings <- c(12, 15, 10, 20, 10)
print(pitches_by_innings)
## [1] 12 15 10 20 10

# Strikes thrown in each inning
strikes_by_innings <- c(9, 12, 6, 14, 9)
print(strikes_by_innings)
## [1]  9 12  6 14  9
# Q_4: Define two vectors, runs_per_9innings and hits_per_9innings,
# each holding five elements
runs_per_9innings <- c(3, 5, 4, 6, 2)
hits_per_9innings <- c(8, 7, 9, 5, 6)

# Display both vectors
runs_per_9innings
## [1] 3 5 4 6 2
hits_per_9innings
## [1] 8 7 9 5 6
# rep() repeats a value a given number of times
rep(2, times = 5)
## [1] 2 2 2 2 2
rep(1, times = 4)
## [1] 1 1 1 1

# The colon operator generates consecutive integers
1:5
## [1] 1 2 3 4 5
2:10
## [1]  2  3  4  5  6  7  8  9 10

# seq() generates a sequence with a chosen step size
seq(from = 1, to = 10, by = 2)
## [1] 1 3 5 7 9
seq(from = 2, to = 13, by = 3)
## [1]  2  5  8 11
# Vector arithmetic and summaries operate on every element at once
pitches_by_innings <- c(10, 12, 15, 20, 10)
strikes_by_innings <- c(11, 15, 1, 14, 9)

# Element-wise sum of the two vectors
sum_vector <- pitches_by_innings + strikes_by_innings
print(sum_vector)
## [1] 21 27 16 34 19

# Element-wise equality test: one logical value per position
comparison_vector <- pitches_by_innings == strikes_by_innings
print(comparison_vector)
## [1] FALSE FALSE FALSE FALSE FALSE

# Number of elements
length_pitches <- length(pitches_by_innings)
print(length_pitches)
## [1] 5

# Smallest value
min_pitch <- min(pitches_by_innings)
print(min_pitch)
## [1] 10

# Arithmetic mean
mean_pitch <- mean(pitches_by_innings)
print(mean_pitch)
## [1] 13.4
# Indexing: square brackets extract elements by position (R counts from 1)
pitches_by_innings <- c(12, 15, 10, 20, 10)

# First element
first_element <- pitches_by_innings[1]
print(first_element)
## [1] 12

# Last element: the final position equals the vector's length
pitches_by_innings[length(pitches_by_innings)]
## [1] 10

# Q_5: Get the first element of hits_per_9innings
hits_per_9innings[1]
## [1] 8

# Get the last element of hits_per_9innings
last_element <- hits_per_9innings[length(hits_per_9innings)]
print(last_element)
## [1] 6

# Extract several values at once, e.g. the 2nd through 4th
pitches_by_innings[c(2, 3, 4)]
## [1] 15 10 20
# Vectors may also hold character strings or logical values
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")

# Build a data frame by hand with data.frame(); each argument is one column
data.frame(
  bonus = c(2, 3, 1),                     # in millions
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1)                 # in millions
)

# sample() draws values at random: here 5 distinct numbers between 1 and 10.
# No seed has been set at this point, so the numbers differ from run to run.
sample(seq_len(10), size = 5)
## [1] 8 1 4 3 9
# Fix the random number generator's seed so the draws below are reproducible
set.seed(123)

# Data frame pairing each uppercase letter with a random integer in 1..100
# (26 draws, repeats allowed)
my_data_frame <- data.frame(
  Column1 = LETTERS,
  Column2 = sample(seq_len(100), size = 26, replace = TRUE)
)

# Display the full data frame
print(my_data_frame)
##    Column1 Column2
## 1        A      31
## 2        B      79
## 3        C      51
## 4        D      14
## 5        E      67
## 6        F      42
## 7        G      50
## 8        H      43
## 9        I      14
## 10       J      25
## 11       K      90
## 12       L      91
## 13       M      69
## 14       N      91
## 15       O      57
## 16       P      92
## 17       Q       9
## 18       R      93
## 19       S      99
## 20       T      72
## 21       U      26
## 22       V       7
## 23       W      42
## 24       X       9
## 25       Y      83
## 26       Z      36
# Draw a random sample of rows from a data frame
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)

# Number of rows to sample
n <- 5

# Pick n distinct row indices. seq_len() stays correct even for a data frame
# with zero rows, whereas 1:nrow(bar) would then yield c(1, 0).
samplerows <- sample(seq_len(nrow(bar)), size = n)

# print sample rows
samplerows
## [1] 1 7 5 4 2

# extract rows
barsample <- bar[samplerows, ]

# print sample
print(barsample)
##   var1 var2
## 1    A    1
## 7    G    7
## 5    E    5
## 4    D    4
## 2    B    2

# The same selection written as a single expression (draws a fresh sample)
bar[sample(seq_len(nrow(bar)), size = n), ]
# table() summarizes a vector by counting how often each value occurs
x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)
## x
##  No Yes 
##   2   3
# Descriptive statistics for a numeric vector
sals <- c(12, 0.4, 5, 2, 50, 8, 3, 1, 4, 0.25)

# Average
mean(sals)
## [1] 8.565

# Variance
var(sals)
## [1] 225.5145

# Standard deviation
sd(sals)
## [1] 15.01714

# Median
median(sals)
## [1] 3.5

# Five-number summary: min, lower hinge, median, upper hinge, max
fivenum(sals)
## [1]  0.25  1.00  3.50  8.00 50.00

# Summary statistics: min, quartiles, median, mean and max
summary(sals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   1.250   3.500   8.565   7.250  50.000
#' Find the mode (most frequent value) of a vector
#'
#' @param x A vector.
#' @return The value of `x` that occurs most often. When several values are
#'   tied for the highest count, the one that appears first in `x` is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Count occurrences of each distinct value, in order of first appearance
  counts <- tabulate(match(x, distinct_values))
  distinct_values[which.max(counts)]
}
# Mode of pitches_by_innings
getMode(pitches_by_innings)
## [1] 10

# Q_7: Find the most frequent value of hits_per_9innings
getMode(hits_per_9innings)
## [1] 8

# Survey responses: one preferred game day per respondent
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)

# Summarize the survey by counting each answer with table()
table(game_day)
## game_day
##   Friday   Monday Saturday   Sunday  Tuesday 
##        2        2        3        2        1

# Q_9: What is the most frequent answer recorded in the survey?
# Use the getMode function to compute the result.
getMode(game_day)
## [1] "Saturday"