4/2
[1] 2
#let us complete some basic operations using R
1+2
[1] 3
1-4
[1] -3
2+5
[1] 7
sqrt(9)
[1] 3
log(10)
[1] 2.302585
log(2,72)# Logarithm of 2 in base 72 -- the second argument of log() is the base, so this is NOT the natural log (use log(x) alone for Ln)
[1] 0.1620765
#Question_1: Compute the log base 5 of 10 and the log of 10.
# Log base 5 of 10
log_base5 <- log(10, base = 5)
cat("Log base 5 of 10:", log_base5, "\n")
Log base 5 of 10: 1.430677 
# Natural log (log base e) of 10
log_e <- log(10)
cat("Natural log (log base e) of 10:", log_e, "\n")
Natural log (log base e) of 10: 2.302585 
#subtraction
2-3
[1] -1
#Batting Average=(No. of Hits)/(No. of At Bats)
#What is the batting average of a player who gets 29 hits in 112 at bats?
# Batting average for 29 hits in 112 at bats (hits divided by at bats)
BA <- 29 / 112
BA
[1] 0.2589286
# Round the batting average to three decimal places
Batting_Average <- round(BA, digits = 3)
Batting_Average
[1] 0.259
# Calculate the batting average for the first player (42 hits in 212 at bats)
BA1 <- 42 / 212
BA1
[1] 0.1981132
#On Base Percentage
#OBP=(H+BB+HBP)/(At Bats+H+BB+HBP+SF)
#Let us compute the OBP for a player with the following general stats
#AB=515,H=172,BB=84,HBP=5,SF=6
# Numerator is H + BB + HBP; denominator is AB + H + BB + HBP + SF,
# following the formula given in the comment above.
# NOTE(review): this denominator includes H, whereas the Question 3 code
# further down leaves H out of the denominator -- confirm which definition
# of OBP is intended.
OBP <- (172 + 84 + 5) / (515 + 172 + 84 + 5 + 6)
OBP
[1] 0.3337596
# Round the on-base percentage to three decimal places
On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
[1] 0.334
#Question_3:Compute the OBP for a player with the following general stats:
#AB=565,H=156,BB=65,HBP=3,SF=7
# Stats supplied by Question 3
AB <- 565
H <- 156
BB <- 65
HBP <- 3
SF <- 7

# On-base percentage: (H + BB + HBP) over (AB + BB + HBP + SF).
# NOTE(review): unlike the formula quoted for the worked example earlier in
# this file, this denominator does not add H -- confirm which definition the
# exercise expects.
OBP <- sum(H, BB, HBP) / sum(AB, BB, HBP, SF)

# Keep three decimal places for reporting
OBP_rounded <- round(OBP, digits = 3)

# Show the rounded value with a label
cat("On-Base Percentage (OBP):", OBP_rounded, "\n")
On-Base Percentage (OBP): 0.35 
3 == 8# Does 3 equal 8?
[1] FALSE
3 != 8# Is 3 different from 8?
[1] TRUE
3 <= 8# Is 3 less than or equal to 8?
[1] TRUE
3>4
[1] FALSE
# Logical Disjunction (or)
FALSE | FALSE # False OR False
[1] FALSE
# Logical Conjunction (and)
TRUE & FALSE #True AND False
[1] FALSE
# Negation
! FALSE # Not False
[1] TRUE
# Combination of statements
2 < 3 | 1 == 5 # 2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
Total_Bases <- 6 + 5
Total_Bases*3
[1] 33
ls()
 [1] "A_complement"            "A_intersection_B"       
 [3] "A_union_B"               "AB"                     
 [5] "BA"                      "BA1"                    
 [7] "bar"                     "barsample"              
 [9] "Batting_Average"         "BB"                     
[11] "event_5_or_6"            "event_A"                
[13] "event_at_least_one_head" "event_B"                
[15] "event_even"              "event_greater_than_4"   
[17] "event_less_than_4"       "event_odd"              
[19] "first_hit"               "five_num_disp"          
[21] "getMode"                 "H"                      
[23] "HBP"                     "hits_per_9innings"      
[25] "independence_check"      "iqr_wt"                 
[27] "last_hit"                "log_base5"              
[29] "log_e"                   "mad_mpg"                
[31] "mad_qsec"                "mean_hp"                
[33] "mean_mpg"                "median_hp"              
[35] "median_qsec"             "mode_gear"              
[37] "mode_gear_value"         "mtcars"                 
[39] "n"                       "OBP"                    
[41] "OBP_rounded"             "On_Base_Percentage"     
[43] "outliers"                "P_A"                    
[45] "P_A_and_B"               "P_B"                    
[47] "P_Heart"                 "P_Red_and_Heart"        
[49] "P_Red_given_Heart"       "percentile_10_hp"       
[51] "percentile_50_hp"        "percentile_90_hp"       
[53] "pitches_by_innings"      "player_positions"       
[55] "prob_5_or_6"             "prob_at_least_one_head" 
[57] "prob_heart"              "prob_heart_and_spade"   
[59] "prob_less_than_4"        "prob_odd"               
[61] "prob_spade"              "prob_union"             
[63] "range_wt"                "ranks"                  
[65] "runs_per_9innings"       "sals"                   
[67] "sample_space_cards"      "sample_space_coin"      
[69] "sample_space_die"        "samplerows"             
[71] "sd_qsec"                 "SF"                     
[73] "std_dev_disp"            "strikes_by_innings"     
[75] "suits"                   "Total_Bases"            
[77] "total_cards"             "variance_disp"          
[79] "venn.plot"               "weighted_mean_mpg"      
[81] "x"                      
rm(Batting_Average)
pitches_by_innings <- c(12, 15, 10, 20, 10) 
pitches_by_innings
[1] 12 15 10 20 10
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
[1]  9 12  6 14  9
#Question_4: Define two vectors,runs_per_9innings and hits_per_9innings, each with five elements.
# Define the vectors
runs_per_9innings <- c(3.2, 4.5, 2.8, 5.1, 3.9)
runs_per_9innings
[1] 3.2 4.5 2.8 5.1 3.9
hits_per_9innings <- c(7.3, 8.1, 6.5, 7.0, 7.8)
hits_per_9innings
[1] 7.3 8.1 6.5 7.0 7.8
# replicate function
rep(2, 5)
[1] 2 2 2 2 2
rep(1,4)
[1] 1 1 1 1
# consecutive numbers
1:5
[1] 1 2 3 4 5
2:10
[1]  2  3  4  5  6  7  8  9 10
# sequence from 1 to 10 with a step of 2
seq(1, 10, by=2)
[1] 1 3 5 7 9
seq(2,13,by=3)
[1]  2  5  8 11
# add vectors
pitches_by_innings+strikes_by_innings
[1] 21 27 16 34 19
# compare vectors
pitches_by_innings == strikes_by_innings
[1] FALSE FALSE FALSE FALSE FALSE
# find length of vector
length(pitches_by_innings)
[1] 5
# find minimum value in vector
min(pitches_by_innings)
[1] 10
# find average value in vector
mean(pitches_by_innings)
[1] 13.4
pitches_by_innings
[1] 12 15 10 20 10
# If you want to get the first element:
pitches_by_innings[1]
[1] 12
#Question_5: Get the first element of hits_per_9innings.
# Get the first element of the hits_per_9innings vector
first_hit <- hits_per_9innings[1]

# Print the first element
first_hit
[1] 7.3
pitches_by_innings[length(pitches_by_innings)]
[1] 10
#Question_6: Get the last element of hits_per_9innings.
# Get the last element of the hits_per_9innings vector
last_hit <- hits_per_9innings[length(hits_per_9innings)]

# Print the last element
last_hit
[1] 7.8
pitches_by_innings[c(2, 3, 4)]
[1] 15 10 20
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
data.frame(bonus = c(2, 3, 1),#in millions 
           active_roster = c("yes", "no", "yes"), 
           salary = c(1.5, 2.5, 1))#in millions 
sample(1:10, size=5)
[1] 3 8 6 7 4
# Demo data frame with 10 rows: letters A-J in var1, integers 1-10 in var2
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Check data frame
bar
# Number of rows to draw
n <- 5
# Draw n row indices without replacement. seq_len() is used instead of
# 1:nrow(bar) because 1:0 would yield c(1, 0) for a data frame with no rows.
samplerows <- sample(seq_len(nrow(bar)), size = n)
# print sample rows
samplerows
[1] 2 8 7 1 3
# extract the rows whose indices were drawn into samplerows
barsample <- bar[samplerows, ]
# print sample
print(barsample)
# Same idea in a single step: draw n row indices and subset immediately.
# seq_len() avoids the c(1, 0) sequence that 1:nrow(bar) gives for 0 rows.
bar[sample(seq_len(nrow(bar)), n), ]
x <- c("Yes","No","No","Yes","Yes") 
table(x)
x
 No Yes 
  2   3 
sals <- c(12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# the average
mean(sals) 
[1] 8.565
# the variance
var(sals)
[1] 225.5145
# the standard deviation
sd(sals)
[1] 15.01714
# the median
median(sals)
[1] 3.5
# Tukey's five number summary, useful for boxplots
# five numbers: min, lower hinge, median, upper hinge, max
fivenum(sals)
[1]  0.25  1.00  3.50  8.00 50.00
# summary statistics
summary(sals)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.250   1.250   3.500   8.565   7.250  50.000 
# Mode helper: returns the value that occurs most often in `x`.
# The result is taken from the distinct values of `x`, so it has the same
# type as the input. When several values share the highest count, the one
# that appears first in `x` is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Position of each element's value within the distinct values ...
  positions <- match(x, distinct_values)
  # ... counted per position to get one frequency per distinct value
  frequencies <- tabulate(positions)
  distinct_values[which.max(frequencies)]
}
# Most frequent value in pitches_by_innings
getMode(pitches_by_innings)
[1] 10
#Question_7: Find the most frequent value of hits_per_9innings.
# Mode of hits_per_9innings, or a message when no value repeats
# Count how many times each distinct value occurs
hits_table <- table(hits_per_9innings)

# A mode is only reported when at least one value occurs more than once;
# otherwise a descriptive message is stored instead of a number.
most_frequent <- if (any(hits_table > 1)) {
  # Table names are strings, so convert the winning name back to a number
  as.numeric(names(hits_table)[which.max(hits_table)])
} else {
  "No mode (all values are unique)"
}

# Show the result
most_frequent
[1] "No mode (all values are unique)"
#Question_8: Summarize the following survey with the `table()` command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday,Tuesday, Sunday, Friday, Friday, Monday
# Survey data: the ten submitted answers, in the order listed in the question.
# (The vector is defined once; an identical earlier assignment was redundant.)
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)

# Create a frequency table using table()
table(game_day)
game_day
  Friday   Monday Saturday   Sunday  Tuesday 
       2        2        3        2        1 
#Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results. 
# Define the getMode function
#
# Returns the most frequent value of `v`, with the same type as `v`:
# a character vector gives a string, a numeric vector gives a number.
# (Going through names(table(v)) would always give a string, e.g. "10"
# instead of 10, which breaks numeric uses of this helper.)
#
# Ties are resolved in favour of the smallest / alphabetically first value,
# i.e. the order in which table() lists its categories. NA values are ignored.
getMode <- function(v) {
  # Sorted distinct values, matching the category order of table(v)
  candidates <- sort(unique(v))
  # One count per candidate; elements that are NA match nothing and are skipped
  counts <- tabulate(match(v, candidates), nbins = length(candidates))
  candidates[which.max(counts)]
}

# Find the most frequent day
getMode(game_day)
[1] "Saturday"
getMode(game_day)
[1] "Saturday"
