Basic calculations

#Subtraction
#This is a simple calculation subtracting 3 from 2
2-3
[1] -1
#Division
#This is a simple calculation dividing 2 by 3
2/3
[1] 0.6666667
#Exponentiation 
#This is a simple calculation of 2 to the power of 3
#This will be the same as 2x2x2
2^3
[1] 8
#Square root
#This is a simple calculation of the square root of 2
sqrt(2)
[1] 1.414214
#Square root
#This is a sample showing the square root of 4
sqrt(4)
[1] 2
#Logarithms
#This is a simple calculation of the log of 2
log(2)
[1] 0.6931472
#Question_1: Compute the log base 5 of 10 and the log of 10
#In this case we are calculating the log of 10 with a base equal to 5
log(10,base = 5)
[1] 1.430677
#Log of 10
#In this case we are calculating the log of 10
log(10)
[1] 2.302585
# Batting Average = (No. of Hits) / (No. of At Bats)
# What is the batting average of a player with 29 hits in 112 at bats?
# Divide the player's hits (29) by the at bats (112) it took to get them,
# store the quotient in BA, then print BA.
BA <- 29 / 112
BA
[1] 0.2589286
# Round the batting average (BA) to three decimal places, store the result in
# Batting_Average, then print the rounded value.

Batting_Average <- round(BA, digits = 3)
Batting_Average
[1] 0.259
# Question_2: What is the batting average of a player that bats 42 hits in 212 at bats?
# Divide the player's hits (42) by the at bats (212) it took to get them,
# store the quotient in BA, then print BA.
BA <- 42 / 212
BA
[1] 0.1981132
# On Base Percentage
# OBP = (H + BB + HBP) / (At Bats + H + BB + HBP + SF)
# NOTE(review): the usual OBP definition has no H term in the denominator,
# i.e. (H + BB + HBP) / (AB + BB + HBP + SF) -- confirm which formula this
# exercise intends before relying on the value computed below.
# Let us compute the OBP for a player with the following general stats:
# AB=515, H=172, BB=84, HBP=5, SF=6

# The numerator adds hits, walks (BB) and times hit by pitch (HBP); the
# denominator adds at bats (AB), hits, walks, times hit by pitch and
# sacrifice flies (SF), following the formula stated above.

# Store the result in the OBP variable, then print the contents of OBP.

OBP <- (172 + 84 + 5) / (515 + 172 + 84 + 5 + 6)
OBP
[1] 0.3337596
# Round the contents of OBP to 3 decimal places, store the result in
# On_Base_Percentage, then print it.

On_Base_Percentage <- round(OBP, digits = 3)
On_Base_Percentage
[1] 0.334

Question_3: Compute the OBP for a player with the following general stats: AB=565,H=156,BB=65,HBP=3,SF=7

# Compute the On-Base Percentage (OBP) for the Question_3 player
# (AB=565, H=156, BB=65, HBP=3, SF=7).
# The numerator adds hits, walks (BB) and times hit by pitch (HBP); the
# denominator adds at bats (AB), hits, walks, times hit by pitch and
# sacrifice flies (SF), matching the formula used earlier in this file.
# NOTE(review): the usual OBP definition has no H term in the denominator --
# confirm which formula this exercise intends.

# Store the result in the OBP variable, then print the contents of OBP.
OBP <- (156 + 65 + 3) / (565 + 156 + 65 + 3 + 7)
OBP
[1] 0.281407
#This code ask the basic question if 3 is equal to 8 using the == operator.
#In this case since 8 is not equal to 3 the answer will be false
3 == 8# Does 3 equals 8?
[1] FALSE
#This code ask the basic question if 3 is not equal to 8 using the != operator.
#In this case since 8 is not equal to 3 the answer will be true
3 != 8# Is 3 different from 8?
[1] TRUE
#This code ask the basic question if 3 is less or equal to 8 using the <= operator.
#In this case since 3 is less than 8 one of the requirements of the operator is made and the answer will be true

3 <= 8# Is 3 less than or equal to 8?
[1] TRUE
#Logical Conjunction (and)
#TRUE and FALSE are reserved keywords that represent the logical boolean values.
#TRUE represents the logical value for true.
#FALSE represents the logical value for false.

#The & operator returns TRUE only when both operands are TRUE; since one of the operands here is FALSE, the answer will be FALSE

TRUE & FALSE #True AND False
[1] FALSE
#Negation
#In this case we are using the ! operator that means not, followed by the reserve word false
! FALSE # This means not false and the answer will be true
[1] TRUE
#Combination of statements
#This code is a logical operation involving comparison and logical OR (|) operator.
#In this case, since 2 is less than 3, the left side is TRUE, so the OR is TRUE even though 1 == 5 is FALSE
2 < 3 | 1 == 5 # 2<3 is True, 1==5 is False, True OR False is True
[1] TRUE
#Assigning Values to Variables
#In the first line of this code we assign the result of 6+5 to the variable Total_Bases. In this case 11
#In the second line of code we multiply the contents of the variable Total_Bases (11) by 3
#So the result will be 33
Total_Bases <- 6 + 5
Total_Bases * 3
[1] 33
#ls() is a function that lists the names of objects (variables, functions, etc.) that are currently defined in the workspace
ls()
 [1] "BA"                 "bar"                "barsample"          "Batting_Average"    "contract_length"    "contract_years"     "contracts_mean"    
 [8] "contracts_median"   "contracts_n"        "contracts_sd"       "contracts_w1sd"     "contracts_w2sd"     "contracts_w3sd"     "favorite_days"     
[15] "getMode"            "hits_by_innings"    "HR_before"          "n"                  "n_1"                "n_2"                "n_seasons"         
[22] "OBP"                "On_Base_Percentage" "pitches_by_innings" "Player"             "player_positions"   "Robert_HRs"         "salary_ave"        
[29] "sals"               "samplerows"         "strikes_by_innings" "survey_summary"     "Total_Bases"        "triple_hits"        "triple_mean"       
[36] "triple_median"      "triple_n"           "triple_sd"          "triple_w1sd"        "triple_w3sd"        "triples_data"       "triples_hit"       
[43] "Walks_before"       "wanted_HR"          "wanted_Walks"       "x"                  "x_4"                "x_6"                "y_1"               
[50] "y_2"               

Vectors

#In this code we are creating a vector variable named pitches_by_innings and assigning 12, 15, 10, 20, 10 to the vector
#Then we print the contents of the vector pitches_by_innings

pitches_by_innings <- c(12, 15, 10, 20, 10) 
pitches_by_innings
[1] 12 15 10 20 10
#In this code we are creating a vector variable named strikes_by_innings and assigning 9, 12, 6, 14, 9 to the vector
#Then we print the contents of the vector strikes_by_innings
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
[1]  9 12  6 14  9
#Question_4: Define two vectors,runs_per_innings and hits_per_innings, each with five elements. 
#In this code we are creating a vector variable named pitches_by_innings and assigning 3, 2, 4, 1, 2 to the vector
#Then we print the contents of the vector pitches_by_innings
pitches_by_innings <- c(3, 2, 4, 1, 2)
pitches_by_innings
[1] 3 2 4 1 2
#In this code we are creating a vector variable named strikes_by_innings and assigning 7, 5, 8, 4, 6 to the vector
#Then we print the contents of the vector strikes_by_innings
strikes_by_innings <- c(7, 5, 8, 4, 6)
strikes_by_innings
[1] 7 5 8 4 6
#replicate function
#This code uses the replication function rep() to repeat its first argument the number of times given by its second argument
#In this case we are replicating 2 five times and the result will be 2 2 2 2 2
rep(2, 5)
[1] 2 2 2 2 2
# consecutive numbers
#In this code we are using the Range Operator (:) to create sequences of numbers. When used between two numbers 1 and 5, it generates a sequence starting from 1 to 5, inclusive.
1:5
[1] 1 2 3 4 5
#sequence from 1 to 10 with a step of 2
# In this code we are using the sequence function seq() to generate a range from 1 to 10. 
#In this case we are using by (step size) to add by 2 to each step after 1
seq(1, 10, by=2)
[1] 1 3 5 7 9
#add vectors
#This is a simple code adding the vector variable pitches_by_innings to the vector variable strikes_by_innings
#Adding same place elements from pitches_by_innings to the same place elements of strikes_by_innings
pitches_by_innings+strikes_by_innings
[1] 10  7 12  5  8
#compare vectors
#This is a simple code comparing same place elements of the vector variable pitches_by_innings to the vector variable strikes_by_innings
pitches_by_innings == strikes_by_innings
[1] FALSE FALSE FALSE FALSE FALSE
#find length of vector
#this is a simple code using the length() function to find and print how many elements are in the vector
length(pitches_by_innings)
[1] 5
#find minimum value in vector
#this is a simple code using the min() function to find the lowest element in pitches_by_innings
min(pitches_by_innings)
[1] 1
#find average value in vector
#this is a simple code using the mean() function to find mean of the elements in pitches_by_innings
mean(pitches_by_innings)
[1] 2.4
#This code prints the values inside pitches_by_innings
pitches_by_innings
[1] 3 2 4 1 2
#If you want to get the first element:
#This code uses [] to find the value in the first position of pitches_by_innings
pitches_by_innings[1]
[1] 3
# Question_5: Get the first element of hits_per_innings.
#In this code we are declaring the variable hits_by_innings
#Then convert it to a vector variable and adding 1, 3, 5, 2, 7. 
#Lastly we print the contents of the vector variable hits_by_innings
hits_by_innings <- c(1, 3, 5, 2, 7)
hits_by_innings
[1] 1 3 5 2 7
#This code uses [] to find the value in the first position of hits_by_innings
hits_by_innings[1]
[1] 1
#This code is an expression that retrieves the last element from the vector pitches_by_innings.
pitches_by_innings[length(pitches_by_innings)]
[1] 2
#Question_6: Get the last element of hits_per_innings.
#Index with length() so the last element is found whatever the vector's size,
#instead of hard-coding position 5
hits_by_innings[length(hits_by_innings)]
[1] 7
#This code is used to subset or extract specific elements from the vector pitches_by_innings 
#In this case the values located in places 2,3, and 4
pitches_by_innings[c(2, 3, 4)]
[1] 2 4 1
#In this code we are declaring the variable player_positions
#Then we convert it to a vector variable by adding elements
#In this case the variable contains text items
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
#Data Frames
#In this code we are using the function data.frame() to create a data frame
#In this case with 3 columns with labels (bonus, active roster, and salary).
#Then we declare 3 variables with the above names and convert them to vector variables by adding values
data.frame(bonus = c(2, 3, 1),#in millions 
           active_roster = c("yes", "no", "yes"), 
           salary = c(1.5, 2.5, 1))#in millions 
#How to Make a Random Sample
#In this case the sample() function is used to generate a random sample of a specified size from the values 1 through 10
#Then we are using size to define how many elements we want. In this case 5
#Note: Every time we run this code the result will randomly change 
sample(1:10, size=5)
[1] 9 4 3 1 6
# Create a data frame named bar with two columns: var1 and var2.
# var1 holds the first 10 entries of the built-in constant LETTERS (A - J).
# var2 holds the integers 1 through 10.
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Check data frame
bar
# n is the number of rows to sample
n <- 5

# Draw n row positions at random (without replacement) from all rows of bar.
# seq_len(nrow(bar)) spans 1..nrow(bar) (here 1..10) and, unlike 1:nrow(bar),
# yields an empty sequence rather than c(1, 0) if bar ever has zero rows.
samplerows <- sample(seq_len(nrow(bar)), size = n)
# print sample rows
samplerows
[1]  4  7  8  9 10
# extract rows
# barsample is the subset of bar whose row positions are listed in
# samplerows; all columns are kept.
barsample <- bar[samplerows, ]
# print sample
print(barsample)
# One-step equivalent: draw n random row positions and subset bar in a single
# expression. seq_len() keeps the index range empty when bar has no rows,
# where 1:nrow(bar) would produce c(1, 0).
bar[sample(seq_len(nrow(bar)), size = n), ]
# Using Tables
#In this code we declare the variable x and convert it to a vector and add the values "Yes","No","No","Yes","Yes"
#Then we use the function table() to group the contents of the vector x 
x <- c("Yes","No","No","Yes","Yes") 
table(x)
x
 No Yes 
  2   3 
#Numerical measures of center and spread
#Adding values to the variable sals
#In this case we are declaring the variable sals and convert it to a vector, then add 12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25
sals <- c(12, .4, 5, 2, 50, 8, 3, 1, 4, 0.25)
#the average
#In this code we use the function mean() to calculate the average of the vector variable sals
mean(sals) 
[1] 8.565
#the variance
#In this case we are using the function var() to calculate the variance of the variable sals elements
var(sals)
[1] 225.5145
#the standard deviation
#In this case we are using the function sd() to calculate the standard deviation of the variable sals elements
sd(sals)
[1] 15.01714
#the median
#In this case we are using the function median() to calculate the median of the variable sals elements
median(sals)
[1] 3.5
#Tukey's five number summary, useful for boxplots
#five numbers: min, lower hinge, median, upper hinge, max

#In this code fivenum() is a function that computes Tukey's five-number summary for a numeric vector. Tukey's five-number summary consists of:

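#Minimum
#Lower hinge (approximately the 25th percentile; it can differ slightly from the 1st quartile reported by summary())
#Median (50th percentile, Q2)
#Upper hinge (approximately the 75th percentile; it can differ slightly from the 3rd quartile reported by summary())
#Maximum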

fivenum(sals)
[1]  0.25  1.00  3.50  8.00 50.00
#summary statistics
#In this code we use the summary() function to provides a concise summary of the statistical properties of a numeric vector, such as sals.

#summary() calculates and displays the following statistics:
#Minimum (Min): The smallest value in the vector.
#1st Quartile (1st Qu): The value below which 25% of the data falls (also known as Q1 or the lower quartile).
#Median (Median or 50th percentile): The middle value in the sorted vector (Q2).
#Mean (Mean): The average value of the vector.
#3rd Quartile (3rd Qu): The value below which 75% of the data falls (Q3 or the upper quartile).
#Maximum (Max): The largest value in the vector.
#Number of missing values (NA's): The count of NA values in the vector; this column is shown only when the vector contains NAs.

summary(sals)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  0.250   1.250   3.500   8.565   7.250  50.000 

How about the mode?

# Find the mode, i.e. the most frequent value.
# getMode(x) returns the value that occurs most often in the vector x; when
# several values tie, the one that appears first in x is returned.
getMode <- function(x) {
  distinct_values <- unique(x)
  # position of each element of x within distinct_values
  positions <- match(x, distinct_values)
  # number of occurrences of each distinct value, in order of first appearance
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}
#Most frequent value in pitches_by_innings
#In this code we use The getMode function to find the mode (most frequent value) in vector pitches_by_innings. 
getMode(pitches_by_innings)
[1] 2
#Question_7: Find the most frequent value of hits_per_innings.
#In this code we use The getMode function to find the mode (most frequent value) in vector hits_by_innings. 
getMode(hits_by_innings)
[1] 1
#Question_8: Summarize the following survey with the `table()` command:
#What is your favorite day of the week to watch baseball? A total of 10 fans submitted this survey.
#Saturday, Saturday, Sunday, Monday, Saturday,Tuesday, Sunday, Friday, Friday, Monday

#Define the survey responses as a vector
favorite_days <- c("Saturday", "Saturday", "Sunday", "Monday", "Saturday", 
                   "Tuesday", "Sunday", "Friday", "Friday", "Monday")
#Create a table of frequencies
#This code uses the function table() to count how often each value occurs in the vector favorite_days and assigns the resulting frequency table to the variable survey_summary
survey_summary <- table(favorite_days)
#Display the survey summary
survey_summary
favorite_days
  Friday   Monday Saturday   Sunday  Tuesday 
       2        2        3        2        1 
#Question_9: What is the most frequent answer recorded in the survey? Use the getMode function to compute results. 
#getMode is applied to the raw responses in favorite_days; applying it to
#survey_summary would return the most common count (2), not the most common day.
getMode(favorite_days)
[1] "Saturday"
