First Steps with R activity. Basic calculations: you can use R for the basic computations you would perform on a calculator.
# Arithmetic ----
# Subtraction
result <- 2 - 3
print(result)
## [1] -1
# Division
result <- 2 / 3
print(result)
## [1] 0.6666667
# Exponentiation (evaluated and printed three times)
result <- 2^3
print(result)
## [1] 8
result <- 2^3
print(result)
## [1] 8
result <- 2^3
print(result)
## [1] 8
# Square root
result <- sqrt(2)
print(result)
## [1] 1.414214
# Natural logarithm: log() uses base e unless a base is supplied
result <- log(2)
print(result)
## [1] 0.6931472
# Logarithms of 10 ----
# Base-5 logarithm, requested directly through the `base` argument
log_base_5_of_10 <- log(10, base = 5)
# Natural logarithm
natural_log_of_10 <- log(10)
# Show both values next to a descriptive label
print(paste("Log base 5 of 10:", log_base_5_of_10))
## [1] "Log base 5 of 10: 1.43067655807339"
print(paste("Natural log of 10:", natural_log_of_10))
## [1] "Natural log of 10: 2.30258509299405"
# Q-1: Calculate the batting average (BA) for a player
hits <- 29 # Number of hits
at_bats <- 112 # Number of at-bats
BA <- hits / at_bats # Batting average = hits divided by at-bats
print(BA) # Print the unrounded batting average
## [1] 0.2589286
# round() rounds to the nearest value at 3 decimal places; it does not
# always round up
BA <- round(BA, digits = 3)
print(BA) # Print the batting average with 3 decimals
## [1] 0.259
# Q-2: Batting average (BA) and on-base percentage (OBP) for a player
hits_q2 <- 42 # Number of hits
at_bats_q2 <- 212 # Number of at-bats
BA_q2 <- hits_q2 / at_bats_q2 # BA = hits / at-bats
# OBP inputs
AB <- 515 # At-bats
H <- 172 # Hits
BB <- 84 # Bases on balls (walks)
HBP <- 5 # Times hit by pitch
SF <- 6 # Sacrifice flies
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# Hits are already counted as part of at-bats, so H must not be added to
# the denominator a second time.
OBP <- (H + BB + HBP) / (AB + BB + HBP + SF)
# Print the results
print(paste("BA for Q-2:", BA_q2))
## [1] "BA for Q-2: 0.19811320754717"
print(paste("On Base Percentage:", OBP))
## [1] "On Base Percentage: 0.427868852459016"
OBP <- round(OBP, digits = 3) # Round to 3 decimal places
print(paste("On Base Percentage:", OBP))
## [1] "On Base Percentage: 0.428"
# Q-3: Batting average (BA) and on-base percentage (OBP) for a player
# NOTE(review): hits and at-bats below are the same values used for Q-2;
# confirm these are the intended Q-3 inputs.
hits_q3 <- 42 # Number of hits
at_bats_q3 <- 212 # Number of at-bats
BA_q3 <- hits_q3 / at_bats_q3 # BA = hits / at-bats
# OBP inputs
AB_q3 <- 565 # At-bats
H_q3 <- 156 # Hits
BB_q3 <- 65 # Bases on balls (walks)
HBP_q3 <- 3 # Times hit by pitch
SF_q3 <- 7 # Sacrifice flies
# OBP = (H + BB + HBP) / (AB + BB + HBP + SF).
# Hits are already counted as part of at-bats, so H must not be added to
# the denominator a second time.
OBP_q3 <- (H_q3 + BB_q3 + HBP_q3) / (AB_q3 + BB_q3 + HBP_q3 + SF_q3)
# Print the results
print(paste("BA for Q-3:", BA_q3))
## [1] "BA for Q-3: 0.19811320754717"
print(paste("On Base Percentage:", OBP_q3))
## [1] "On Base Percentage: 0.35"
# Round the Q-3 value itself (not the Q-2 OBP) to 3 decimal places
OBP_q3 <- round(OBP_q3, digits = 3)
print(paste("On Base Percentage:", OBP_q3))
## [1] "On Base Percentage: 0.35"
# Comparison operators ----
# Equality: does 3 equal 8?
result <- 3 == 8
print(result)
## [1] FALSE
# Inequality: is 3 different from 8?
result <- 3 != 8
print(result)
## [1] TRUE
# Less than or equal: is 3 at most 8?
result <- 3 <= 8
print(result)
## [1] TRUE
# Strictly greater: is 3 greater than 4?
result <- 3 > 4
print(result)
## [1] FALSE
# Logical operators ----
# Disjunction (or): FALSE | FALSE
result <- FALSE | FALSE
print(result)
## [1] FALSE
# Conjunction (and): TRUE & FALSE
result <- TRUE & FALSE
print(result)
## [1] FALSE
# Negation: not FALSE
result <- !FALSE
print(result)
## [1] TRUE
# Combined statements: (2 < 3) OR (1 == 5)
result <- (2 < 3) | (1 == 5)
print(result)
## [1] TRUE
# Store the sum of 6 and 5 under the name Total_Bases
Total_Bases <- 6 + 5
# Triple the stored value
result <- Total_Bases * 3
print(result)
## [1] 33
# List the names of the objects currently defined
ls()
## [1] "AB" "AB_q3" "at_bats"
## [4] "at_bats_q2" "at_bats_q3" "BA"
## [7] "BA_q2" "BA_q3" "BB"
## [10] "BB_q3" "H" "H_q3"
## [13] "HBP" "HBP_q3" "hits"
## [16] "hits_q2" "hits_q3" "log_base_5_of_10"
## [19] "natural_log_of_10" "OBP" "OBP_q3"
## [22] "result" "SF" "SF_q3"
## [25] "Total_Bases"
# Delete the Total_Bases variable
rm(list = "Total_Bases")
# Building vectors with c() ("concatenate") ----
# Pitches per inning
pitches_by_innings <- c(12, 15, 10, 20, 10)
pitches_by_innings
## [1] 12 15 10 20 10
# Strikes per inning
strikes_by_innings <- c(9, 12, 6, 14, 9)
strikes_by_innings
## [1] 9 12 6 14 9
# Q_4: Define two vectors, runs_per_9innings and hits_per_9innings,
# each holding 5 elements
runs_per_9innings <- c(3, 5, 4, 6, 2)
hits_per_9innings <- c(8, 7, 9, 5, 6)
# Show both vectors
print(runs_per_9innings)
## [1] 3 5 4 6 2
print(hits_per_9innings)
## [1] 8 7 9 5 6
# Repetition with rep(): the value 2 repeated 5 times
rep(2, times = 5)
## [1] 2 2 2 2 2
# The value 1 repeated 4 times
rep(1, times = 4)
## [1] 1 1 1 1
# Consecutive integers with the colon operator
1:5
## [1] 1 2 3 4 5
2:10
## [1] 2 3 4 5 6 7 8 9 10
# Regular sequences with seq(): from 1 to 10 in steps of 2
seq(from = 1, to = 10, by = 2)
## [1] 1 3 5 7 9
# From 2 to 13 in steps of 3
seq(from = 2, to = 13, by = 3)
## [1] 2 5 8 11
# Two equal-length vectors to operate on
pitches_by_innings <- c(10, 12, 15, 20, 10)
strikes_by_innings <- c(11, 15, 1, 14, 9)
# Element-wise addition
sum_vector <- pitches_by_innings + strikes_by_innings
print(sum_vector)
## [1] 21 27 16 34 19
# Element-wise equality test
comparison_vector <- pitches_by_innings == strikes_by_innings
print(comparison_vector)
## [1] FALSE FALSE FALSE FALSE FALSE
# Number of elements
length_pitches <- length(pitches_by_innings)
print(length_pitches)
## [1] 5
# Smallest element
min_pitch <- min(pitches_by_innings)
print(min_pitch)
## [1] 10
# Arithmetic mean of the elements
mean_pitch <- mean(pitches_by_innings)
print(mean_pitch)
## [1] 13.4
# Define the pitches_by_innings vector
pitches_by_innings <- c(12, 15, 10, 20, 10)
# Access and print the first element
first_element <- pitches_by_innings[1]
print(first_element)
## [1] 12
# Get the last element: index with the vector's own length
pitches_by_innings[length(pitches_by_innings)]
## [1] 10
# Q_5: Get the 1st element of 'hits_per_9innings'
hits_per_9innings[1]
## [1] 8
# Get the last element of 'hits_per_9innings'
last_element <- hits_per_9innings[length(hits_per_9innings)]
print(last_element) # Print the last element
## [1] 6
# Extract multiple values from a vector. Ex: get 2nd through 4th values
pitches_by_innings[c(2, 3, 4)]
## [1] 15 10 20
# Vectors are not limited to numbers: they can hold strings or logical values
player_positions <- c("catcher", "pitcher", "infielders", "outfielders")
# Build a data frame by hand with data.frame(), one argument per column
data.frame(
  bonus = c(2, 3, 1), # in millions
  active_roster = c("yes", "no", "yes"),
  salary = c(1.5, 2.5, 1) # in millions
)
# Draw a random sample with sample():
# pick 5 numbers between 1 and 10 at random, without duplication.
# No seed has been set at this point in the script, so the values shown
# below are just one possible draw.
sample(1:10, size = 5)
## [1] 8 1 4 3 9
# Fix the random seed so the sampled column below is reproducible
set.seed(123)
# Build a 26-row data frame: one row per uppercase letter, each paired with
# a random integer between 1 and 100 drawn with replacement
my_data_frame <- data.frame(
  Column1 = LETTERS,
  Column2 = sample(1:100, size = length(LETTERS), replace = TRUE)
)
# Display the data frame
print(my_data_frame)
## Column1 Column2
## 1 A 31
## 2 B 79
## 3 C 51
## 4 D 14
## 5 E 67
## 6 F 42
## 7 G 50
## 8 H 43
## 9 I 14
## 10 J 25
## 11 K 90
## 12 L 91
## 13 M 69
## 14 N 91
## 15 O 57
## 16 P 92
## 17 Q 9
## 18 R 93
## 19 S 99
## 20 T 72
## 21 U 26
## 22 V 7
## 23 W 42
## 24 X 9
## 25 Y 83
## 26 Z 36
# Small data frame to sample rows from
bar <- data.frame(var1 = LETTERS[1:10], var2 = 1:10)
# Number of rows to draw
n <- 5
# Pick n distinct row indices at random.
# seq_len() is used instead of 1:nrow(bar) because 1:0 would yield c(1, 0)
# for an empty data frame, whereas seq_len(0) is empty.
samplerows <- sample(seq_len(nrow(bar)), size = n)
# print sample rows
samplerows
## [1] 1 7 5 4 2
# extract rows
barsample <- bar[samplerows, ]
# print sample
print(barsample)
## var1 var2
## 1 A 1
## 7 G 7
## 5 E 5
## 4 D 4
## 2 B 2
# The same idea in one step: a fresh random draw of n rows
bar[sample(seq_len(nrow(bar)), n), ]
# Summarizing categorical answers with table() ----
x <- c("Yes", "No", "No", "Yes", "Yes")
table(x)
## x
## No Yes
## 2 3
# Descriptive statistics ----
sals <- c(12, 0.4, 5, 2, 50, 8, 3, 1, 4, 0.25)
# Average
mean(sals)
## [1] 8.565
# Variance
var(sals)
## [1] 225.5145
# Standard deviation
sd(sals)
## [1] 15.01714
# Median
median(sals)
## [1] 3.5
# Five-number summary: min, lower hinge, median, upper hinge, max
fivenum(sals)
## [1] 0.25 1.00 3.50 8.00 50.00
# Summary statistics
summary(sals)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.250 1.250 3.500 8.565 7.250 50.000
# Return the mode of `x`, i.e. its most frequent value.
# When several values share the highest count, the one that appears first
# in `x` is returned, because which.max() picks the first maximum.
getMode <- function(x) {
  distinct_values <- unique(x)
  # Position of every element of x within the distinct values
  positions <- match(x, distinct_values)
  # How many times each distinct value occurs
  counts <- tabulate(positions)
  distinct_values[which.max(counts)]
}
# Mode of pitches_by_innings
getMode(pitches_by_innings)
## [1] 10
# Q_7: Find the most frequent value of hits_per_9innings
getMode(hits_per_9innings)
## [1] 8
# Survey answers, one day of the week per response
game_day <- c(
  "Saturday", "Saturday", "Sunday", "Monday", "Saturday",
  "Tuesday", "Sunday", "Friday", "Friday", "Monday"
)
# Summarize the survey with the `table()` command
table(game_day)
## game_day
## Friday Monday Saturday Sunday Tuesday
## 2 2 3 2 1
# Q_9: What is the most frequent answer recorded in the survey?
# Use the getMode function to compute results.
getMode(game_day)
## [1] "Saturday"