# VECTORS ----
# Report the working directory the script is running from.
getwd()
## [1] "C:/Users/USER/Desktop/cchub kmeans"

# Gender labels for six students, cycling through the two labels in order.
ge_var <- rep_len(c("males", "female"), length.out = 6)
print("Gender Variable:")
## [1] "Gender Variable:"
print(ge_var)
## [1] "males" "female" "males" "female" "males" "female"

# Marks of the same six students.
marks_variable <- c(35, 33, 39, 37, 31, 36)
print("Marks Variable:")
## [1] "Marks Variable:"
print(marks_variable)
## [1] 35 33 39 37 31 36

# Descriptive statistics of the marks, printed one per line in this order:
# mean, variance, standard deviation, minimum, maximum.
print("Statistics for Marks:")
## [1] "Statistics for Marks:"
invisible(lapply(
  list(mean, var, sd, min, max),
  function(stat_fn) print(stat_fn(marks_variable))
))
## [1] 35.16667
## [1] 8.166667
## [1] 2.857738
## [1] 31
## [1] 39
# FACTORS ----
# Region codes 1/2 turned into a factor labelled Rural/Urban in one step.
region <- factor(
  c(1, 2, 1, 2, 2, 1),
  levels = c(1, 2),
  labels = c("Rural", "Urban")
)
print("Region Variable:")
## [1] "Region Variable:"
print(region)
## [1] Rural Urban Rural Urban Urban Rural
## Levels: Rural Urban

# Gender labels as a factor; levels are sorted alphabetically.
gender_variable <- factor(ge_var)
print("Gender Variable as Factor:")
## [1] "Gender Variable as Factor:"
print(gender_variable)
## [1] males female males female males female
## Levels: female males

# Per-gender mean, variance and standard deviation of the marks.
print("Mean Marks by Gender:")
## [1] "Mean Marks by Gender:"
print(tapply(marks_variable, INDEX = gender_variable, FUN = mean))
## female males
## 35.33333 35.00000
print("Variance by Gender:")
## [1] "Variance by Gender:"
print(tapply(marks_variable, INDEX = gender_variable, FUN = var))
## female males
## 4.333333 16.000000
print("Standard Deviation by Gender:")
## [1] "Standard Deviation by Gender:"
print(tapply(marks_variable, INDEX = gender_variable, FUN = sd))
## female males
## 2.081666 4.000000
# MATRICES ----
# Scores of five students, one vector per level.
X <- c(12, 15, 19, 13, 17)
Y <- c(10, 13, 19, 18, 12)
Z <- c(15, 19, 20, 12, 16)
W <- c(10, 20, 15, 12, 19)

# matrix() fills column by column, so X, Y, Z, W become the columns
# "Level I" .. "Level IV"; rows are named after the students.
RESULTS <- matrix(
  c(X, Y, Z, W),
  ncol = 4,
  dimnames = list(
    c("Peter", "Mary", "Jack", "Jane", "John"),
    c("Level I", "Level II", "Level III", "Level IV")
  )
)
print("Matrix RESULTS:")
## [1] "Matrix RESULTS:"
print(RESULTS)
## Level I Level II Level III Level IV
## Peter 12 10 15 10
## Mary 15 13 19 20
## Jack 19 19 20 15
## Jane 13 18 12 12
## John 17 12 16 19

# Select columns "Level I" and "Level IV" by name.
RESULT_1 <- RESULTS[, c("Level I", "Level IV")]
print("Subset of Results with Level I and Level IV:")
## [1] "Subset of Results with Level I and Level IV:"
print(RESULT_1)
## Level I Level IV
## Peter 12 10
## Mary 15 20
## Jack 19 15
## Jane 13 12
## John 17 19

# Same subset obtained by excluding "Level II" and "Level III".
RESULTS_2 <- RESULTS[, !(colnames(RESULTS) %in% c("Level II", "Level III"))]
print("Subset of Results excluding Level II and Level III:")
## [1] "Subset of Results excluding Level II and Level III:"
print(RESULTS_2)
## Level I Level IV
## Peter 12 10
## Mary 15 20
## Jack 19 15
## Jane 13 12
## John 17 19

# "Level III" scores of the first four students.
print(RESULTS[1:4, "Level III"])
## Peter Mary Jack Jane
## 15 19 20 12

# Peter's "Level II" and "Level IV" scores.
print(RESULTS["Peter", c("Level II", "Level IV")])
## Level II Level IV
## 10 10

# Mean of "Level I" and "Level IV" for Peter and for Mary (one value per row).
print(rowMeans(RESULTS[c("Peter", "Mary"), c("Level I", "Level IV")]))
## Peter Mary
## 11.0 17.5

# Peter's mean score across all four levels. Taking column means of a
# one-row matrix would only echo his raw scores, so average the row itself.
peter_mean <- mean(RESULTS["Peter", ])
print(peter_mean)
## [1] 11.75

# Peter's total across all levels, and the "Level IV" total over all students.
print(sum(RESULTS["Peter", ]))
## [1] 47
print(sum(RESULTS[, "Level IV"]))
## [1] 76
# DATA FRAMES ----
# Build a data frame of scores: one column per level, one row per student.
x <- c(12, 15, 19, 13, 17)
y <- c(10, 13, 19, 18, 12)
z <- c(15, 19, 20, 12, 16)
w <- c(10, 20, 15, 12, 19)
new_list <- data.frame(x, y, z, w)
print("Initial Dataframe:")
## [1] "Initial Dataframe:"
print(new_list)
## x y z w
## 1 12 10 15 10
## 2 15 13 19 20
## 3 19 19 20 15
## 4 13 18 12 12
## 5 17 12 16 19

# Give the columns and rows meaningful names. The level column names are kept
# in one vector so later aggregates can be restricted to the mark columns.
level_cols <- c("level I", "level II", "level III", "level IV")
colnames(new_list) <- level_cols
rownames(new_list) <- c("peter", "mery", "jack", "jane", "john")
print("Dataframe with Renamed Columns and Rows:")
## [1] "Dataframe with Renamed Columns and Rows:"
print(new_list)
## level I level II level III level IV
## peter 12 10 15 10
## mery 15 13 19 20
## jack 19 19 20 15
## jane 13 18 12 12
## john 17 12 16 19

# Total marks per student, summed over the four level columns only.
total <- rowSums(new_list[level_cols])
new_list <- cbind(new_list, Total = total)
print("Dataframe with Total Marks:")
## [1] "Dataframe with Total Marks:"
print(new_list)
## level I level II level III level IV Total
## peter 12 10 15 10 47
## mery 15 13 19 20 67
## jack 19 19 20 15 73
## jane 13 18 12 12 55
## john 17 12 16 19 64

# Student(s) holding the highest total.
max_total <- max(new_list$Total)
max_total
## [1] 73
print("Student with the Highest Total Marks:")
## [1] "Student with the Highest Total Marks:"
print(new_list[new_list$Total == max_total, ])
## level I level II level III level IV Total
## jack 19 19 20 15 73

# Students ordered by total marks, highest first.
sorted_list <- new_list[order(-new_list$Total), ]
print("Sorted Dataframe by Total Marks:")
## [1] "Sorted Dataframe by Total Marks:"
print(sorted_list)
## level I level II level III level IV Total
## jack 19 19 20 15 73
## mery 15 13 19 20 67
## john 17 12 16 19 64
## jane 13 18 12 12 55
## peter 12 10 15 10 47

# Set mery's level III mark to 21, then refresh Total so it does not keep
# the value computed from the old mark.
new_list["mery", "level III"] <- 21
new_list$Total <- unname(rowSums(new_list[level_cols]))
print("Updated Dataframe with Mery's Marks:")
## [1] "Updated Dataframe with Mery's Marks:"
print(new_list)
## level I level II level III level IV Total
## peter 12 10 15 10 47
## mery 15 13 21 20 69
## jack 19 19 20 15 73
## jane 13 18 12 12 55
## john 17 12 16 19 64

# Students whose level II mark is above 16.
subset_above_16 <- new_list[new_list$`level II` > 16, ]
print("Subset of Students with Level II > 16:")
## [1] "Subset of Students with Level II > 16:"
print(subset_above_16)
## level I level II level III level IV Total
## jack 19 19 20 15 73
## jane 13 18 12 12 55

# Percentage = mean mark over the four levels relative to the per-level
# maximum. Only the level columns are averaged; including Total in the mean
# would inflate the result above 100.
# NOTE(review): the maximum of 20 is taken from the original divisor; mery's
# updated level III mark (21) exceeds it -- confirm the real maximum mark.
max_mark_per_level <- 20
new_list$Percentage <-
  unname(rowMeans(new_list[level_cols])) / max_mark_per_level * 100
print("Dataframe with Percentage Marks:")
## [1] "Dataframe with Percentage Marks:"
print(new_list)
## level I level II level III level IV Total Percentage
## peter 12 10 15 10 47 58.75
## mery 15 13 21 20 69 86.25
## jack 19 19 20 15 73 91.25
## jane 13 18 12 12 55 68.75
## john 17 12 16 19 64 80.00

# Column-wise summary statistics (min, quartiles, mean, max).
summary_table <- summary(new_list)
print("Summary Statistics:")
## [1] "Summary Statistics:"
print(summary_table)
## level I level II level III level IV Total
## Min. :12.0 Min. :10.0 Min. :12.0 Min. :10.0 Min. :47.0
## 1st Qu.:13.0 1st Qu.:12.0 1st Qu.:15.0 1st Qu.:12.0 1st Qu.:55.0
## Median :15.0 Median :13.0 Median :16.0 Median :15.0 Median :64.0
## Mean :15.2 Mean :14.4 Mean :16.8 Mean :15.2 Mean :61.6
## 3rd Qu.:17.0 3rd Qu.:18.0 3rd Qu.:20.0 3rd Qu.:19.0 3rd Qu.:69.0
## Max. :19.0 Max. :19.0 Max. :21.0 Max. :20.0 Max. :73.0
## Percentage
## Min. :58.75
## 1st Qu.:68.75
## Median :80.00
## Mean :77.00
## 3rd Qu.:86.25
## Max. :91.25

# Names of the students with the highest and lowest level IV mark
# (which.max/which.min return the first match if there are ties).
max_level_IV_student <- rownames(new_list)[which.max(new_list$`level IV`)]
min_level_IV_student <- rownames(new_list)[which.min(new_list$`level IV`)]
cat("Student with the Highest Marks in Level IV:", max_level_IV_student, "\n")
## Student with the Highest Marks in Level IV: mery
cat("Student with the Lowest Marks in Level IV:", min_level_IV_student, "\n")
## Student with the Lowest Marks in Level IV: peter