# VECTORS ----

# Report the current working directory (path shown is from the recorded run).
getwd()
## [1] "C:/Users/USER/Desktop/cchub kmeans"

# Gender labels for six students: the two labels repeated three times, so
# they alternate.
# NOTE(review): the labels are "males" and "female" (mixed plural/singular);
# kept as-is here -- confirm which spelling is intended.
ge_var <- rep(c("males", "female"), 3)
print("Gender Variable:")
## [1] "Gender Variable:"
print(ge_var)
## [1] "males"  "female" "males"  "female" "males"  "female"

# Marks for the same six students, in the same order as `ge_var`.
marks_variable <- c(35, 33, 39, 37, 31, 36)
print("Marks Variable:")
## [1] "Marks Variable:"
print(marks_variable)
## [1] 35 33 39 37 31 36

# Descriptive statistics for the marks. Each function is applied to
# `marks_variable` and printed in this order: mean, (sample) variance,
# (sample) standard deviation, minimum, maximum.
print("Statistics for Marks:")
## [1] "Statistics for Marks:"
invisible(lapply(
  list(mean, var, sd, min, max),
  function(stat) print(stat(marks_variable))
))
## [1] 35.16667
## [1] 8.166667
## [1] 2.857738
## [1] 31
## [1] 39
# FACTORS ----

# Region of each student. The numeric codes 1 and 2 are mapped to the labels
# "Rural" and "Urban" while the factor is being built.
region <- factor(
  c(1, 2, 1, 2, 2, 1),
  levels = c(1, 2),
  labels = c("Rural", "Urban")
)
print("Region Variable:")
## [1] "Region Variable:"
print(region)
## [1] Rural Urban Rural Urban Urban Rural
## Levels: Rural Urban

# Turn the character gender labels into a factor; the levels come out in
# alphabetical order ("female" before "males").
gender_variable <- factor(ge_var)
print("Gender Variable as Factor:")
## [1] "Gender Variable as Factor:"
print(gender_variable)
## [1] males  female males  female males  female
## Levels: female males

# Group-wise statistics of the marks, one value per gender level.
print("Mean Marks by Gender:")
## [1] "Mean Marks by Gender:"
print(tapply(marks_variable, gender_variable, mean))
##   female    males 
## 35.33333 35.00000

print("Variance by Gender:")
## [1] "Variance by Gender:"
print(tapply(marks_variable, gender_variable, var))
##    female     males 
##  4.333333 16.000000

print("Standard Deviation by Gender:")
## [1] "Standard Deviation by Gender:"
print(tapply(marks_variable, gender_variable, sd))
##   female    males 
## 2.081666 4.000000
# MATRICES ----

# One score vector per level; each holds five marks, one per student, in the
# row order given to `dimnames` below.
X <- c(12, 15, 19, 13, 17)
Y <- c(10, 13, 19, 18, 12)
Z <- c(15, 19, 20, 12, 16)
W <- c(10, 20, 15, 12, 19)

# matrix() fills column by column, so with ncol = 4 each vector above becomes
# one level column and each row is one student.
RESULTS <- matrix(
  c(X, Y, Z, W),
  ncol = 4,
  dimnames = list(
    c("Peter", "Mary", "Jack", "Jane", "John"),
    c("Level I", "Level II", "Level III", "Level IV")
  )
)
print("Matrix RESULTS:")
## [1] "Matrix RESULTS:"
print(RESULTS)
##       Level I Level II Level III Level IV
## Peter      12       10        15       10
## Mary       15       13        19       20
## Jack       19       19        20       15
## Jane       13       18        12       12
## John       17       12        16       19

# Keep only the "Level I" and "Level IV" columns, selected by name.
RESULT_1 <- RESULTS[, c("Level I", "Level IV")]
print("Subset of Results with Level I and Level IV:")
## [1] "Subset of Results with Level I and Level IV:"
print(RESULT_1)
##       Level I Level IV
## Peter      12       10
## Mary       15       20
## Jack       19       15
## Jane       13       12
## John       17       19

# Same subset obtained the other way round: drop "Level II" and "Level III".
RESULTS_2 <- RESULTS[, !(colnames(RESULTS) %in% c("Level II", "Level III"))]
print("Subset of Results excluding Level II and Level III:")
## [1] "Subset of Results excluding Level II and Level III:"
print(RESULTS_2)
##       Level I Level IV
## Peter      12       10
## Mary       15       20
## Jack       19       15
## Jane       13       12
## John       17       19

# "Level III" scores of the first four students (rows 1 to 4).
print(RESULTS[1:4, "Level III"])
## Peter  Mary  Jack  Jane 
##    15    19    20    12

# Peter's scores in "Level II" and "Level IV".
print(RESULTS["Peter", c("Level II", "Level IV")])
## Level II Level IV 
##       10       10

# Per-student mean of "Level I" and "Level IV" for Peter and Mary.
print(rowMeans(RESULTS[c("Peter", "Mary"), c("Level I", "Level IV")]))
## Peter  Mary 
##  11.0  17.5

# Peter's mean score across all four levels. Averaging the row itself gives a
# single number; taking column-wise means of a one-row matrix would only echo
# the four raw scores back.
peter_mean <- mean(RESULTS["Peter", ])
print(peter_mean)
## [1] 11.75

# Peter's total across all levels, then the "Level IV" total over all students.
print(sum(RESULTS["Peter", ]))
## [1] 47
print(sum(RESULTS[, "Level IV"]))
## [1] 76
# DATA FRAMES ----

# One score vector per level; each holds five marks, one per student.
x <- c(12, 15, 19, 13, 17)
y <- c(10, 13, 19, 18, 12)
z <- c(15, 19, 20, 12, 16)
w <- c(10, 20, 15, 12, 19)
new_list <- data.frame(x, y, z, w)
print("Initial Dataframe:")
## [1] "Initial Dataframe:"
print(new_list)
##    x  y  z  w
## 1 12 10 15 10
## 2 15 13 19 20
## 3 19 19 20 15
## 4 13 18 12 12
## 5 17 12 16 19

# Rename columns and rows. `level_cols` is reused below so that the derived
# columns (Total, Percentage) are always computed from the four level scores
# only, never from each other.
level_cols <- c("level I", "level II", "level III", "level IV")
colnames(new_list) <- level_cols
rownames(new_list) <- c("peter", "mery", "jack", "jane", "john")
print("Dataframe with Renamed Columns and Rows:")
## [1] "Dataframe with Renamed Columns and Rows:"
print(new_list)
##       level I level II level III level IV
## peter      12       10        15       10
## mery       15       13        19       20
## jack       19       19        20       15
## jane       13       18        12       12
## john       17       12        16       19

# Total marks per student, stored as a new column "Total". `total` keeps the
# totals as first computed here.
total <- rowSums(new_list[level_cols])
new_list$Total <- unname(total)
print("Dataframe with Total Marks:")
## [1] "Dataframe with Total Marks:"
print(new_list)
##       level I level II level III level IV Total
## peter      12       10        15       10    47
## mery       15       13        19       20    67
## jack       19       19        20       15    73
## jane       13       18        12       12    55
## john       17       12        16       19    64

# Highest total, and the row(s) that reach it. print() is used explicitly so
# the value is also shown when the script is run through source().
max_total <- max(new_list$Total)
print(max_total)
## [1] 73
print("Student with the Highest Total Marks:")
## [1] "Student with the Highest Total Marks:"
print(new_list[new_list$Total == max_total, ])
##      level I level II level III level IV Total
## jack      19       19        20       15    73

# Students ordered by total marks, highest first. This snapshot is taken
# before the mark correction below.
sorted_list <- new_list[order(new_list$Total, decreasing = TRUE), ]
print("Sorted Dataframe by Total Marks:")
## [1] "Sorted Dataframe by Total Marks:"
print(sorted_list)
##       level I level II level III level IV Total
## jack       19       19        20       15    73
## mery       15       13        19       20    67
## john       17       12        16       19    64
## jane       13       18        12       12    55
## peter      12       10        15       10    47

# Set mery's "level III" mark to 21, then recompute Total so the derived
# column stays consistent with the level scores it summarises.
new_list["mery", "level III"] <- 21
new_list$Total <- unname(rowSums(new_list[level_cols]))
print("Updated Dataframe with Mery's Marks:")
## [1] "Updated Dataframe with Mery's Marks:"
print(new_list)
##       level I level II level III level IV Total
## peter      12       10        15       10    47
## mery       15       13        21       20    69
## jack       19       19        20       15    73
## jane       13       18        12       12    55
## john       17       12        16       19    64

# Students whose "level II" mark is greater than 16.
subset_above_16 <- new_list[new_list$`level II` > 16, ]
print("Subset of Students with Level II > 16:")
## [1] "Subset of Students with Level II > 16:"
print(subset_above_16)
##      level I level II level III level IV Total
## jack      19       19        20       15    73
## jane      13       18        12       12    55

# Percentage = mean level score relative to the maximum mark per level.
# Only the four level columns are averaged; including Total in the mean would
# inflate every percentage above its true value.
# NOTE(review): the maximum of 20 per level is taken from the original
# formula; mery's updated "level III" mark (21) exceeds it -- confirm.
max_marks_per_level <- 20
new_list$Percentage <-
  unname(rowMeans(new_list[level_cols])) / max_marks_per_level * 100
print("Dataframe with Percentage Marks:")
## [1] "Dataframe with Percentage Marks:"
print(new_list)
##       level I level II level III level IV Total Percentage
## peter      12       10        15       10    47      58.75
## mery       15       13        21       20    69      86.25
## jack       19       19        20       15    73      91.25
## jane       13       18        12       12    55      68.75
## john       17       12        16       19    64      80.00

# Per-column summary statistics (min, quartiles, median, mean, max).
summary_table <- summary(new_list)
print("Summary Statistics:")
## [1] "Summary Statistics:"
print(summary_table)
##     level I        level II      level III       level IV        Total     
##  Min.   :12.0   Min.   :10.0   Min.   :12.0   Min.   :10.0   Min.   :47.0  
##  1st Qu.:13.0   1st Qu.:12.0   1st Qu.:15.0   1st Qu.:12.0   1st Qu.:55.0  
##  Median :15.0   Median :13.0   Median :16.0   Median :15.0   Median :64.0  
##  Mean   :15.2   Mean   :14.4   Mean   :16.8   Mean   :15.2   Mean   :61.6  
##  3rd Qu.:17.0   3rd Qu.:18.0   3rd Qu.:20.0   3rd Qu.:19.0   3rd Qu.:69.0  
##  Max.   :19.0   Max.   :19.0   Max.   :21.0   Max.   :20.0   Max.   :73.0  
##    Percentage   
##  Min.   :58.75  
##  1st Qu.:68.75  
##  Median :80.00  
##  Mean   :77.00  
##  3rd Qu.:86.25  
##  Max.   :91.25

# Students with the highest and lowest "level IV" mark. which.max()/which.min()
# return the first matching row if several students tie.
max_level_IV_student <- rownames(new_list)[which.max(new_list$`level IV`)]
min_level_IV_student <- rownames(new_list)[which.min(new_list$`level IV`)]
cat("Student with the Highest Marks in Level IV:", max_level_IV_student, "\n")
## Student with the Highest Marks in Level IV: mery
cat("Student with the Lowest Marks in Level IV:", min_level_IV_student, "\n")
## Student with the Lowest Marks in Level IV: peter