# Load the dataset from the local workshop folder; the first row of the CSV
# holds the column names.
data <- read.csv(
  "C:/Users/HBE/OneDrive - CAST/CUNY/Bridge Workshop/Homework Materials/R/data.csv",
  header = TRUE
)

# Print explicitly so the overview also appears when the script is run via
# source(), where top-level values are not auto-printed.
print(summary(data))
# Mean and median of the three count columns in the full dataset.
# Each cat() ends with a newline so successive results print on separate lines.
mean_n_factor <- mean(data$n_factor)
median_n_factor <- median(data$n_factor)
cat("Mean of n_factor:", mean_n_factor, "\n")
cat("Median of n_factor:", median_n_factor, "\n")

mean_n_logical <- mean(data$n_logical)
median_n_logical <- median(data$n_logical)
cat("Mean of n_logical:", mean_n_logical, "\n")
cat("Median of n_logical:", median_n_logical, "\n")

mean_n_numeric <- mean(data$n_numeric)
median_n_numeric <- median(data$n_numeric)
cat("Mean of n_numeric:", mean_n_numeric, "\n")
# Report the median here (the mean was previously printed under this label).
cat("Median of n_numeric:", median_n_numeric, "\n")
# Keep only the rows with more than 10 factor columns, restricted to the
# three identifying columns and the three count columns.
subset_df <- data[
  data$n_factor > 10,
  c("Package", "Item", "Title", "n_factor", "n_logical", "n_numeric")
]
filtered_data <- subset_df

# Shorten the names of the three count columns; the first three column names
# (Package, Item, Title) are carried over unchanged.
new_column_names <- c("n_fac", "n_log", "n_num")
original_column_names <- names(filtered_data)
updated_column_names <- c(original_column_names[1:3], new_column_names)
names(filtered_data) <- updated_column_names

# Overview of the filtered, renamed data frame.
data_summary <- summary(filtered_data)
print(data_summary)
# Mean and median of the three renamed count columns in the filtered data.
# Each cat() ends with a newline so successive results print on separate lines.
mean_n_fac <- mean(filtered_data$n_fac)
median_n_fac <- median(filtered_data$n_fac)
cat("Mean of n_fac:", mean_n_fac, "\n")
cat("Median of n_fac:", median_n_fac, "\n")

mean_n_log <- mean(filtered_data$n_log)
median_n_log <- median(filtered_data$n_log)
cat("Mean of n_log:", mean_n_log, "\n")
# Report the median here (the mean was previously printed under this label).
cat("Median of n_log:", median_n_log, "\n")

mean_n_num <- mean(filtered_data$n_num)
median_n_num <- median(filtered_data$n_num)
cat("Mean of n_num:", mean_n_num, "\n")
cat("Median of n_num:", median_n_num, "\n")
# Pull out the count columns from the full and the filtered data so the two
# distributions can be compared column by column.
n_factor_data <- data$n_factor
n_logical_data <- data$n_logical
n_numeric_data <- data$n_numeric

n_fac_filtered <- filtered_data$n_fac
n_log_filtered <- filtered_data$n_log
n_num_filtered <- filtered_data$n_num

# Two-sample Wilcoxon rank-sum tests: full dataset vs. filtered subset.
# NOTE(review): filtered_data is a subset of data, so the two samples overlap
# and are not independent -- confirm this comparison is what is intended.
wilcox_factor <- wilcox.test(n_factor_data, n_fac_filtered)
wilcox_logical <- wilcox.test(n_logical_data, n_log_filtered)
wilcox_numeric <- wilcox.test(n_numeric_data, n_num_filtered)

# Labels are worded consistently and end with a newline so each heading sits
# on its own line above the printed test result.
cat("Wilcoxon rank-sum test results for n_factor:\n")
print(wilcox_factor)
cat("Wilcoxon rank-sum test results for n_logical:\n")
print(wilcox_logical)
cat("Wilcoxon rank-sum test results for n_numeric:\n")
print(wilcox_numeric)
# Replace three package names with their reversed spellings; every other
# package name (and any NA) is left untouched.
package_aliases <- c(
  AER = "REA",
  Ecdat = "tadcE",
  medicaldata = "atadlacidem"
)
# as.character() guards against Package having been read in as a factor, in
# which case element-wise replacement would otherwise operate on level codes.
package_names <- as.character(filtered_data$Package)
needs_alias <- package_names %in% names(package_aliases)
package_names[needs_alias] <- unname(package_aliases[package_names[needs_alias]])
filtered_data$Package <- package_names
# Step-by-step recap of the analysis. head() results are wrapped in print()
# so they are shown even when the script is run via source().
print("Step 1: Original dataset")
print(head(data, 3))

print("Step 2: 'filtered_data' with n_factor > 10")
print(head(filtered_data, 3))

print("Step 3.1: Mean and Median of 'n_fac' in 'filtered_data'")
print(mean_n_fac)
print(median_n_fac)

print("Step 3.2: Mean and Median of 'n_factor' in 'data'")
print(mean_n_factor)
print(median_n_factor)

print("Step 4.1: Mean and Median of 'n_num' in 'filtered_data'")
print(mean_n_num)
print(median_n_num)

print("Step 4.2: Mean and Median of 'n_numeric' in 'data'")
print(mean_n_numeric)
print(median_n_numeric)

print("Step 5.1: Mean and Median of 'n_log' in 'filtered_data'")
print(mean_n_log)
print(median_n_log)

# Use the full-data statistics here; the filtered n_log values were
# previously printed under this 'n_logical' in 'data' heading.
print("Step 5.2: Mean and Median of 'n_logical' in 'data'")
print(mean_n_logical)
print(median_n_logical)
# Install readr only when it is missing, instead of reinstalling on every run.
if (!requireNamespace("readr", quietly = TRUE)) {
  install.packages("readr")
}
library(readr)

# Enlarge the connection buffer used when reading from a URL.
# NOTE(review): this was presumably a workaround for the very long lines of
# the GitHub HTML page; it is likely unnecessary with the raw CSV URL below.
Sys.setenv("VROOM_CONNECTION_SIZE" = 1e6)

# Point at the raw file contents: the ".../blob/main/..." address serves an
# HTML page, not the CSV itself.
url <- "https://raw.githubusercontent.com/hbedros/R_HW2/main/data.csv"

# Re-read the dataset from GitHub; this replaces the locally loaded `data`.
data <- read_csv(url)
print(data)