R Code
#Question 6. Construct a web graph (or spider graph) of the categorical variables. Fine-tune the graph so that interesting results emerge. Discuss your findings.
install.packages("fmsb")
install.packages("readxl")
install.packages("dplyr")
library(fmsb)
library(readxl)
library(dplyr)
df <- read_excel("C:/Users/Kadeem Green/Downloads/spreadsheet_full.xlsx")
categorical_vars <- c("workclass", "education", "marital-status", "occupation",
"relationship", "race", "sex", "native-country", "income")
df[categorical_vars] <- lapply(df[categorical_vars], as.factor)
plot_radar_chart <- function(var_name) {
category_freq <- prop.table(table(df[[var_name]])) # Correct frequency calculation
category_freq <- as.data.frame(category_freq)
colnames(category_freq) <- c("Category", "Frequency")
max_val <- max(category_freq$Frequency, na.rm = TRUE)
min_val <- min(category_freq$Frequency, na.rm = TRUE)
max_values <- rep(max_val, nrow(category_freq)) # Ensure proper max scaling
min_values <- rep(0, nrow(category_freq)) # Min is always 0
radar_data <- rbind(max_values, min_values, t(category_freq$Frequency))
colnames(radar_data) <- category_freq$Category
radar_data <- as.data.frame(radar_data)
dev.new()
radarchart(radar_data,
axistype = 1,
pcol = "blue", # Line color
pfcol = rgb(0.2, 0.5, 0.8, 0.5), # Fill color
plwd = 3, # Line width
cglcol = "grey", # Grid color
cglty = 1, # Grid line type
axislabcol = "black",
caxislabels = round(seq(0, max_val, length.out = 5), 2), # Properly formatted axis labels
title = paste("Radar Chart of", var_name))
legend("topright", legend = paste("Categories of", var_name), col = "blue", lwd = 3)
}
***************************************************************************************************
#Question 9. Construct a histogram of each numerical variables, with an overlay of the target variable income normalize if necessary.
library(tidyverse)
age <- read.csv("C:/Users/Kadeem Green/Downloads/Adult.txt", stringsAsFactors = TRUE)
head(age)
str(age)
num_vars <- age %>% select_if(is.numeric) %>% names()
if(!is.factor(age$income)) {
age$income <- as.factor(age$income)
}
normalize <- function(x) {
return((x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE)))
}
age_normalized <- age %>%
mutate(across(all_of(num_vars), normalize))
for (var in num_vars) {
print(
ggplot(age, aes_string(x = var, fill = "income")) +
geom_histogram(alpha = 0.5, position = "identity", bins = 30) +
labs(title = paste("Histogram of", var, "by Income"),
x = var,
y = "Count") +
theme_minimal()
)
}
*****************************************************************************************************
#Question 10 For each pair of numerical variables, construct a scatter plot of the variables. Discuss your salient results.
library(ggplot2)
df <- read.csv("C:/Users/Kadeem Green/Downloads/spreadsheet_full.xlsx", stringsAsFactors = FALSE)
df$age <- as.numeric(df$age)
df$demogweight <- as.numeric(df$demogweight)
df$education_num <- as.numeric(df$education.num) # Ensure correct column name
df$capital_gain <- as.numeric(df$capital.gain)
df$capital_loss <- as.numeric(df$capital.loss)
df$hours_per_week <- as.numeric(df$hours.per.week)
df <- as.data.frame(df)
plot(df$age, df$hours_per_week,
main="Scatterplot of Age vs Hours per Week",
xlab="Age",
ylab="Hours Per Week",
pch=19, col="blue")
plot(df$education_num, df$capital_gain,
main="Scatterplot of Education Number vs Capital Gain",
xlab="Education Number",
ylab="Capital Gain",
pch=19, col="red")
plot(df$capital_gain, df$capital_loss,
main="Scatterplot of Capital Gain vs Capital Loss",
xlab="Capital Gain",
ylab="Capital Loss",
pch=19, col="green")
plot(df$hours_per_week, df$demogweight,
main="Scatterplot of Hours per Week vs Demographic Weight",
xlab="Hours Per Week",
ylab="Demographic Weight",
pch=19, col="purple")
plot(df$capital_gain, df$age,
main="Scatterplot of Capital Gain vs Age",
xlab="Capital Gain",
ylab="Age",
pch=19, col="orange")
```
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NClIgQ29kZSANCiNRdWVzdGlvbiA2LiBDb25zdHJ1Y3QgYSB3ZWIgZ3JhcGggKG9yIHNwaWRlciBncmFwaCkgb2YgdGhlIGNhdGVnb3JpY2FsIHZhcmlhYmxlcy4gRmluZS10dW5lIHRoZSBncmFwaCBzbyB0aGF0IGludGVyZXN0aW5nIHJlc3VsdHMgZW1lcmdlLiBEaXNjdXNzIHlvdXIgZmluZGluZ3MuDQoNCg0KDQppbnN0YWxsLnBhY2thZ2VzKCJmbXNiIikgICANCmluc3RhbGwucGFja2FnZXMoInJlYWR4bCIpIA0KaW5zdGFsbC5wYWNrYWdlcygiZHBseXIiKSAgDQoNCg0KbGlicmFyeShmbXNiKQ0KbGlicmFyeShyZWFkeGwpDQpsaWJyYXJ5KGRwbHlyKQ0KDQoNCmRmIDwtIHJlYWRfZXhjZWwoIkM6L1VzZXJzL0thZGVlbSBHcmVlbi9Eb3dubG9hZHMvc3ByZWFkc2hlZXRfZnVsbC54bHN4IikNCg0KDQpjYXRlZ29yaWNhbF92YXJzIDwtIGMoIndvcmtjbGFzcyIsICJlZHVjYXRpb24iLCAibWFyaXRhbC1zdGF0dXMiLCAib2NjdXBhdGlvbiIsIA0KICAgICAgICAgICAgICAgICAgICAgICJyZWxhdGlvbnNoaXAiLCAicmFjZSIsICJzZXgiLCAibmF0aXZlLWNvdW50cnkiLCAiaW5jb21lIikNCg0KZGZbY2F0ZWdvcmljYWxfdmFyc10gPC0gbGFwcGx5KGRmW2NhdGVnb3JpY2FsX3ZhcnNdLCBhcy5mYWN0b3IpDQoNCg0KcGxvdF9yYWRhcl9jaGFydCA8LSBmdW5jdGlvbih2YXJfbmFtZSkgew0KICANCiANCiAgY2F0ZWdvcnlfZnJlcSA8LSBwcm9wLnRhYmxlKHRhYmxlKGRmW1t2YXJfbmFtZV1dKSkgICMgQ29ycmVjdCBmcmVxdWVuY3kgY2FsY3VsYXRpb24NCiAgY2F0ZWdvcnlfZnJlcSA8LSBhcy5kYXRhLmZyYW1lKGNhdGVnb3J5X2ZyZXEpDQogIGNvbG5hbWVzKGNhdGVnb3J5X2ZyZXEpIDwtIGMoIkNhdGVnb3J5IiwgIkZyZXF1ZW5jeSIpDQogIA0KICANCiAgbWF4X3ZhbCA8LSBtYXgoY2F0ZWdvcnlfZnJlcSRGcmVxdWVuY3ksIG5hLnJtID0gVFJVRSkNCiAgbWluX3ZhbCA8LSBtaW4oY2F0ZWdvcnlfZnJlcSRGcmVxdWVuY3ksIG5hLnJtID0gVFJVRSkNCiAgDQogDQogIG1heF92YWx1ZXMgPC0gcmVwKG1heF92YWwsIG5yb3coY2F0ZWdvcnlfZnJlcSkpICAjIEVuc3VyZSBwcm9wZXIgbWF4IHNjYWxpbmcNCiAgbWluX3ZhbHVlcyA8LSByZXAoMCwgbnJvdyhjYXRlZ29yeV9mcmVxKSkgICMgTWluIGlzIGFsd2F5cyAwDQogIA0KICANCiAgcmFkYXJfZGF0YSA8LSByYmluZChtYXhfdmFsdWVzLCBtaW5fdmFsdWVzLCB0KGNhdGVnb3J5X2ZyZXEkRnJlcXVlbmN5KSkNCiAgY29sbmFtZXMocmFkYXJfZGF0YSkgPC0gY2F0ZWdvcnlfZnJlcSRDYXRlZ29yeQ0KICByYWRhcl9kYXRhIDwtIGFzLmRhdGEuZnJhbWUocmFkYXJfZGF0YSkNCiAgDQogIA0KICBkZXYubmV3KCkgIA0KICANCiANCiAgcmFkYXJjaGFydChyYWRhcl9kYXRhLA0KICAgICAgICAgICAgIGF4aXN0eXBlID0gMSwgDQogICAgICAgICAgICAgcGNvbCA9ICJibHVlIiwgICMgTGluZSBjb2xvcg0KICAgICAgICAgICAgIHBmY29sID0gcmdiKDAuMiwgMC41LCAwLjgsIDAuNSksICAjIEZpbGwgY29sb3INCiAgICAgICAgICAgICBwbHdkID0gMywgICAgICAgICMgTGluZSB3aWR0aA0KICAgICAgICAgICAgIGNnbGNvbCA9ICJncmV5IiwgIyBHcmlkIGNvbG9yDQogICAgICAgICAgICAgY2dsdHkgPSAxLCAgICAgICAjIEdyaWQgbGluZSB0eXBlDQogICAgICAgICAgICAgYXhpc2xhYmNvbCA9ICJibGFjayIsDQogICAgICAgICAgICAgY2F4aXNsYWJlbHMgPSByb3VuZChzZXEoMCwgbWF4X3ZhbCwgbGVuZ3RoLm91dCA9IDUpLCAyKSwgIyBQcm9wZXJseSBmb3JtYXR0ZWQgYXhpcyBsYWJlbHMNCiAgICAgICAgICAgICB0aXRsZSA9IHBhc3RlKCJSYWRhciBDaGFydCBvZiIsIHZhcl9uYW1lKSkNCiAgDQogIGxlZ2VuZCgidG9wcmlnaHQiLCBsZWdlbmQgPSBwYXN0ZSgiQ2F0ZWdvcmllcyBvZiIsIHZhcl9uYW1lKSwgY29sID0gImJsdWUiLCBsd2QgPSAzKQ0KfQ0KDQoqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioNCiAgDQogICNRdWVzdGlvbiA5LiBDb25zdHJ1Y3QgYSBoaXN0b2dyYW0gb2YgZWFjaCBudW1lcmljYWwgdmFyaWFibGVzLCB3aXRoIGFuIG92ZXJsYXkgb2YgdGhlIHRhcmdldCB2YXJpYWJsZSBpbmNvbWUgbm9ybWFsaXplIGlmIG5lY2Vzc2FyeS4NCg0KDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KYWdlIDwtIHJlYWQuY3N2KCJDOi9Vc2Vycy9LYWRlZW0gR3JlZW4vRG93bmxvYWRzL0FkdWx0LnR4dCIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBUUlVFKQ0KDQoNCmhlYWQoYWdlKQ0KDQoNCnN0cihhZ2UpDQoNCm51bV92YXJzIDwtIGFnZSAlPiUgc2VsZWN0X2lmKGlzLm51bWVyaWMpICU+JSBuYW1lcygpDQoNCg0KaWYoIWlzLmZhY3RvcihhZ2UkaW5jb21lKSkgew0KICBhZ2UkaW5jb21lIDwtIGFzLmZhY3RvcihhZ2UkaW5jb21lKQ0KfQ0KDQpub3JtYWxpemUgPC0gZnVuY3Rpb24oeCkgew0KICByZXR1cm4oKHggLSBtaW4oeCwgbmEucm0gPSBUUlVFKSkgLyAobWF4KHgsIG5hLnJtID0gVFJVRSkgLSBtaW4oeCwgbmEucm0gPSBUUlVFKSkpDQp9DQoNCg0KYWdlX25vcm1hbGl6ZWQgPC0gYWdlICU+JQ0KICBtdXRhdGUoYWNyb3NzKGFsbF9vZihudW1fdmFycyksIG5vcm1hbGl6ZSkpDQoNCg0KZm9yICh2YXIgaW4gbnVtX3ZhcnMpIHsNCiAgcHJpbnQoDQogICAgZ2dwbG90KGFnZSwgYWVzX3N0cmluZyh4ID0gdmFyLCBmaWxsID0gImluY29tZSIpKSArDQogICAgICBnZW9tX2hpc3RvZ3JhbShhbHBoYSA9IDAuNSwgcG9zaXRpb24gPSAiaWRlbnRpdHkiLCBiaW5zID0gMzApICsNCiAgICAgIGxhYnModGl0bGUgPSBwYXN0ZSgiSGlzdG9ncmFtIG9mIiwgdmFyLCAiYnkgSW5jb21lIiksDQogICAgICAgICAgIHggPSB2YXIsDQogICAgICAgICAgIHkgPSAiQ291bnQiKSArDQogICAgICB0aGVtZV9taW5pbWFsKCkNCiAgKQ0KfQ0KDQoqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKioqKg0KICANCiNRdWVzdGlvbiAxMCBGb3IgZWFjaCBwYWlyIG9mIG51bWVyaWNhbCB2YXJpYWJsZXMsIGNvbnN0cnVjdCBhIHNjYXR0ZXIgcGxvdCBvZiB0aGUgdmFyaWFibGVzLiBEaXNjdXNzIHlvdXIgc2FsaWVudCByZXN1bHRzLg0KIGxpYnJhcnkoZ2dwbG90MikNCg0KDQpkZiA8LSByZWFkLmNzdigiQzovVXNlcnMvS2FkZWVtIEdyZWVuL0Rvd25sb2Fkcy9zcHJlYWRzaGVldF9mdWxsLnhsc3giLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpDQoNCg0KZGYkYWdlIDwtIGFzLm51bWVyaWMoZGYkYWdlKQ0KZGYkZGVtb2d3ZWlnaHQgPC0gYXMubnVtZXJpYyhkZiRkZW1vZ3dlaWdodCkNCmRmJGVkdWNhdGlvbl9udW0gPC0gYXMubnVtZXJpYyhkZiRlZHVjYXRpb24ubnVtKSAgIyBFbnN1cmUgY29ycmVjdCBjb2x1bW4gbmFtZQ0KZGYkY2FwaXRhbF9nYWluIDwtIGFzLm51bWVyaWMoZGYkY2FwaXRhbC5nYWluKQ0KZGYkY2FwaXRhbF9sb3NzIDwtIGFzLm51bWVyaWMoZGYkY2FwaXRhbC5sb3NzKQ0KZGYkaG91cnNfcGVyX3dlZWsgPC0gYXMubnVtZXJpYyhkZiRob3Vycy5wZXIud2VlaykNCg0KDQpkZiA8LSBhcy5kYXRhLmZyYW1lKGRmKQ0KDQoNCnBsb3QoZGYkYWdlLCBkZiRob3Vyc19wZXJfd2VlaywgDQogICAgIG1haW49IlNjYXR0ZXJwbG90IG9mIEFnZSB2cyBIb3VycyBwZXIgV2VlayIsDQogICAgIHhsYWI9IkFnZSIsIA0KICAgICB5bGFiPSJIb3VycyBQZXIgV2VlayIsIA0KICAgICBwY2g9MTksIGNvbD0iYmx1ZSIpDQoNCg0KcGxvdChkZiRlZHVjYXRpb25fbnVtLCBkZiRjYXBpdGFsX2dhaW4sIA0KICAgICBtYWluPSJTY2F0dGVycGxvdCBvZiBFZHVjYXRpb24gTnVtYmVyIHZzIENhcGl0YWwgR2FpbiIsDQogICAgIHhsYWI9IkVkdWNhdGlvbiBOdW1iZXIiLCANCiAgICAgeWxhYj0iQ2FwaXRhbCBHYWluIiwgDQogICAgIHBjaD0xOSwgY29sPSJyZWQiKQ0KDQpwbG90KGRmJGNhcGl0YWxfZ2FpbiwgZGYkY2FwaXRhbF9sb3NzLCANCiAgICAgbWFpbj0iU2NhdHRlcnBsb3Qgb2YgQ2FwaXRhbCBHYWluIHZzIENhcGl0YWwgTG9zcyIsDQogICAgIHhsYWI9IkNhcGl0YWwgR2FpbiIsIA0KICAgICB5bGFiPSJDYXBpdGFsIExvc3MiLCANCiAgICAgcGNoPTE5LCBjb2w9ImdyZWVuIikNCg0KDQpwbG90KGRmJGhvdXJzX3Blcl93ZWVrLCBkZiRkZW1vZ3dlaWdodCwgDQogICAgIG1haW49IlNjYXR0ZXJwbG90IG9mIEhvdXJzIHBlciBXZWVrIHZzIERlbW9ncmFwaGljIFdlaWdodCIsDQogICAgIHhsYWI9IkhvdXJzIFBlciBXZWVrIiwgDQogICAgIHlsYWI9IkRlbW9ncmFwaGljIFdlaWdodCIsIA0KICAgICBwY2g9MTksIGNvbD0icHVycGxlIikNCg0KDQpwbG90KGRmJGNhcGl0YWxfZ2FpbiwgZGYkYWdlLCANCiAgICAgbWFpbj0iU2NhdHRlcnBsb3Qgb2YgQ2FwaXRhbCBHYWluIHZzIEFnZSIsDQogICAgIHhsYWI9IkNhcGl0YWwgR2FpbiIsIA0KICAgICB5bGFiPSJBZ2UiLCANCiAgICAgcGNoPTE5LCBjb2w9Im9yYW5nZSIpDQpgYGANCg0KXGBcYFxgDQo=