# Load the heart-disease workbook from the working directory.
# The call is namespace-qualified so it works whether or not readxl has been
# attached with library(); the path uses plain ASCII quotes, which R requires
# for string literals.
mydata <- readxl::read_excel("heart_data.xlsx")

# First look at the data: leading rows, column types, and column names.
head(mydata)
str(mydata)
names(mydata)
head(mydata, 15)
# Classify ages into "Senior" (60 and over) or "Adult" (under 60).
#
# age: numeric vector of ages; a single value works as well.
# Returns a character vector the same length as `age`. Missing ages yield
# NA_character_ instead of raising an error, and empty input yields
# character(0).
classify_age <- function(age) {
  group <- rep(NA_character_, length(age))
  known <- !is.na(age)
  # Compare only where age is known so NA never reaches the comparison mask.
  group[known & age >= 60] <- "Senior"
  group[known & age < 60] <- "Adult"
  group
}
# Label every row with its age group.
# vapply() pins the result to a character vector; sapply() would return an
# empty list for zero-row input.
age_group <- vapply(mydata$Age, classify_age, character(1))
head(age_group)

# Attach the labels to the data as a new column.
mydata$AgeGroup <- age_group
head(mydata)
# Rows for patients aged 60 or older.
# which() keeps only positions where the comparison is TRUE; a logical index
# containing NA can otherwise produce rows filled with NA.
seniors <- mydata[which(mydata$Age >= 60), ]
head(seniors)

# Rows with cholesterol above 200, with the same NA handling as above.
high_cholesterol <- mydata[which(mydata$Cholesterol > 200), ]
head(high_cholesterol)
# Assemble a modelling data frame: the dependent variable first, followed by
# the three independent variables.
dependent_var <- mydata$HeartDisease
independent_vars <- mydata[, c("Age", "Cholesterol", "RestingBP")]

# Naming the first column in the constructor gives it the label
# "HeartDisease" directly, with no separate colnames() fix-up afterwards.
new_df <- data.frame(HeartDisease = dependent_var, independent_vars)
head(new_df)
# Count missing values per column, then drop every row that contains any.
colSums(is.na(mydata))
mydata_clean <- na.omit(mydata)
nrow(mydata)        # row count before dropping incomplete rows
nrow(mydata_clean)  # row count after dropping incomplete rows

# Count fully duplicated rows, then keep only the first occurrence of each.
sum(duplicated(mydata_clean))
mydata_clean <- mydata_clean[!duplicated(mydata_clean), ]
# Coerce Age to numeric, then list the rows from highest to lowest Age.
mydata_clean$Age <- as.numeric(mydata_clean$Age)
# Negating the key sorts in descending order.
ordered_data <- mydata_clean[order(-mydata_clean$Age), ]
head(ordered_data)
# Rename two columns. The logical mask selects only an exactly matching name,
# so a column that is absent is left untouched rather than raising an error.
colnames(mydata_clean)[colnames(mydata_clean) == "RestingBP"] <- "Resting_BP"
colnames(mydata_clean)[colnames(mydata_clean) == "HeartDisease"] <-
  "Heart_Disease"
names(mydata_clean)
# Coerce Cholesterol to numeric, then derive a column holding twice its value.
mydata_clean$Cholesterol <- as.numeric(mydata_clean$Cholesterol)
mydata_clean$Cholesterol_Double <- mydata_clean$Cholesterol * 2
head(mydata_clean)
# Reproducible 70/30 train/test split of the cleaned data.
set.seed(123)
n <- nrow(mydata_clean)

# sample.int() draws from 1..n and stays well-defined when n is 0, where 1:n
# would expand to c(1, 0). floor() makes the training size an explicit whole
# number.
train_indices <- sample.int(n, size = floor(0.7 * n))
training_set <- mydata_clean[train_indices, ]

# Take the complement by position. A negative index built from an empty
# train_indices would select no rows at all instead of every row.
testing_set <- mydata_clean[setdiff(seq_len(n), train_indices), ]
# Descriptive statistics for the cleaned data, overall and for Age.
summary(mydata_clean)
summary(mydata_clean$Age)

mean(mydata_clean$Age, na.rm = TRUE)
median(mydata_clean$Age, na.rm = TRUE)

# Return the most frequent value in `v`.
# unique() keeps values in order of first appearance and which.max() returns
# the first maximum, so ties resolve to the value that occurs earliest in `v`.
get_mode <- function(v) {
  uniqv <- unique(v)
  uniqv[which.max(tabulate(match(v, uniqv)))]
}
get_mode(mydata_clean$Age)

range(mydata_clean$Age, na.rm = TRUE)
# Scatter plot of Cholesterol against Age, drawn with solid blue points.
plot(
  mydata_clean$Age, mydata_clean$Cholesterol,
  main = "Age vs Cholesterol",
  xlab = "Age", ylab = "Cholesterol",
  col = "blue", pch = 19
)
# Mean Cholesterol for each level of Sex, ignoring missing values.
avg_cholesterol <- tapply(
  mydata_clean$Cholesterol, mydata_clean$Sex, mean,
  na.rm = TRUE
)

# One bar per Sex level showing that group's mean.
barplot(
  avg_cholesterol,
  main = "Average Cholesterol by Sex",
  ylab = "Cholesterol",
  col = "green"
)
# Correlation between Age and Cholesterol, using only rows where both values
# are present.
cor(mydata_clean$Age, mydata_clean$Cholesterol, use = "complete.obs")

# Simple linear regression of Cholesterol on Age.
model <- lm(Cholesterol ~ Age, data = mydata_clean)
summary(model)

# Scatter plot with the fitted regression line drawn on top in red.
plot(
  mydata_clean$Age, mydata_clean$Cholesterol,
  main = "Age vs Cholesterol with Regression Line",
  xlab = "Age", ylab = "Cholesterol",
  pch = 19
)
abline(model, col = "red", lwd = 2)
# Save every object in the current workspace to an .RData file in the working
# directory so the session can be restored later with load().
save.image("Assignment_workspace.RData")