# Load necessary packages (they must already be installed)
library(randomForest)
library(caret)
library(ggplot2)
# Bring the built-in Titanic contingency table into the workspace.
data(list = "Titanic")

# Flatten the table into a data frame: one row per cell of the table, with the
# cell count stored in the Freq column and the classifying variables as factors.
titanic_df <- as.data.frame(Titanic, stringsAsFactors = TRUE)

# Preview the first six rows.
head(titanic_df, n = 6L)

# Convert 'Survived' to a binary factor

# Recode Survived as a 0/1 factor: "Yes" becomes 1, every other value 0.
# Explicit levels guarantee that both classes exist as factor levels even if
# one of them is absent from the data.
# No train/test split is made here: the partitioning code that follows reseeds
# the RNG and builds splitIndex, train_data and test_data itself, so an extra
# split at this point would only be recomputed and overwritten.
titanic_df$Survived <- factor(
  as.integer(titanic_df$Survived == "Yes"),
  levels = c(0L, 1L)
)

# Split the data into training and testing datasets

# Fix the RNG seed so the partition and the forest are reproducible.
set.seed(123)

# Partition on the outcome column, keeping p = 0.7 of the rows for training;
# list = FALSE returns the selected row indices instead of a list.
splitIndex <- caret::createDataPartition(
  y = titanic_df$Survived,
  p = 0.7,
  list = FALSE
)
train_data <- titanic_df[splitIndex, ]
test_data <- titanic_df[-splitIndex, ]

# Fit a 100-tree random forest using every other column as a predictor.
# NOTE(review): `Survived ~ .` presumably also picks up the Freq count column
# of the flattened table as a predictor -- confirm this is intended rather
# than expanding the table to one row per passenger.
rf_model <- randomForest::randomForest(
  Survived ~ .,
  data = train_data,
  ntree = 100
)

# Feature selection using the importance measure from the random forest.
# randomForest::importance() returns a numeric matrix with one row per
# predictor. It is converted to a data frame before a column is added with
# `$`, because `$<-` on a matrix coerces it to a plain list ("Coercing LHS to
# a list"), which discards the MeanDecreaseGini column and leaves the
# selection below empty.
importance <- as.data.frame(randomForest::importance(rf_model))

# Extract feature names from the row names
importance$Feature <- rownames(importance)

# Select features with positive MeanDecreaseGini
selected_features <- importance$Feature[importance$MeanDecreaseGini > 0]

# Subset the data with selected features; drop = FALSE keeps the result a
# data frame even when only the Survived column is selected.
train_data_selected <-
  train_data[, c("Survived", selected_features), drop = FALSE]
test_data_selected <-
  test_data[, c("Survived", selected_features), drop = FALSE]

# Extract feature importance.
# randomForest::importance() returns a numeric matrix with one row per
# predictor. It is converted to a data frame before a column is added with
# `$`, because `$<-` on a matrix coerces it to a plain list ("Coercing LHS to
# a list") and loses the MeanDecreaseGini column. The table is recomputed from
# rf_model here so this section does not rely on any earlier state of
# `importance`.
importance <- as.data.frame(randomForest::importance(rf_model))
importance$Feature <- rownames(importance)

# Select features with positive MeanDecreaseGini
selected_features <- importance$Feature[importance$MeanDecreaseGini > 0]

# Subset the data with selected features; drop = FALSE keeps the result a
# data frame even when only the Survived column is selected.
train_data_selected <-
  train_data[, c("Survived", selected_features), drop = FALSE]
test_data_selected <-
  test_data[, c("Survived", selected_features), drop = FALSE]

# Number of features to show in the importance plot
top_n <- 4

# Check the structure of importance object
str(importance)

# Build the plotting table by column name rather than by list position, so it
# works for any number of predictors.
importance_df <- data.frame(
  Feature = importance$Feature,
  Importance = importance$MeanDecreaseGini
)

# Order the data frame by Importance (largest first)
importance_df <- importance_df[order(-importance_df$Importance), ]

# Keep only the top N features; head() returns all rows when there are fewer
# than top_n, whereas indexing with 1:top_n would pad the table with NA rows.
importance_df <- head(importance_df, top_n)

# Plot

# Horizontal bar chart of the top N features.
# Bars are ordered by importance via reorder(); geom_col() draws bar heights
# directly from the Importance values, and coord_flip() puts the feature
# names on the vertical axis.
print(
  ggplot2::ggplot(
    importance_df,
    ggplot2::aes(x = reorder(Feature, Importance), y = Importance)
  ) +
    ggplot2::geom_col(fill = "skyblue", color = "black") +
    ggplot2::coord_flip() +
    ggplot2::labs(
      title = paste("Top", top_n, "Important Features from Random Forest"),
      x = "Feature",
      y = "Mean Decrease in Gini"
    )
)

LS0tCnRpdGxlOiAiUmFuZG9tIEZvcmVzdCBvbiBUaXRhbmljIERhdGFzZXQiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCmBgYHtyfQojIEluc3RhbGwgYW5kIGxvYWQgbmVjZXNzYXJ5IHBhY2thZ2VzCmxpYnJhcnkocmFuZG9tRm9yZXN0KQpsaWJyYXJ5KGNhcmV0KQpsaWJyYXJ5KGdncGxvdDIpCmBgYAoKCmBgYHtyfQojIExvYWQgdGhlIFRpdGFuaWMgZGF0YXNldApkYXRhKCJUaXRhbmljIikKCiMgQ29udmVydCB0aGUgZGF0YXNldCB0byBhIGRhdGEgZnJhbWUKdGl0YW5pY19kZiA8LSBhcy5kYXRhLmZyYW1lKFRpdGFuaWMpCmhlYWQodGl0YW5pY19kZikKYGBgCgojIENvbnZlcnQgJ1N1cnZpdmVkJyB0byBhIGJpbmFyeSBmYWN0b3IKYGBge3J9CnRpdGFuaWNfZGYkU3Vydml2ZWQgPC0gYXMuZmFjdG9yKGlmZWxzZSh0aXRhbmljX2RmJFN1cnZpdmVkID09ICJZZXMiLCAxLCAwKSkKYGBgCgojIFNwbGl0IHRoZSBkYXRhIGludG8gdHJhaW5pbmcgYW5kIHRlc3RpbmcgZGF0YXNldHMKCmBgYHtyfQpzZXQuc2VlZCgxMjMpCnNwbGl0SW5kZXggPC0gY2FyZXQ6OmNyZWF0ZURhdGFQYXJ0aXRpb24odGl0YW5pY19kZiRTdXJ2aXZlZCwgcCA9IDAuNywgbGlzdCA9IEZBTFNFKQp0cmFpbl9kYXRhIDwtIHRpdGFuaWNfZGZbc3BsaXRJbmRleCwgXQp0ZXN0X2RhdGEgPC0gdGl0YW5pY19kZlstc3BsaXRJbmRleCwgXQpgYGAKCgoKYGBge3J9CnJmX21vZGVsIDwtIHJhbmRvbUZvcmVzdDo6cmFuZG9tRm9yZXN0KFN1cnZpdmVkIH4gLiwgZGF0YSA9IHRyYWluX2RhdGEsIG50cmVlID0gMTAwKQoKIyBGZWF0dXJlIHNlbGVjdGlvbiB1c2luZyBpbXBvcnRhbmNlIG1lYXN1cmUgZnJvbSB0aGUgcmFuZG9tIGZvcmVzdAppbXBvcnRhbmNlIDwtIHJhbmRvbUZvcmVzdDo6aW1wb3J0YW5jZShyZl9tb2RlbCkKCiMgRXh0cmFjdCBmZWF0dXJlIGltcG9ydGFuY2UKaW1wb3J0YW5jZSRGZWF0dXJlIDwtIHJvd25hbWVzKGltcG9ydGFuY2UpCgojIFNlbGVjdCBmZWF0dXJlcyB3aXRoIHBvc2l0aXZlIE1lYW5EZWNyZWFzZUdpbmkKc2VsZWN0ZWRfZmVhdHVyZXMgPC0gaW1wb3J0YW5jZSRGZWF0dXJlW2ltcG9ydGFuY2UkTWVhbkRlY3JlYXNlR2luaSA+IDBdCgojIFN1YnNldCB0aGUgZGF0YSB3aXRoIHNlbGVjdGVkIGZlYXR1cmVzCnRyYWluX2RhdGFfc2VsZWN0ZWQgPC0gdHJhaW5fZGF0YVssIGMoIlN1cnZpdmVkIiwgc2VsZWN0ZWRfZmVhdHVyZXMpXQp0ZXN0X2RhdGFfc2VsZWN0ZWQgPC0gdGVzdF9kYXRhWywgYygiU3Vydml2ZWQiLCBzZWxlY3RlZF9mZWF0dXJlcyldCmBgYAoKCmBgYHtyfQoKIyBFeHRyYWN0IGZlYXR1cmUgaW1wb3J0YW5jZQppbXBvcnRhbmNlIDwtIHJhbmRvbUZvcmVzdDo6aW1wb3J0YW5jZShyZl9tb2RlbCkKaW1wb3J0YW5jZSRGZWF0dXJlIDwtIHJvd25hbWVzKGltcG9ydGFuY2UpCgojIFNlbGVjdCBmZWF0dXJlcyB3aXRoIHBvc2l0aXZlIE1lYW5EZWNyZWFzZUdpbmkKc2VsZWN0ZWRfZmVhdHVyZXMgPC0gaW1wb3J0YW5j
ZSRGZWF0dXJlW2ltcG9ydGFuY2UkTWVhbkRlY3JlYXNlR2luaSA+IDBdCgojIFN1YnNldCB0aGUgZGF0YSB3aXRoIHNlbGVjdGVkIGZlYXR1cmVzCnRyYWluX2RhdGFfc2VsZWN0ZWQgPC0gdHJhaW5fZGF0YVssIGMoIlN1cnZpdmVkIiwgc2VsZWN0ZWRfZmVhdHVyZXMpXQp0ZXN0X2RhdGFfc2VsZWN0ZWQgPC0gdGVzdF9kYXRhWywgYygiU3Vydml2ZWQiLCBzZWxlY3RlZF9mZWF0dXJlcyldCgp0b3BfbiA8LSA0CiMgVmlzdWFsaXplIGZlYXR1cmUgaW1wb3J0YW5jZQojIENoZWNrIHRoZSBzdHJ1Y3R1cmUgb2YgaW1wb3J0YW5jZSBvYmplY3QKc3RyKGltcG9ydGFuY2UpCgpgYGAKCgpgYGB7cn0KIyBFeHRyYWN0IGZlYXR1cmUgaW1wb3J0YW5jZQppbXBvcnRhbmNlX2RmIDwtIGRhdGEuZnJhbWUoCiAgRmVhdHVyZSA9IGltcG9ydGFuY2VbWzVdXSwKICBJbXBvcnRhbmNlID0gdW5saXN0KGltcG9ydGFuY2VbMTo0XSkKKQoKIyBPcmRlciB0aGUgZGF0YSBmcmFtZSBieSBJbXBvcnRhbmNlCmltcG9ydGFuY2VfZGYgPC0gaW1wb3J0YW5jZV9kZltvcmRlcigtaW1wb3J0YW5jZV9kZiRJbXBvcnRhbmNlKSwgXQoKIyBLZWVwIG9ubHkgdGhlIHRvcCBOIGZlYXR1cmVzCmltcG9ydGFuY2VfZGYgPC0gaW1wb3J0YW5jZV9kZlsxOnRvcF9uLCBdCgoKaW1wb3J0YW5jZV9kZiA8LSBpbXBvcnRhbmNlX2RmWzE6dG9wX24sIF0gICMgS2VlcCBvbmx5IHRoZSB0b3AgTiBmZWF0dXJlcwoKCmBgYAoKIyBQbG90CgpgYGB7cn0KIyBQbG90IHRoZSB0b3AgTiBpbXBvcnRhbnQgZmVhdHVyZXMKCiMgVmlzdWFsaXplIGZlYXR1cmUgaW1wb3J0YW5jZQpwcmludChnZ3Bsb3QyOjpnZ3Bsb3QoaW1wb3J0YW5jZV9kZiwgZ2dwbG90Mjo6YWVzKHggPSByZW9yZGVyKEZlYXR1cmUsIEltcG9ydGFuY2UpLCB5ID0gSW1wb3J0YW5jZSkpICsKICAgICAgZ2dwbG90Mjo6Z2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIGZpbGwgPSAic2t5Ymx1ZSIsIGNvbG9yID0gImJsYWNrIikgKwogICAgICBnZ3Bsb3QyOjpjb29yZF9mbGlwKCkgKwogICAgICBnZ3Bsb3QyOjpsYWJzKHRpdGxlID0gcGFzdGUoIlRvcCIsIHRvcF9uLCAiSW1wb3J0YW50IEZlYXR1cmVzIGZyb20gUmFuZG9tIEZvcmVzdCIpLAogICAgICAgICAgIHggPSAiRmVhdHVyZSIsCiAgICAgICAgICAgeSA9ICJNZWFuIERlY3JlYXNlIGluIEdpbmkiKSkKCmBgYAoKCgo=