# Convert the categorical columns of `data` to factors, spelling out the level
# sequence wherever a specific one is wanted.
# NOTE: factor() maps any value that is not listed in `levels` to NA, so an
# unexpected or misspelled category shows up as a missing value afterwards.
data <- data %>%
  mutate(
    Fever_Severity = factor(
      Fever_Severity,
      levels = c("Normal", "Mild Fever", "High Fever")
    ),
    # No explicit levels: factor() uses the sorted unique values it finds.
    Gender = factor(Gender),
    # All Yes/No columns share one coding, so convert them in a single pass;
    # "No" is listed first and therefore becomes the reference level.
    across(
      c(
        Headache, Body_Ache, Fatigue, Chronic_Conditions,
        Allergies, Smoking_History, Alcohol_Consumption
      ),
      function(x) factor(x, levels = c("No", "Yes"))
    ),
    # Levels are given in a fixed sequence, but the result is still a plain
    # (unordered) factor because `ordered = TRUE` is not passed.
    # NOTE(review): if ordinal semantics are intended here and for
    # Blood_Pressure, add `ordered = TRUE` -- this changes the class and the
    # default contrasts used by modelling functions, so confirm first.
    Physical_Activity = factor(
      Physical_Activity,
      levels = c("Sedentary", "Moderate", "Active")
    ),
    Diet_Type = factor(Diet_Type), # No inherent order; default levels
    # Fixed level sequence, unordered factor (see note on Physical_Activity).
    Blood_Pressure = factor(Blood_Pressure, levels = c("Low", "Normal", "High")),
    Previous_Medication = factor(Previous_Medication), # No inherent order
    # Target variable; no inherent order, default levels.
    Recommended_Medication = factor(Recommended_Medication)
  )
# Count the NA values in each column; the result is a named integer vector
# with one entry per column of `data`.
# vapply() pins the result to exactly one integer per column, so the return
# type cannot silently change the way sapply()'s does (e.g. sapply() returns
# an empty list when there are no columns).
# Because this runs after the factor conversion, values that matched none of
# the declared levels are counted here as NA as well.
missing_counts <- vapply(data, function(x) sum(is.na(x)), integer(1))
missing_counts # Print out the missing value counts for review (nothing was missing)
## Temperature Fever_Severity Age
## 0 0 0
## Gender BMI Headache
## 0 0 0
## Body_Ache Fatigue Chronic_Conditions
## 0 0 0
## Allergies Smoking_History Alcohol_Consumption
## 0 0 0
## Humidity AQI Physical_Activity
## 0 0 0
## Diet_Type Heart_Rate Blood_Pressure
## 0 0 0
## Previous_Medication Recommended_Medication
## 0 0