# ----------------------------------------------------------
# 1. Create Hospital Data Frame (Mixed Data Types)
# ----------------------------------------------------------
hospital <- data.frame(
PatientID = c(101, 102, 103, 104, 105),
PatientName = c("Aman", "Riya", "Karan", "Neha", "Rohit"),
Gender = factor(c("Male", "Female", "Male", "Female", "Male")),
Ward = factor(c("General", "ICU", "General", "Private", "ICU")),
TreatmentCost = c(12000, 25000, 15000, 30000, NA),
MedicineCost = c(3000, 5000, 4000, 6000, 4500),
DaysAdmitted = c(3, 5, 2, 6, 4)
)
# View data frame
hospital
# ----------------------------------------------------------
# 2. Basic Information About Data Frame
# ----------------------------------------------------------
str(hospital) # Structure of data frame
summary(hospital) # Summary statistics
dim(hospital) # Dimensions
nrow(hospital) # Number of rows
ncol(hospital) # Number of columns
colnames(hospital) # Column names
# ----------------------------------------------------------
# 3. Access Column Data Using $ Operator
# ----------------------------------------------------------
hospital$PatientName
hospital$TreatmentCost
hospital[, c("TreatmentCost", "MedicineCost")]
# ----------------------------------------------------------
# 4. Add a New Column (Total Bill)
# ----------------------------------------------------------
hospital$TotalBill <-hospital$TreatmentCost + hospital$MedicineCost
#As in one of the cell there is NA so use either of the following method
#1. Use rowSums()
hospital$TotalBill <- rowSums(
hospital[, c("TreatmentCost", "MedicineCost")],
na.rm = TRUE
)
#2. Replace NA with 0
hospital$TotalBill <-
ifelse(is.na(hospital$TreatmentCost), 0, hospital$TreatmentCost) +
ifelse(is.na(hospital$MedicineCost), 0, hospital$MedicineCost)
# ----------------------------------------------------------
# 5. Add a New Row
# ----------------------------------------------------------
new_patient <- data.frame(
PatientID = 106,
PatientName = "Simran",
Gender = factor("Female", levels = levels(hospital$Gender)),
Ward = factor("Private", levels = levels(hospital$Ward)),
TreatmentCost = 20000,
MedicineCost = 5500,
DaysAdmitted = 3
)
new_patient$TotalBill <- new_patient$TreatmentCost + new_patient$MedicineCost
hospital <- rbind(hospital, new_patient)
# ----------------------------------------------------------
# 6. Add a Column at a Specific Position
# (DoctorFee after MedicineCost)
# ----------------------------------------------------------
hospital <- cbind(
hospital[, 1:6],
DoctorFee = c(2000, 3000, 2500, 3500, 3000, 2800),
hospital[, 7:8]
)
#Use the following method for dymically adding new column
# Find index of DoctorFee column dynamically
doctor_fee_index <- which(colnames(hospital) == "DoctorFee")
# Find index of last column
last_col_index <- ncol(hospital)
hospital <- cbind(
hospital[, 1:doctor_fee_index], # columns up to DoctorFee
NursingFee = c(1000, 1500, 1200, 1800, 1300, 1200),
hospital[, (doctor_fee_index + 1):last_col_index] # remaining columns
)
#What if DoctorFee is the LAST column?
doctor_fee_index <- which(colnames(hospital) == "DoctorFee")
last_col_index <- ncol(hospital)
if (doctor_fee_index == last_col_index) {
hospital <- cbind(
hospital,
NursingFee = c(1000, 1500, 1200, 1800, 1300)
)
} else {
hospital <- cbind(
hospital[, 1:doctor_fee_index],
NursingFee = c(1000, 1500, 1200, 1800, 1300),
hospital[, (doctor_fee_index + 1):last_col_index]
)
}
# ----------------------------------------------------------
# 7. Add a Row at a Specific Position (3rd Row)
# ----------------------------------------------------------
temp_row <- hospital[3, ]
temp_row$PatientID <- 107
temp_row$PatientName <- "Arjun"
hospital <- rbind(
hospital[1:2, ],
temp_row,
hospital[3:nrow(hospital), ]
)
rownames(hospital)<-NULL
# ----------------------------------------------------------
# 8. Update / Modify Data
# ----------------------------------------------------------
hospital$DaysAdmitted[hospital$PatientName == "Riya"] <- 6
# ----------------------------------------------------------
# 9. Update / Modify Data Using ifelse()
# ----------------------------------------------------------
hospital$PatientType <- ifelse(
hospital$Ward == "ICU",
"Critical",
"Normal"
)
# ----------------------------------------------------------
# 10. Remove a Column
# ----------------------------------------------------------
hospital$DoctorFee <- NULL
# ----------------------------------------------------------
# 11. Remove a Row
# ----------------------------------------------------------
hospital <- hospital[hospital$PatientName != "Arjun", ]
# ----------------------------------------------------------
# 12. Filtering / Subsetting Data
# ----------------------------------------------------------
# ICU patients
icu_patients <- subset(hospital, Ward == "ICU")
# Patients with Total Bill > 20000
high_bill_patients <- hospital[hospital$TotalBill > 20000, ]
# ----------------------------------------------------------
# 13. Scenario-Based Basic Analysis
# ----------------------------------------------------------
# Row-wise mean cost per patient
rowMeans(hospital[, c("TreatmentCost", "MedicineCost")], na.rm = TRUE)
# Total treatment cost of hospital
sum(hospital$TreatmentCost, na.rm = TRUE)
# Average medicine cost
mean(hospital$MedicineCost)
# ----------------------------------------------------------
# 14. Handling Missing Data (NA)
# ----------------------------------------------------------
any(is.na(hospital)) # Check if any NA exists
colSums(is.na(hospital)) # NA count per column
which(is.na(hospital), arr.ind=TRUE) # Location of NA
# Replace NA in TreatmentCost with column mean
hospital$TreatmentCost[is.na(hospital$TreatmentCost)] <-
mean(hospital$TreatmentCost, na.rm = TRUE)
# ----------------------------------------------------------
# 15. Sorting Data Using order()
# ----------------------------------------------------------
# Ascending order of TotalBill
hospital[order(hospital$TotalBill), ]
# Descending order of TotalBill
hospital[order(-hospital$TotalBill), ]
# ----------------------------------------------------------
# 16. Calculate Total for a Specific Row (Row Number)
# ----------------------------------------------------------
hospital$TotalBill[2] <-
hospital$TreatmentCost[2] + hospital$MedicineCost[2]
# ----------------------------------------------------------
# 17. Calculate Total Using with()
# ----------------------------------------------------------
hospital$TotalBill[3] <-
with(hospital, TreatmentCost[3] + MedicineCost[3])
# ----------------------------------------------------------
# 18. Calculate Total Using Condition (Patient Name)
# ----------------------------------------------------------
hospital$TotalBill[hospital$PatientName == "Neha"] <-
hospital$TreatmentCost[hospital$PatientName == "Neha"] +
hospital$MedicineCost[hospital$PatientName == "Neha"]
# ----------------------------------------------------------
# 19. Further Relevant Hospital Analysis
# ----------------------------------------------------------
# Total revenue by ward
aggregate(TotalBill ~ Ward, data = hospital, sum)
# Average days admitted by ward
aggregate(DaysAdmitted ~ Ward, data = hospital, mean)
# Number of patients per ward
table(hospital$Ward)
# Check overall NA
any(is.na(hospital))
# Count NA by column
colSums(is.na(hospital))
# Location of any NA
which(is.na(hospital), arr.ind = TRUE)
# Replace NA in TreatmentCost with mean of TreatmentCost
hospital$TreatmentCost[is.na(hospital$TreatmentCost)] <-
mean(hospital$TreatmentCost, na.rm = TRUE)
# Confirm replacement
colSums(is.na(hospital))
# ----------------------------------------------------------
# 4. Add Calculated Columns
# ----------------------------------------------------------
# Add TotalBill = TreatmentCost + MedicineCost
hospital$TotalBill <- hospital$TreatmentCost + hospital$MedicineCost
# View updated
print(hospital)
# ----------------------------------------------------------
# 5. Count Patients by Ward
# ----------------------------------------------------------
# Tabular count
ward_counts <- table(hospital$Ward)
print(ward_counts)
# Number of ICU patients
icu_count <- sum(hospital$Ward == "ICU")
print(icu_count)
# Number of Private ward patients
private_count <- sum(hospital$Ward == "Private")
print(private_count)
# ICU + Private
icu_private_count <- sum(hospital$Ward %in% c("ICU", "Private"))
print(icu_private_count)
# ----------------------------------------------------------
# 6. Count Patients by Gender
# ----------------------------------------------------------
gender_counts <- table(hospital$Gender)
print(gender_counts)
male_count <- sum(hospital$Gender == "Male")
female_count <- sum(hospital$Gender == "Female")
print(male_count)
print(female_count)
# ----------------------------------------------------------
# 7. Cross Tabulation: Gender vs Ward
# ----------------------------------------------------------
gender_ward_table <- table(hospital$Gender, hospital$Ward)
print(gender_ward_table)
# ----------------------------------------------------------
# 8. Percentage Distribution
# ----------------------------------------------------------
# Percentage of ICU patients
perc_icu <- (icu_count / nrow(hospital)) * 100
print(perc_icu)
# Percentage of Female patients
perc_female <- (female_count / nrow(hospital)) * 100
print(perc_female)
# ----------------------------------------------------------
# 9. Additional Business Insight
# ----------------------------------------------------------
# Average Treatment Cost
avg_treatment <- mean(hospital$TreatmentCost)
print(avg_treatment)
# Average Medicine Cost
avg_medicine <- mean(hospital$MedicineCost)
print(avg_medicine)
# Average TotalBill
avg_total_bill <- mean(hospital$TotalBill)
print(avg_total_bill)
# Average days admitted
avg_days <- mean(hospital$DaysAdmitted)
print(avg_days)
# Total revenue (sum of TotalBill)
total_revenue <- sum(hospital$TotalBill)
print(total_revenue)
# ----------------------------------------------------------
# 10. Patients with Above-Average TotalBill
# ----------------------------------------------------------
above_avg <- subset(hospital, TotalBill > avg_total_bill)
print(above_avg)
# ----------------------------------------------------------
# 11. Patients with Short Stay (< 4 days)
# ----------------------------------------------------------
short_stay <- subset(hospital, DaysAdmitted < 4)
print(short_stay)
# ----------------------------------------------------------
# 12. Identify High Billing Patients
# ----------------------------------------------------------
high_bill <- subset(hospital, TotalBill > 20000)
print(high_bill)
# ----------------------------------------------------------
# 13. Find Patient with Max TotalBill
# ----------------------------------------------------------
max_bill_value <- max(hospital$TotalBill)
max_bill_patient <- hospital[hospital$TotalBill == max_bill_value, ]
print(max_bill_patient)
# ----------------------------------------------------------
# 14. Sort Data Based on TotalBill
# ----------------------------------------------------------
# Ascending
asc_sorted <- hospital[order(hospital$TotalBill), ]
print(asc_sorted)
# Descending
desc_sorted <- hospital[order(-hospital$TotalBill), ]
print(desc_sorted)
# ----------------------------------------------------------
# 15. Row-wise Mean of Costs
# ----------------------------------------------------------
row_means <- rowMeans(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(row_means)
# ----------------------------------------------------------
# 16. Column-wise Mean and Sum of Costs
# ----------------------------------------------------------
col_means <- colMeans(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(col_means)
col_sums <- colSums(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(col_sums)