# ----------------------------------------------------------
# 1. Create Hospital Data Frame (Mixed Data Types)
# ----------------------------------------------------------
hospital <- data.frame(
  PatientID = c(101, 102, 103, 104, 105),
  PatientName = c("Aman", "Riya", "Karan", "Neha", "Rohit"),
  Gender = factor(c("Male", "Female", "Male", "Female", "Male")),
  Ward = factor(c("General", "ICU", "General", "Private", "ICU")),
  TreatmentCost = c(12000, 25000, 15000, 30000, NA),
  MedicineCost = c(3000, 5000, 4000, 6000, 4500),
  DaysAdmitted = c(3, 5, 2, 6, 4)
)
# View data frame
hospital
# ----------------------------------------------------------
# 2. Basic Information About Data Frame
# ----------------------------------------------------------
str(hospital)        # Structure of data frame
summary(hospital)    # Summary statistics
dim(hospital)        # Dimensions
nrow(hospital)       # Number of rows
ncol(hospital)       # Number of columns
colnames(hospital)   # Column names

# ----------------------------------------------------------
# 3. Access Column Data Using $ Operator
# ----------------------------------------------------------
hospital$PatientName
hospital$TreatmentCost
hospital[, c("TreatmentCost", "MedicineCost")]


# ----------------------------------------------------------
# 4. Add a New Column (Total Bill)
# ----------------------------------------------------------

hospital$TotalBill <-hospital$TreatmentCost + hospital$MedicineCost

#As in one of the cell there is NA so use either of the following method
#1. Use rowSums()
hospital$TotalBill <- rowSums(
  hospital[, c("TreatmentCost", "MedicineCost")],
  na.rm = TRUE
)

#2. Replace NA with 0
hospital$TotalBill <- 
  ifelse(is.na(hospital$TreatmentCost), 0, hospital$TreatmentCost) +
  ifelse(is.na(hospital$MedicineCost), 0, hospital$MedicineCost)
# ----------------------------------------------------------
# 5. Add a New Row
# ----------------------------------------------------------

new_patient <- data.frame(
  PatientID = 106,
  PatientName = "Simran",
  Gender = factor("Female", levels = levels(hospital$Gender)),
  Ward = factor("Private", levels = levels(hospital$Ward)),
  TreatmentCost = 20000,
  MedicineCost = 5500,
  DaysAdmitted = 3
)
new_patient$TotalBill <- new_patient$TreatmentCost + new_patient$MedicineCost

hospital <- rbind(hospital, new_patient)


# ----------------------------------------------------------
# 6. Add a Column at a Specific Position
# (DoctorFee after MedicineCost)
# ----------------------------------------------------------

hospital <- cbind(
  hospital[, 1:6],
  DoctorFee = c(2000, 3000, 2500, 3500, 3000, 2800),
  hospital[, 7:8]
)

#Use the following method for dymically adding new column 
# Find index of DoctorFee column dynamically
doctor_fee_index <- which(colnames(hospital) == "DoctorFee")

# Find index of last column
last_col_index <- ncol(hospital)
hospital <- cbind(
  hospital[, 1:doctor_fee_index],          # columns up to DoctorFee
  NursingFee = c(1000, 1500, 1200, 1800, 1300, 1200),
  hospital[, (doctor_fee_index + 1):last_col_index]  # remaining columns
)

#What if DoctorFee is the LAST column?
doctor_fee_index <- which(colnames(hospital) == "DoctorFee")
last_col_index <- ncol(hospital)

if (doctor_fee_index == last_col_index) {
  hospital <- cbind(
    hospital,
    NursingFee = c(1000, 1500, 1200, 1800, 1300)
  )
} else {
  hospital <- cbind(
    hospital[, 1:doctor_fee_index],
    NursingFee = c(1000, 1500, 1200, 1800, 1300),
    hospital[, (doctor_fee_index + 1):last_col_index]
  )
}


# ----------------------------------------------------------
# 7. Add a Row at a Specific Position (3rd Row)
# ----------------------------------------------------------

temp_row <- hospital[3, ]
temp_row$PatientID <- 107
temp_row$PatientName <- "Arjun"

hospital <- rbind(
  hospital[1:2, ],
  temp_row,
  hospital[3:nrow(hospital), ]
)
rownames(hospital)<-NULL
# ----------------------------------------------------------
# 8. Update / Modify Data
# ----------------------------------------------------------

hospital$DaysAdmitted[hospital$PatientName == "Riya"] <- 6

# ----------------------------------------------------------
# 9. Update / Modify Data Using ifelse()
# ----------------------------------------------------------

hospital$PatientType <- ifelse(
  hospital$Ward == "ICU",
  "Critical",
  "Normal"
)


# ----------------------------------------------------------
# 10. Remove a Column
# ----------------------------------------------------------

hospital$DoctorFee <- NULL


# ----------------------------------------------------------
# 11. Remove a Row
# ----------------------------------------------------------

hospital <- hospital[hospital$PatientName != "Arjun", ]


# ----------------------------------------------------------
# 12. Filtering / Subsetting Data
# ----------------------------------------------------------

# ICU patients
icu_patients <- subset(hospital, Ward == "ICU")

# Patients with Total Bill > 20000
high_bill_patients <- hospital[hospital$TotalBill > 20000, ]


# ----------------------------------------------------------
# 13. Scenario-Based Basic Analysis
# ----------------------------------------------------------

# Row-wise mean cost per patient
rowMeans(hospital[, c("TreatmentCost", "MedicineCost")], na.rm = TRUE)

# Total treatment cost of hospital
sum(hospital$TreatmentCost, na.rm = TRUE)

# Average medicine cost
mean(hospital$MedicineCost)


# ----------------------------------------------------------
# 14. Handling Missing Data (NA)
# ----------------------------------------------------------

any(is.na(hospital))                 # Check if any NA exists
colSums(is.na(hospital))             # NA count per column
which(is.na(hospital), arr.ind=TRUE) # Location of NA

# Replace NA in TreatmentCost with column mean
hospital$TreatmentCost[is.na(hospital$TreatmentCost)] <-
  mean(hospital$TreatmentCost, na.rm = TRUE)


# ----------------------------------------------------------
# 15. Sorting Data Using order()
# ----------------------------------------------------------

# Ascending order of TotalBill
hospital[order(hospital$TotalBill), ]

# Descending order of TotalBill
hospital[order(-hospital$TotalBill), ]


# ----------------------------------------------------------
# 16. Calculate Total for a Specific Row (Row Number)
# ----------------------------------------------------------

hospital$TotalBill[2] <-
  hospital$TreatmentCost[2] + hospital$MedicineCost[2]


# ----------------------------------------------------------
# 17. Calculate Total Using with()
# ----------------------------------------------------------

hospital$TotalBill[3] <-
  with(hospital, TreatmentCost[3] + MedicineCost[3])


# ----------------------------------------------------------
# 18. Calculate Total Using Condition (Patient Name)
# ----------------------------------------------------------

hospital$TotalBill[hospital$PatientName == "Neha"] <-
  hospital$TreatmentCost[hospital$PatientName == "Neha"] +
  hospital$MedicineCost[hospital$PatientName == "Neha"]


# ----------------------------------------------------------
# 19. Further Relevant Hospital Analysis
# ----------------------------------------------------------

# Total revenue by ward
aggregate(TotalBill ~ Ward, data = hospital, sum)

# Average days admitted by ward
aggregate(DaysAdmitted ~ Ward, data = hospital, mean)

# Number of patients per ward
table(hospital$Ward)










# Check overall NA
any(is.na(hospital))

# Count NA by column
colSums(is.na(hospital))

# Location of any NA
which(is.na(hospital), arr.ind = TRUE)

# Replace NA in TreatmentCost with mean of TreatmentCost
hospital$TreatmentCost[is.na(hospital$TreatmentCost)] <-
  mean(hospital$TreatmentCost, na.rm = TRUE)

# Confirm replacement
colSums(is.na(hospital))

# ----------------------------------------------------------
# 4. Add Calculated Columns
# ----------------------------------------------------------

# Add TotalBill = TreatmentCost + MedicineCost
hospital$TotalBill <- hospital$TreatmentCost + hospital$MedicineCost

# View updated
print(hospital)

# ----------------------------------------------------------
# 5. Count Patients by Ward
# ----------------------------------------------------------

# Tabular count
ward_counts <- table(hospital$Ward)
print(ward_counts)

# Number of ICU patients
icu_count <- sum(hospital$Ward == "ICU")
print(icu_count)

# Number of Private ward patients
private_count <- sum(hospital$Ward == "Private")
print(private_count)

# ICU + Private
icu_private_count <- sum(hospital$Ward %in% c("ICU", "Private"))
print(icu_private_count)

# ----------------------------------------------------------
# 6. Count Patients by Gender
# ----------------------------------------------------------

gender_counts <- table(hospital$Gender)
print(gender_counts)

male_count <- sum(hospital$Gender == "Male")
female_count <- sum(hospital$Gender == "Female")

print(male_count)
print(female_count)

# ----------------------------------------------------------
# 7. Cross Tabulation: Gender vs Ward
# ----------------------------------------------------------

gender_ward_table <- table(hospital$Gender, hospital$Ward)
print(gender_ward_table)

# ----------------------------------------------------------
# 8. Percentage Distribution
# ----------------------------------------------------------

# Percentage of ICU patients
perc_icu <- (icu_count / nrow(hospital)) * 100
print(perc_icu)

# Percentage of Female patients
perc_female <- (female_count / nrow(hospital)) * 100
print(perc_female)

# ----------------------------------------------------------
# 9. Additional Business Insight
# ----------------------------------------------------------

# Average Treatment Cost
avg_treatment <- mean(hospital$TreatmentCost)
print(avg_treatment)

# Average Medicine Cost
avg_medicine <- mean(hospital$MedicineCost)
print(avg_medicine)

# Average TotalBill
avg_total_bill <- mean(hospital$TotalBill)
print(avg_total_bill)

# Average days admitted
avg_days <- mean(hospital$DaysAdmitted)
print(avg_days)

# Total revenue (sum of TotalBill)
total_revenue <- sum(hospital$TotalBill)
print(total_revenue)

# ----------------------------------------------------------
# 10. Patients with Above-Average TotalBill
# ----------------------------------------------------------

above_avg <- subset(hospital, TotalBill > avg_total_bill)
print(above_avg)

# ----------------------------------------------------------
# 11. Patients with Short Stay (< 4 days)
# ----------------------------------------------------------

short_stay <- subset(hospital, DaysAdmitted < 4)
print(short_stay)

# ----------------------------------------------------------
# 12. Identify High Billing Patients
# ----------------------------------------------------------

high_bill <- subset(hospital, TotalBill > 20000)
print(high_bill)

# ----------------------------------------------------------
# 13. Find Patient with Max TotalBill
# ----------------------------------------------------------

max_bill_value <- max(hospital$TotalBill)
max_bill_patient <- hospital[hospital$TotalBill == max_bill_value, ]
print(max_bill_patient)

# ----------------------------------------------------------
# 14. Sort Data Based on TotalBill
# ----------------------------------------------------------

# Ascending
asc_sorted <- hospital[order(hospital$TotalBill), ]
print(asc_sorted)

# Descending
desc_sorted <- hospital[order(-hospital$TotalBill), ]
print(desc_sorted)

# ----------------------------------------------------------
# 15. Row-wise Mean of Costs
# ----------------------------------------------------------

row_means <- rowMeans(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(row_means)

# ----------------------------------------------------------
# 16. Column-wise Mean and Sum of Costs
# ----------------------------------------------------------

col_means <- colMeans(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(col_means)

col_sums <- colSums(hospital[, c("TreatmentCost", "MedicineCost", "TotalBill")])
print(col_sums)