# Load the CSV file from the working directory into a data frame.
data_frame <- read.csv(file = "aeca2015.csv")

# Print a per-column overview of the loaded data.
cat("Summary of the data frame:\n")
## Summary of the data frame:
print(summary(object = data_frame))
## Year County PSI PSIDescription
## Min. :2015 Length:413 Min. :21 Length:413
## 1st Qu.:2015 Class :character 1st Qu.:22 Class :character
## Median :2015 Mode :character Median :24 Mode :character
## Mean :2015 Mean :24
## 3rd Qu.:2015 3rd Qu.:26
## Max. :2015 Max. :27
## Count Population ObsRate
## Min. : 0.00 Min. : 3000 Min. : 0.000
## 1st Qu.: 0.00 1st Qu.: 28698 1st Qu.: 0.000
## Median : 3.00 Median : 106572 Median : 2.060
## Mean : 47.62 Mean : 727976 Mean : 7.198
## 3rd Qu.: 18.00 3rd Qu.: 438734 3rd Qu.:12.180
## Max. :4656.00 Max. :21475301 Max. :58.970
# Count: default quantiles, then the sample standard deviation.
# Missing values are ignored in both statistics.
cat("\nDescriptive statistics for 'Count':\n")
##
## Descriptive statistics for 'Count':
print(quantile(x = data_frame[["Count"]], na.rm = TRUE))
## 0% 25% 50% 75% 100%
## 0 0 3 18 4656
print(
  paste(
    "Standard Deviation of Count:",
    sd(x = data_frame[["Count"]], na.rm = TRUE)
  )
)
## [1] "Standard Deviation of Count: 290.966322222388"
# Population: sample standard deviation, ignoring missing values.
cat("\nDescriptive statistics for 'Population':\n")
##
## Descriptive statistics for 'Population':
print(
  paste(
    "Standard Deviation of Population:",
    sd(x = data_frame[["Population"]], na.rm = TRUE)
  )
)
## [1] "Standard Deviation of Population: 2842719.96697826"
# Count how many rows fall under each PSIDescription value.
cat("\nFrequency table for 'PSIDescription':\n")
##
## Frequency table for 'PSIDescription':
print(
  table(data_frame[["PSIDescription"]])
)
##
## Accidental Puncture or Laceration
## 59
## Central Venous Catheter-Related Blood Stream Infection
## 59
## Iatrogenic Pneumothorax
## 59
## Perioperative Hemorrhage or Hematoma
## 59
## Postoperative Wound Dehiscence
## 59
## Retained Surgical Item or Unretrieved Device Fragment
## 59
## Transfusion Reaction
## 59
# Distribution of ObsRate on its original scale.
cat("\nGenerating histogram of ObsRate...\n")
##
## Generating histogram of ObsRate...
hist(
  x = data_frame[["ObsRate"]],
  main = "Histogram of ObsRate",
  xlab = "ObsRate",
  col = "lightblue",
  border = "white"
)
# Distribution of Population after a natural-log transform.
cat("\nGenerating histogram of the log of Population...\n")
##
## Generating histogram of the log of Population...
hist(
  x = log(data_frame[["Population"]]),
  main = "Histogram of log(Population)",
  xlab = "log(Population)",
  col = "salmon",
  border = "white"
)
# Compare the distribution of ObsRate across PSIDescription groups.
cat("\nGenerating boxplot of ObsRate by PSIDescription...\n")
##
## Generating boxplot of ObsRate by PSIDescription...
# boxplot() recycles `col` across the boxes, so a fixed three-colour vector
# made unrelated groups share a fill colour whenever there were more than
# three categories. Generate exactly one colour per group instead; the group
# count mirrors how the formula interface splits PSIDescription (as a factor,
# with NA values not forming a group).
boxplot(
  ObsRate ~ PSIDescription,
  data = data_frame,
  main = "ObsRate by PSIDescription",
  xlab = "PSIDescription",
  ylab = "ObsRate",
  col = rainbow(nlevels(as.factor(data_frame$PSIDescription)))
)
# Pearson correlation between Population and ObsRate, computed on the rows
# where both values are present, then reported rounded to four decimals.
correlation_value <- cor(
  x = data_frame[["Population"]],
  y = data_frame[["ObsRate"]],
  use = "complete.obs"
)
cat(
  paste0(
    "\nCorrelation between Population and ObsRate:",
    " ",
    round(correlation_value, 4),
    " ",
    "\n"
  )
)
##
## Correlation between Population and ObsRate: -0.0164
# Scatterplot of ObsRate against log-transformed Population.
cat("\nGenerating scatterplot of log(Population) vs ObsRate...\n")
##
## Generating scatterplot of log(Population) vs ObsRate...
plot(
  x = log(data_frame[["Population"]]),
  y = data_frame[["ObsRate"]],
  main = "Scatterplot of log(Population) vs ObsRate",
  xlab = "log(Population)",
  ylab = "ObsRate",
  pch = 19, # filled circle marker
  col = "blue"
)