# Read the CSV into `data_frame` and print a per-column summary.
data_frame <- read.csv(file = "aeca2015.csv")
cat("Summary of the data frame:\n")
## Summary of the data frame:
column_summary <- summary(data_frame)
print(column_summary)
##       Year         County               PSI     PSIDescription    
##  Min.   :2015   Length:413         Min.   :21   Length:413        
##  1st Qu.:2015   Class :character   1st Qu.:22   Class :character  
##  Median :2015   Mode  :character   Median :24   Mode  :character  
##  Mean   :2015                      Mean   :24                     
##  3rd Qu.:2015                      3rd Qu.:26                     
##  Max.   :2015                      Max.   :27                     
##      Count           Population          ObsRate      
##  Min.   :   0.00   Min.   :    3000   Min.   : 0.000  
##  1st Qu.:   0.00   1st Qu.:   28698   1st Qu.: 0.000  
##  Median :   3.00   Median :  106572   Median : 2.060  
##  Mean   :  47.62   Mean   :  727976   Mean   : 7.198  
##  3rd Qu.:  18.00   3rd Qu.:  438734   3rd Qu.:12.180  
##  Max.   :4656.00   Max.   :21475301   Max.   :58.970
# Spread of the Count column: default quantiles (0%, 25%, 50%, 75%, 100%)
# followed by the standard deviation. Missing values are dropped in both.
cat("\nDescriptive statistics for 'Count':\n")
## 
## Descriptive statistics for 'Count':
count_quantiles <- quantile(data_frame[["Count"]], na.rm = TRUE)
print(count_quantiles)
##   0%  25%  50%  75% 100% 
##    0    0    3   18 4656
count_sd <- sd(data_frame[["Count"]], na.rm = TRUE)
print(paste("Standard Deviation of Count:", count_sd))
## [1] "Standard Deviation of Count: 290.966322222388"
# Standard deviation of the Population column, with missing values dropped.
cat("\nDescriptive statistics for 'Population':\n")
## 
## Descriptive statistics for 'Population':
population_sd <- sd(data_frame[["Population"]], na.rm = TRUE)
print(paste("Standard Deviation of Population:", population_sd))
## [1] "Standard Deviation of Population: 2842719.96697826"

# Table of PSIDescription

# Tabulate how many rows carry each distinct PSIDescription value.
cat("\nFrequency table for 'PSIDescription':\n")
## 
## Frequency table for 'PSIDescription':
psi_counts <- table(data_frame[["PSIDescription"]])
print(psi_counts)
## 
##                      Accidental Puncture or Laceration 
##                                                     59 
## Central Venous Catheter-Related Blood Stream Infection 
##                                                     59 
##                                Iatrogenic Pneumothorax 
##                                                     59 
##                   Perioperative Hemorrhage or Hematoma 
##                                                     59 
##                         Postoperative Wound Dehiscence 
##                                                     59 
##  Retained Surgical Item or Unretrieved Device Fragment 
##                                                     59 
##                                   Transfusion Reaction 
##                                                     59

# Histogram of ObsRate

# Histogram of the ObsRate column; title and axis label are set explicitly.
cat("\nGenerating histogram of ObsRate...\n")
## 
## Generating histogram of ObsRate...
with(
  data_frame,
  hist(
    ObsRate,
    border = "white",
    col = "lightblue",
    xlab = "ObsRate",
    main = "Histogram of ObsRate"
  )
)

# Histogram of the log of Population

# Histogram of Population on the natural-log scale.
cat("\nGenerating histogram of the log of Population...\n")
## 
## Generating histogram of the log of Population...
log_population <- log(data_frame[["Population"]])
hist(
  log_population,
  border = "white",
  col = "salmon",
  xlab = "log(Population)",
  main = "Histogram of log(Population)"
)

# Boxplot of ObsRate by PSIDescription

# Boxplot of ObsRate for each PSIDescription category.
cat("\nGenerating boxplot of ObsRate by PSIDescription...\n")
## 
## Generating boxplot of ObsRate by PSIDescription...

# The category names are long. On a vertical boxplot base R omits tick labels
# that would overlap, leaving most boxes unlabelled, so the boxes are drawn
# horizontally with the names written out in the left margin.
psi_groups <- sort(unique(as.character(data_frame$PSIDescription)))
axis_cex <- 0.7

# Size the left margin to the widest label, capped at half the device width
# so the plot region keeps room on small devices.
label_width_in <- max(strwidth(psi_groups, units = "inches", cex = axis_cex))
plot_mai <- par("mai")
plot_mai[2] <- min(max(plot_mai[2], label_width_in + 0.4), par("din")[1] / 2)
old_par <- par(mai = plot_mai)

boxplot(ObsRate ~ PSIDescription,
        data = data_frame,
        horizontal = TRUE,
        las = 1, # tick labels always horizontal
        cex.axis = axis_cex,
        main = "ObsRate by PSIDescription",
        xlab = "ObsRate",
        ylab = "", # the category names themselves label this axis
        # One colour per category instead of recycling three colours.
        col = hcl.colors(length(psi_groups), palette = "Set 2"))

# Restore the previous margins so later plots are unaffected.
par(old_par)

# Correlation between Population and ObsRate, computed by cor() over the
# rows where both values are present, reported rounded to four decimals.

correlation_value <- with(
  data_frame,
  cor(Population, ObsRate, use = "complete.obs")
)
correlation_line <- paste(
  "\nCorrelation between Population and ObsRate:",
  round(correlation_value, digits = 4),
  "\n"
)
cat(correlation_line)
## 
## Correlation between Population and ObsRate: -0.0164

# Scatterplot of ObsRate (y) against log(Population) (x).

cat("\nGenerating scatterplot of log(Population) vs ObsRate...\n")
## 
## Generating scatterplot of log(Population) vs ObsRate...
with(
  data_frame,
  plot(
    x = log(Population),
    y = ObsRate,
    col = "blue",
    pch = 19, # solid circles
    xlab = "log(Population)",
    ylab = "ObsRate",
    main = "Scatterplot of log(Population) vs ObsRate"
  )
)