This is an analysis of a dataset of insured individuals in Australia and their health care claims. The dataset used for this analysis is titled “dataset-analysis-1.csv” and contains information on claim types, prices, and other relevant variables.
# library(tidyverse)
# library(readr)
# library(dplyr)
# library(scales)
# dataset_analysis <- read_csv("dataset-analysis-1.csv")
# Mean and standard deviation of PRICE over rows whose TYPE_CLAIM is
# "Inpatient"; missing prices are dropped before aggregating.
inpatient_summary <-
  dataset_analysis %>%
  filter(TYPE_CLAIM == "Inpatient") %>%
  summarise(
    mean_inpatient = mean(PRICE, na.rm = TRUE),
    sd_inpatient = sd(PRICE, na.rm = TRUE)
  )
The mean price for inpatient claims is 4113.16.
The standard deviation of inpatient claims is 8075.56.
# Mean and standard deviation of PRICE over rows whose TYPE_CLAIM is
# "Outpatient"; missing prices are dropped before aggregating.
outpatient_summary <-
  dataset_analysis %>%
  filter(TYPE_CLAIM == "Outpatient") %>%
  summarise(
    mean_outpatient = mean(PRICE, na.rm = TRUE),
    sd_outpatient = sd(PRICE, na.rm = TRUE)
  )
The mean price for outpatient claims is 208.08.
The standard deviation of outpatient claims is 380.83.
# Mean and standard deviation of LOS, restricted to rows whose TYPE_CLAIM is
# "Inpatient"; missing LOS values are dropped before aggregating.
los_summary <-
  dataset_analysis %>%
  filter(TYPE_CLAIM == "Inpatient") %>%
  summarise(
    mean_los = mean(LOS, na.rm = TRUE),
    sd_los = sd(LOS, na.rm = TRUE)
  )
The average LOS for inpatient claims is 3.43.
The standard deviation of LOS for inpatient claims is 11.18.

# Add ln_exp, the natural log of total_exp, as a new column.
expenditures_positive <- mutate(
  expenditures_positive,
  ln_exp = log(total_exp)
)
# Histogram of ln_exp from expenditures_positive. Bar heights are each bin's
# count divided by the total count, i.e. a fraction of all rows rather than a
# raw count.
ggplot(expenditures_positive, aes(x = ln_exp)) +
  geom_histogram(
    binwidth = 0.5, # width of each ln-exp bin
    aes(y = after_stat(count) / sum(after_stat(count))), # counts -> fraction
    fill = "steelblue",
    color = "black"
  ) +
  # NOTE(review): accuracy = 0.1 rounds tick labels to one decimal place; if
  # the bin fractions are small, distinct ticks may share a label -- confirm
  # against the rendered plot.
  scale_y_continuous(labels = number_format(accuracy = 0.1)) +
  labs(
    title = "Fraction of Individuals by ln(Total Healthcare Expenditures) (>0)",
    x = "ln(Total Expenditures)",
    # The plotted values are fractions (count / total), not percentages, so
    # the axis label matches the title.
    y = "Fraction of Individuals"
  ) +
  theme_minimal()
.nav-tabs > li').click(function () {
$(this).parent().toggleClass('nav-tabs-open');
});
});