library('ggplot2')
library('readxl')
library('ggplot2')
library('dplyr')
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library('tidyr')
setwd("C:/Users/M.Mando/Desktop/AUB Python/Module 3 - Assignment")
df <- read.csv("HH_Provider_Jan2023.csv")
attach(df)
Encoding yes/no variables into 1/0 in the data frame
df$Nursing_care <- ifelse(df$Offers.Nursing.Care.Services == "Yes", 1, 0)
df$Phys_Ther <- ifelse(df$Offers.Physical.Therapy.Services == "Yes", 1, 0)
df$Occup_Ther <- ifelse(df$Offers.Occupational.Therapy.Services == "Yes", 1, 0)
df$Speech_path <- ifelse(df$Offers.Speech.Pathology.Services == "Yes", 1, 0)
df$Medical_social <- ifelse(df$Offers.Medical.Social.Services == "Yes", 1, 0)
df$HHA <- ifelse(df$Offers.Home.Health.Aide.Services == "Yes", 1, 0)
Changing the names of the columns to smaller names
df$spending <- df$How.much.Medicare.spends.on.an.episode.of.care.at.this.agency..compared.to.Medicare.spending.across.all.agencies.nationally
df$Q18 <- df$How.often.the.home.health.team.began.their.patients..care.in.a.timely.manner
df$Q20 <- df$How.often.the.home.health.team.taught.patients..or.their.family.caregivers..about.their.drugs
df$Q22 <- df$How.often.the.home.health.team.determined.whether.patients.received.a.flu.shot.for.the.current.flu.season
df$Q24 <- df$How.often.patients.got.better.at.walking.or.moving.around
df$Q26 <- df$How.often.patients.got.better.at.getting.in.and.out.of.bed
df$Q28 <- df$How.often.patients.got.better.at.bathing
df$Q30 <- df$How.often.patients..breathing.improved
df$Q32 <- df$How.often.patients.got.better.at.taking.their.drugs.correctly.by.mouth
df$Q34 <- df$How.often.home.health.patients.had.to.be.admitted.to.the.hospital
df$Q36 <- df$How.often.patients.receiving.home.health.care.needed.urgent..unplanned.care.in.the.ER.without.being.admitted
df$Q38 <- df$Changes.in.skin.integrity.post.acute.care..pressure.ulcer.injury
df$Q40 <- df$How.often.physician.recommended.actions.to.address.medication.issues.were.completely.timely
Aggrigation by states
1- (The number of services offered by providers that are available in each state)
aggregate_Services <- function(df) {
agg <- aggregate(cbind(Nursing_care,Phys_Ther,Occup_Ther,Speech_path, Medical_social) ~ ï..State , data = df, FUN = function(x) sum(x, na.rm = TRUE))
return(agg)
}
aggregate_Services(df)
## ï..State Nursing_care Phys_Ther Occup_Ther Speech_path Medical_social
## 1 AK 15 13 10 11 10
## 2 AL 118 118 104 101 93
## 3 AR 98 98 86 78 56
## 4 AZ 171 162 151 129 143
## 5 CA 1789 1760 1737 1716 1761
## 6 CO 200 186 175 166 128
## 7 CT 80 80 77 73 75
## 8 DC 35 32 31 19 15
## 9 DE 23 18 15 10 12
## 10 FL 938 924 893 768 805
## 11 GA 104 102 98 101 93
## 12 GU 3 3 3 0 3
## 13 HI 15 14 13 13 12
## 14 IA 134 126 124 116 55
## 15 ID 50 49 49 48 48
## 16 IL 535 535 531 518 496
## 17 IN 192 172 164 138 121
## 18 KS 107 102 96 90 68
## 19 KY 91 91 84 79 69
## 20 LA 184 180 178 174 163
## 21 MA 264 240 219 202 197
## 22 MD 55 55 54 49 50
## 23 ME 19 19 17 16 16
## 24 MI 430 422 418 400 408
## 25 MN 155 140 136 116 65
## 26 MO 135 134 128 115 103
## 27 MP 3 3 0 0 1
## 28 MS 44 44 44 43 43
## 29 MT 24 24 22 22 19
## 30 NC 168 168 159 151 157
## 31 ND 17 15 15 12 10
## 32 NE 65 63 63 61 43
## 33 NH 28 24 24 19 20
## 34 NJ 41 41 41 41 41
## 35 NM 75 73 63 58 54
## 36 NV 152 148 145 135 143
## 37 NY 115 114 113 105 110
## 38 OH 748 701 663 589 381
## 39 OK 237 235 226 220 171
## 40 OR 51 51 49 50 48
## 41 PA 399 300 279 248 226
## 42 PR 36 36 29 32 36
## 43 RI 24 23 22 21 21
## 44 SC 72 72 64 66 65
## 45 SD 27 27 26 24 15
## 46 TN 129 120 116 107 111
## 47 TX 1982 1960 1897 1863 1754
## 48 UT 90 88 84 81 82
## 49 VA 233 228 217 205 178
## 50 VI 2 2 1 1 2
## 51 VT 11 11 11 11 11
## 52 WA 65 64 64 60 61
## 53 WI 102 92 90 82 62
## 54 WV 52 52 47 40 37
## 55 WY 30 27 23 22 17
2- (The percentage of services offered by providers that are available in each state)
aggregate_Services_percentage <- function(df) {
agg <- aggregate(cbind(spending,Nursing_care,Phys_Ther,Occup_Ther,Speech_path, Medical_social) ~ ï..State , data = df, FUN = function(x) mean(x, na.rm = TRUE)*100)
return(agg)
}
aggregate_Services_percentage(df)
## ï..State spending Nursing_care Phys_Ther Occup_Ther Speech_path
## 1 AK 96.71429 100 92.85714 71.42857 78.57143
## 2 AL 100.38596 100 100.00000 90.35088 87.71930
## 3 AR 104.24731 100 100.00000 89.24731 81.72043
## 4 AZ 98.50820 100 100.00000 94.26230 80.32787
## 5 CA 92.87076 100 99.92938 99.08192 98.09322
## 6 CO 98.49074 100 100.00000 95.37037 87.96296
## 7 CT 101.35821 100 100.00000 98.50746 98.50746
## 8 DC 91.44444 100 100.00000 100.00000 77.77778
## 9 DE 97.07692 100 100.00000 100.00000 69.23077
## 10 FL 102.98253 100 99.73118 97.17742 85.08065
## 11 GA 99.07000 100 100.00000 97.00000 98.00000
## 12 GU 79.00000 100 100.00000 100.00000 0.00000
## 13 HI 79.33333 100 100.00000 100.00000 91.66667
## 14 IA 99.45283 100 99.05660 99.05660 94.33962
## 15 ID 94.57447 100 97.87234 97.87234 95.74468
## 16 IL 93.82897 100 100.00000 99.39638 96.78068
## 17 IN 103.79730 100 100.00000 97.97297 85.81081
## 18 KS 104.84375 100 98.95833 94.79167 91.66667
## 19 KY 102.60000 100 100.00000 92.22222 86.66667
## 20 LA 100.90643 100 100.00000 97.66082 96.49123
## 21 MA 96.71318 100 96.12403 87.59690 84.49612
## 22 MD 98.78000 100 100.00000 100.00000 92.00000
## 23 ME 98.22222 100 100.00000 88.88889 83.33333
## 24 MI 94.08192 100 98.58757 98.02260 93.78531
## 25 MN 94.86458 100 98.95833 98.95833 89.58333
## 26 MO 97.67742 100 100.00000 95.96774 87.09677
## 27 MP 84.00000 100 100.00000 0.00000 0.00000
## 28 MS 101.83721 100 100.00000 100.00000 97.67442
## 29 MT 91.77273 100 100.00000 95.45455 95.45455
## 30 NC 93.99363 100 100.00000 94.90446 89.80892
## 31 ND 95.93333 100 100.00000 100.00000 80.00000
## 32 NE 100.73077 100 100.00000 100.00000 98.07692
## 33 NH 100.91304 100 100.00000 100.00000 78.26087
## 34 NJ 99.36585 100 100.00000 100.00000 100.00000
## 35 NM 92.06452 100 98.38710 83.87097 79.03226
## 36 NV 104.10294 100 100.00000 97.79412 91.17647
## 37 NY 98.30097 100 99.02913 98.05825 92.23301
## 38 OH 99.68403 100 100.00000 96.52778 91.66667
## 39 OK 97.47511 100 99.54751 95.92760 93.66516
## 40 OR 87.78723 100 100.00000 95.74468 97.87234
## 41 PA 99.50000 100 100.00000 98.66071 93.75000
## 42 PR 76.83871 100 100.00000 77.41935 87.09677
## 43 RI 101.70000 100 100.00000 100.00000 95.00000
## 44 SC 99.76812 100 100.00000 89.85507 94.20290
## 45 SD 100.68182 100 100.00000 95.45455 90.90909
## 46 TN 104.32174 100 99.13043 98.26087 90.43478
## 47 TX 96.56605 100 99.58932 96.44079 95.07187
## 48 UT 96.37975 100 100.00000 97.46835 93.67089
## 49 VA 96.00510 100 100.00000 96.42857 92.85714
## 50 VI 74.00000 100 100.00000 50.00000 50.00000
## 51 VT 92.40000 100 100.00000 100.00000 100.00000
## 52 WA 91.18333 100 100.00000 100.00000 95.00000
## 53 WI 96.54667 100 100.00000 98.66667 90.66667
## 54 WV 105.52000 100 100.00000 90.00000 76.00000
## 55 WY 100.96154 100 100.00000 84.61538 80.76923
## Medical_social
## 1 71.42857
## 2 78.07018
## 3 59.13978
## 4 90.98361
## 5 99.43503
## 6 85.18519
## 7 97.01493
## 8 77.77778
## 9 92.30769
## 10 89.65054
## 11 92.00000
## 12 100.00000
## 13 91.66667
## 14 51.88679
## 15 97.87234
## 16 92.95775
## 17 77.02703
## 18 67.70833
## 19 75.55556
## 20 90.64327
## 21 81.39535
## 22 92.00000
## 23 83.33333
## 24 95.76271
## 25 53.12500
## 26 78.22581
## 27 50.00000
## 28 97.67442
## 29 86.36364
## 30 93.63057
## 31 66.66667
## 32 71.15385
## 33 86.95652
## 34 100.00000
## 35 72.58065
## 36 96.32353
## 37 97.08738
## 38 78.81944
## 39 71.94570
## 40 93.61702
## 41 83.48214
## 42 100.00000
## 43 100.00000
## 44 92.75362
## 45 63.63636
## 46 93.04348
## 47 89.73306
## 48 94.93671
## 49 82.14286
## 50 100.00000
## 51 100.00000
## 52 93.33333
## 53 73.33333
## 54 70.00000
## 55 61.53846
3- (The average of star rating, discharge and readmission rate in each state)
aggregate_Discharge_Readmission_starrating <- function(df) {
agg <- aggregate(cbind(DTC.Observed.Rate , PPR.Observed.Rate, Quality.of.patient.care.star.rating) ~ ï..State , data = df, FUN = function(x) mean(x, na.rm = TRUE))
return(agg)
}
aggregate_Discharge_Readmission_starrating(df)
## ï..State DTC.Observed.Rate PPR.Observed.Rate
## 1 AK 70.46667 3.319167
## 2 AL 73.06098 2.994375
## 3 AR 72.72967 3.127802
## 4 AZ 78.87402 2.962991
## 5 CA 74.28772 3.959065
## 6 CO 78.31374 2.272637
## 7 CT 77.83169 3.357288
## 8 DC 69.59571 4.557143
## 9 DE 78.86083 2.772500
## 10 FL 78.40500 3.933125
## 11 GA 75.79469 3.580102
## 12 GU 57.50333 4.420000
## 13 HI 81.69167 3.970000
## 14 IA 78.25389 3.412889
## 15 ID 80.53222 2.028444
## 16 IL 71.93398 4.798462
## 17 IN 72.13744 3.778512
## 18 KS 72.89494 3.214941
## 19 KY 71.12814 4.270349
## 20 LA 64.21582 3.162215
## 21 MA 74.45682 3.306941
## 22 MD 79.00042 3.436667
## 23 ME 80.23000 2.653889
## 24 MI 79.08625 4.973320
## 25 MN 79.24951 3.323293
## 26 MO 78.45483 3.909746
## 27 MP 56.52000 3.130000
## 28 MS 69.45186 3.258372
## 29 MT 75.04476 3.330000
## 30 NC 75.89258 3.844040
## 31 ND 76.24286 4.642143
## 32 NE 75.48826 3.362609
## 33 NH 81.00429 2.939048
## 34 NJ 77.10700 4.284250
## 35 NM 71.79094 2.814151
## 36 NV 67.13805 4.447126
## 37 NY 76.04296 4.373673
## 38 OH 72.73249 3.866400
## 39 OK 59.49648 2.510503
## 40 OR 78.87047 3.199535
## 41 PA 77.61255 3.612450
## 42 PR 73.51565 4.205217
## 43 RI 77.05684 3.434211
## 44 SC 76.40618 3.043235
## 45 SD 78.11429 3.296667
## 46 TN 71.01467 3.538318
## 47 TX 63.81447 3.245360
## 48 UT 83.91094 1.697656
## 49 VA 74.55147 3.801525
## 50 VI 87.82000 1.640000
## 51 VT 78.19800 2.066000
## 52 WA 76.88678 3.028814
## 53 WI 78.15971 3.508143
## 54 WV 71.59532 4.870638
## 55 WY 71.93476 2.823810
## Quality.of.patient.care.star.rating
## 1 2.791667
## 2 3.973214
## 3 3.615385
## 4 3.528037
## 5 3.154982
## 6 3.478022
## 7 3.177966
## 8 3.714286
## 9 3.625000
## 10 3.765625
## 11 3.632653
## 12 2.333333
## 13 3.125000
## 14 3.005556
## 15 3.577778
## 16 3.175585
## 17 3.359504
## 18 3.205882
## 19 3.761628
## 20 3.787975
## 21 3.352941
## 22 3.854167
## 23 3.305556
## 24 3.294466
## 25 2.878049
## 26 3.266949
## 27 1.500000
## 28 3.895349
## 29 2.904762
## 30 3.486755
## 31 3.178571
## 32 3.206522
## 33 3.071429
## 34 3.612500
## 35 3.198113
## 36 2.879310
## 37 3.326531
## 38 3.362222
## 39 3.286164
## 40 3.151163
## 41 3.295000
## 42 3.586957
## 43 3.447368
## 44 3.764706
## 45 3.309524
## 46 3.845794
## 47 3.255743
## 48 3.578125
## 49 3.355932
## 50 3.750000
## 51 2.800000
## 52 3.872881
## 53 3.128571
## 54 3.882979
## 55 3.000000
5- The average of Discharge and Readmission Rates by Star Rating
aggregate_DTC_PPR_BY_starrating <- function(df) {
agg <- aggregate(cbind(DTC.Observed.Rate , PPR.Observed.Rate) ~ Quality.of.patient.care.star.rating , data = df, FUN = function(x) mean(x, na.rm = TRUE))
return(agg)
}
aggregate_DTC_PPR_BY_starrating(df)
## Quality.of.patient.care.star.rating DTC.Observed.Rate PPR.Observed.Rate
## 1 1.0 59.62042 5.052083
## 2 1.5 64.16474 4.610949
## 3 2.0 67.74376 4.097921
## 4 2.5 72.19703 3.815586
## 5 3.0 74.91795 3.712014
## 6 3.5 75.96483 3.520150
## 7 4.0 75.42615 3.437095
## 8 4.5 74.43562 3.401247
## 9 5.0 73.68809 3.484733
5- The average of “How Often” questions by spending
aggregate_Q_BY_spending <- function(df) {
agg <- aggregate(cbind(Q18,Q20,Q22,Q24,Q26,Q28,Q30,Q32,Q34,Q36,Q38,Q40 ) ~ Quality.of.patient.care.star.rating , data = df, FUN = function(x) mean(x, na.rm = TRUE))
return(agg)
}
aggregate_Q_BY_spending(df)
## Quality.of.patient.care.star.rating Q18 Q20 Q22 Q24
## 1 1.0 83.17037 97.47778 60.92222 44.11111
## 2 1.5 86.08539 94.93820 60.44972 51.34017
## 3 2.0 91.45119 95.75596 67.45278 62.78696
## 4 2.5 93.23021 96.67447 70.43244 72.25619
## 5 3.0 94.84604 97.34586 72.78198 78.10045
## 6 3.5 96.68578 98.29974 74.89114 82.51261
## 7 4.0 97.49137 98.53592 75.90581 86.62861
## 8 4.5 98.47463 99.08229 76.68029 90.88669
## 9 5.0 99.19907 99.10862 79.70699 94.99091
## Q26 Q28 Q30 Q32 Q34 Q36 Q38 Q40
## 1 47.50370 45.49259 37.87778 38.08148 21.93704 10.74815 1.0851852 84.65926
## 2 53.94382 53.42697 45.86124 48.43118 16.17135 10.69270 0.6415730 84.72837
## 3 65.82607 66.04229 61.91908 58.20318 14.73211 11.18331 0.5550079 88.39491
## 4 74.81104 75.86745 72.18205 66.66098 14.23534 11.56421 0.5214047 91.95786
## 5 80.82684 81.63803 79.61856 73.65842 14.05441 11.54040 0.3996360 93.47707
## 6 83.10588 85.76031 84.58876 79.32751 13.70980 11.33901 0.3412266 94.69753
## 7 86.88539 89.49525 88.52544 84.25458 13.60246 11.34199 0.2552817 95.41813
## 8 90.52621 92.95451 91.29371 89.59486 13.66719 11.53249 0.2157233 96.09979
## 9 94.73776 96.32774 94.44825 95.08601 12.31981 11.75524 0.1648019 96.57040
Counting the number of providers in each state
count_table <- table(df$ï..State)
count_table
##
## AK AL AR AZ CA CO CT DC DE FL GA GU HI IA ID IL
## 15 118 98 182 2183 213 80 35 23 1013 106 3 15 135 50 540
## IN KS KY LA MA MD ME MI MN MO MP MS MT NC ND NE
## 194 109 91 184 270 55 20 440 155 137 3 44 24 170 17 65
## NH NJ NM NV NY OH OK OR PA PR RI SC SD TN TX UT
## 28 41 75 170 115 791 239 53 409 37 24 73 27 130 2014 90
## VA VI VT WA WI WV WY
## 239 2 11 68 103 52 31
Create a stacked bar chart of the means by state and variable
aggr <- aggregate_Discharge_Readmission_starrating(df)
tidy_aggr <- tidyr::pivot_longer(aggr, cols = c("DTC.Observed.Rate", "PPR.Observed.Rate"), names_to = "variable", values_to = "value")
ggplot(tidy_aggr, aes(x = ï..State, y = value, fill = variable)) +
geom_bar(stat = "identity") +
geom_line(aes(x = ï..State, y = Quality.of.patient.care.star.rating * 10, color = ""),
size = 1.5, group = 1) +
scale_fill_manual(values = c("pink", "red", "blue")) +
scale_color_manual(values = "purple", name = "Star Rating") +
labs(x = "State", y = "Mean rate", fill = "") +
theme(legend.position = "bottom") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
A scatter plot shows the number of services provided around the states
# Aggregate the data
aggr <- aggregate_Services(df)
# Create a scatter plot
ggplot(aggr, aes(x = ï..State, y = Nursing_care + Phys_Ther + Occup_Ther + Speech_path + Medical_social)) +
geom_point() +
labs(x = "State", y = "Total Services Provided") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5))
Graphing “How Often” questions as line graphs in function of Star Rating
agg <- aggregate_Q_BY_spending(df)
tidy_agg <- tidyr::pivot_longer(agg, cols = c("Q18", "Q20", "Q22", "Q24", "Q26", "Q28"), names_to = "question", values_to = "average_score")
ggplot(tidy_agg, aes(x = Quality.of.patient.care.star.rating, y = average_score, color = question)) +
geom_line(size = 1) +
labs(x = "Star Rating", y = "Average Quality Score") +
scale_color_brewer(palette = "Set1") +
theme_bw()
agg <- aggregate_Q_BY_spending(df)
tidy_agg <- tidyr::pivot_longer(agg, cols = c("Q30", "Q32", "Q34", "Q36", "Q38", "Q40"), names_to = "question", values_to = "average_score")
ggplot(tidy_agg, aes(x = Quality.of.patient.care.star.rating, y = average_score, color = question)) +
geom_line(size = 1) +
labs(x = "Star Rating", y = "Average Quality Score") +
scale_color_brewer(palette = "Set1") +
theme_bw()
Graphing “How Often” questions as box-plots in function of Star Rating
# Aggregate data and convert to tidy format
aggr <- aggregate_Q_BY_spending(df)
tidy_aggr <- tidyr::pivot_longer(aggr, cols = c("Q18", "Q20", "Q22", "Q24", "Q26", "Q28", "Q30", "Q32", "Q34", "Q36", "Q38", "Q40"), names_to = "Q", values_to = "value")
# Create box plot for each Q variable
ggplot(tidy_aggr, aes(x = Quality.of.patient.care.star.rating, y = value, fill = Q)) +
geom_boxplot() +
scale_fill_manual(values = rep(c("pink", "red", "blue"), 4)) +
facet_wrap(~ Q, nrow = 3) +
labs(x = "Star rating", y = "Mean score", fill = "Q") +
theme(legend.position = "bottom")