library('ggplot2')
library('readxl')
library('ggplot2')
library('dplyr')
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library('tidyr')
setwd("C:/Users/M.Mando/Desktop/AUB Python/Module 3 -  Assignment")
df <- read.csv("HH_Provider_Jan2023.csv")
attach(df)

Encoding yes/no variables into 1/0 in the data frame

df$Nursing_care <- ifelse(df$Offers.Nursing.Care.Services == "Yes", 1, 0)
df$Phys_Ther <- ifelse(df$Offers.Physical.Therapy.Services == "Yes", 1, 0)
df$Occup_Ther <- ifelse(df$Offers.Occupational.Therapy.Services == "Yes", 1, 0)
df$Speech_path <- ifelse(df$Offers.Speech.Pathology.Services == "Yes", 1, 0)
df$Medical_social <- ifelse(df$Offers.Medical.Social.Services == "Yes", 1, 0)
df$HHA <- ifelse(df$Offers.Home.Health.Aide.Services == "Yes", 1, 0)

Changing the names of the columns to smaller names

df$spending <- df$How.much.Medicare.spends.on.an.episode.of.care.at.this.agency..compared.to.Medicare.spending.across.all.agencies.nationally
df$Q18 <- df$How.often.the.home.health.team.began.their.patients..care.in.a.timely.manner
df$Q20 <- df$How.often.the.home.health.team.taught.patients..or.their.family.caregivers..about.their.drugs
df$Q22 <- df$How.often.the.home.health.team.determined.whether.patients.received.a.flu.shot.for.the.current.flu.season
df$Q24 <- df$How.often.patients.got.better.at.walking.or.moving.around
df$Q26 <- df$How.often.patients.got.better.at.getting.in.and.out.of.bed
df$Q28 <- df$How.often.patients.got.better.at.bathing
df$Q30 <- df$How.often.patients..breathing.improved
df$Q32 <- df$How.often.patients.got.better.at.taking.their.drugs.correctly.by.mouth
df$Q34 <- df$How.often.home.health.patients.had.to.be.admitted.to.the.hospital
df$Q36 <- df$How.often.patients.receiving.home.health.care.needed.urgent..unplanned.care.in.the.ER.without.being.admitted
df$Q38 <- df$Changes.in.skin.integrity.post.acute.care..pressure.ulcer.injury
df$Q40 <- df$How.often.physician.recommended.actions.to.address.medication.issues.were.completely.timely

Aggrigation by states

1- (The number of services offered by providers that are available in each state)

aggregate_Services <- function(df) {
  agg <- aggregate(cbind(Nursing_care,Phys_Ther,Occup_Ther,Speech_path, Medical_social)   ~ ï..State  , data = df, FUN = function(x) sum(x, na.rm = TRUE))
  return(agg)
}
aggregate_Services(df)
##    ï..State Nursing_care Phys_Ther Occup_Ther Speech_path Medical_social
## 1        AK           15        13         10          11             10
## 2        AL          118       118        104         101             93
## 3        AR           98        98         86          78             56
## 4        AZ          171       162        151         129            143
## 5        CA         1789      1760       1737        1716           1761
## 6        CO          200       186        175         166            128
## 7        CT           80        80         77          73             75
## 8        DC           35        32         31          19             15
## 9        DE           23        18         15          10             12
## 10       FL          938       924        893         768            805
## 11       GA          104       102         98         101             93
## 12       GU            3         3          3           0              3
## 13       HI           15        14         13          13             12
## 14       IA          134       126        124         116             55
## 15       ID           50        49         49          48             48
## 16       IL          535       535        531         518            496
## 17       IN          192       172        164         138            121
## 18       KS          107       102         96          90             68
## 19       KY           91        91         84          79             69
## 20       LA          184       180        178         174            163
## 21       MA          264       240        219         202            197
## 22       MD           55        55         54          49             50
## 23       ME           19        19         17          16             16
## 24       MI          430       422        418         400            408
## 25       MN          155       140        136         116             65
## 26       MO          135       134        128         115            103
## 27       MP            3         3          0           0              1
## 28       MS           44        44         44          43             43
## 29       MT           24        24         22          22             19
## 30       NC          168       168        159         151            157
## 31       ND           17        15         15          12             10
## 32       NE           65        63         63          61             43
## 33       NH           28        24         24          19             20
## 34       NJ           41        41         41          41             41
## 35       NM           75        73         63          58             54
## 36       NV          152       148        145         135            143
## 37       NY          115       114        113         105            110
## 38       OH          748       701        663         589            381
## 39       OK          237       235        226         220            171
## 40       OR           51        51         49          50             48
## 41       PA          399       300        279         248            226
## 42       PR           36        36         29          32             36
## 43       RI           24        23         22          21             21
## 44       SC           72        72         64          66             65
## 45       SD           27        27         26          24             15
## 46       TN          129       120        116         107            111
## 47       TX         1982      1960       1897        1863           1754
## 48       UT           90        88         84          81             82
## 49       VA          233       228        217         205            178
## 50       VI            2         2          1           1              2
## 51       VT           11        11         11          11             11
## 52       WA           65        64         64          60             61
## 53       WI          102        92         90          82             62
## 54       WV           52        52         47          40             37
## 55       WY           30        27         23          22             17

2- (The percentage of services offered by providers that are available in each state)

aggregate_Services_percentage <- function(df) {
  agg <- aggregate(cbind(spending,Nursing_care,Phys_Ther,Occup_Ther,Speech_path, Medical_social)   ~ ï..State  , data = df, FUN = function(x) mean(x, na.rm = TRUE)*100)
  return(agg)
}
aggregate_Services_percentage(df)
##    ï..State  spending Nursing_care Phys_Ther Occup_Ther Speech_path
## 1        AK  96.71429          100  92.85714   71.42857    78.57143
## 2        AL 100.38596          100 100.00000   90.35088    87.71930
## 3        AR 104.24731          100 100.00000   89.24731    81.72043
## 4        AZ  98.50820          100 100.00000   94.26230    80.32787
## 5        CA  92.87076          100  99.92938   99.08192    98.09322
## 6        CO  98.49074          100 100.00000   95.37037    87.96296
## 7        CT 101.35821          100 100.00000   98.50746    98.50746
## 8        DC  91.44444          100 100.00000  100.00000    77.77778
## 9        DE  97.07692          100 100.00000  100.00000    69.23077
## 10       FL 102.98253          100  99.73118   97.17742    85.08065
## 11       GA  99.07000          100 100.00000   97.00000    98.00000
## 12       GU  79.00000          100 100.00000  100.00000     0.00000
## 13       HI  79.33333          100 100.00000  100.00000    91.66667
## 14       IA  99.45283          100  99.05660   99.05660    94.33962
## 15       ID  94.57447          100  97.87234   97.87234    95.74468
## 16       IL  93.82897          100 100.00000   99.39638    96.78068
## 17       IN 103.79730          100 100.00000   97.97297    85.81081
## 18       KS 104.84375          100  98.95833   94.79167    91.66667
## 19       KY 102.60000          100 100.00000   92.22222    86.66667
## 20       LA 100.90643          100 100.00000   97.66082    96.49123
## 21       MA  96.71318          100  96.12403   87.59690    84.49612
## 22       MD  98.78000          100 100.00000  100.00000    92.00000
## 23       ME  98.22222          100 100.00000   88.88889    83.33333
## 24       MI  94.08192          100  98.58757   98.02260    93.78531
## 25       MN  94.86458          100  98.95833   98.95833    89.58333
## 26       MO  97.67742          100 100.00000   95.96774    87.09677
## 27       MP  84.00000          100 100.00000    0.00000     0.00000
## 28       MS 101.83721          100 100.00000  100.00000    97.67442
## 29       MT  91.77273          100 100.00000   95.45455    95.45455
## 30       NC  93.99363          100 100.00000   94.90446    89.80892
## 31       ND  95.93333          100 100.00000  100.00000    80.00000
## 32       NE 100.73077          100 100.00000  100.00000    98.07692
## 33       NH 100.91304          100 100.00000  100.00000    78.26087
## 34       NJ  99.36585          100 100.00000  100.00000   100.00000
## 35       NM  92.06452          100  98.38710   83.87097    79.03226
## 36       NV 104.10294          100 100.00000   97.79412    91.17647
## 37       NY  98.30097          100  99.02913   98.05825    92.23301
## 38       OH  99.68403          100 100.00000   96.52778    91.66667
## 39       OK  97.47511          100  99.54751   95.92760    93.66516
## 40       OR  87.78723          100 100.00000   95.74468    97.87234
## 41       PA  99.50000          100 100.00000   98.66071    93.75000
## 42       PR  76.83871          100 100.00000   77.41935    87.09677
## 43       RI 101.70000          100 100.00000  100.00000    95.00000
## 44       SC  99.76812          100 100.00000   89.85507    94.20290
## 45       SD 100.68182          100 100.00000   95.45455    90.90909
## 46       TN 104.32174          100  99.13043   98.26087    90.43478
## 47       TX  96.56605          100  99.58932   96.44079    95.07187
## 48       UT  96.37975          100 100.00000   97.46835    93.67089
## 49       VA  96.00510          100 100.00000   96.42857    92.85714
## 50       VI  74.00000          100 100.00000   50.00000    50.00000
## 51       VT  92.40000          100 100.00000  100.00000   100.00000
## 52       WA  91.18333          100 100.00000  100.00000    95.00000
## 53       WI  96.54667          100 100.00000   98.66667    90.66667
## 54       WV 105.52000          100 100.00000   90.00000    76.00000
## 55       WY 100.96154          100 100.00000   84.61538    80.76923
##    Medical_social
## 1        71.42857
## 2        78.07018
## 3        59.13978
## 4        90.98361
## 5        99.43503
## 6        85.18519
## 7        97.01493
## 8        77.77778
## 9        92.30769
## 10       89.65054
## 11       92.00000
## 12      100.00000
## 13       91.66667
## 14       51.88679
## 15       97.87234
## 16       92.95775
## 17       77.02703
## 18       67.70833
## 19       75.55556
## 20       90.64327
## 21       81.39535
## 22       92.00000
## 23       83.33333
## 24       95.76271
## 25       53.12500
## 26       78.22581
## 27       50.00000
## 28       97.67442
## 29       86.36364
## 30       93.63057
## 31       66.66667
## 32       71.15385
## 33       86.95652
## 34      100.00000
## 35       72.58065
## 36       96.32353
## 37       97.08738
## 38       78.81944
## 39       71.94570
## 40       93.61702
## 41       83.48214
## 42      100.00000
## 43      100.00000
## 44       92.75362
## 45       63.63636
## 46       93.04348
## 47       89.73306
## 48       94.93671
## 49       82.14286
## 50      100.00000
## 51      100.00000
## 52       93.33333
## 53       73.33333
## 54       70.00000
## 55       61.53846

3- (The average of star rating, discharge and readmission rate in each state)

aggregate_Discharge_Readmission_starrating <- function(df) {
  agg <- aggregate(cbind(DTC.Observed.Rate , PPR.Observed.Rate, Quality.of.patient.care.star.rating)   ~ ï..State  , data = df, FUN = function(x) mean(x, na.rm = TRUE))
  return(agg)
}
aggregate_Discharge_Readmission_starrating(df)
##    ï..State DTC.Observed.Rate PPR.Observed.Rate
## 1        AK          70.46667          3.319167
## 2        AL          73.06098          2.994375
## 3        AR          72.72967          3.127802
## 4        AZ          78.87402          2.962991
## 5        CA          74.28772          3.959065
## 6        CO          78.31374          2.272637
## 7        CT          77.83169          3.357288
## 8        DC          69.59571          4.557143
## 9        DE          78.86083          2.772500
## 10       FL          78.40500          3.933125
## 11       GA          75.79469          3.580102
## 12       GU          57.50333          4.420000
## 13       HI          81.69167          3.970000
## 14       IA          78.25389          3.412889
## 15       ID          80.53222          2.028444
## 16       IL          71.93398          4.798462
## 17       IN          72.13744          3.778512
## 18       KS          72.89494          3.214941
## 19       KY          71.12814          4.270349
## 20       LA          64.21582          3.162215
## 21       MA          74.45682          3.306941
## 22       MD          79.00042          3.436667
## 23       ME          80.23000          2.653889
## 24       MI          79.08625          4.973320
## 25       MN          79.24951          3.323293
## 26       MO          78.45483          3.909746
## 27       MP          56.52000          3.130000
## 28       MS          69.45186          3.258372
## 29       MT          75.04476          3.330000
## 30       NC          75.89258          3.844040
## 31       ND          76.24286          4.642143
## 32       NE          75.48826          3.362609
## 33       NH          81.00429          2.939048
## 34       NJ          77.10700          4.284250
## 35       NM          71.79094          2.814151
## 36       NV          67.13805          4.447126
## 37       NY          76.04296          4.373673
## 38       OH          72.73249          3.866400
## 39       OK          59.49648          2.510503
## 40       OR          78.87047          3.199535
## 41       PA          77.61255          3.612450
## 42       PR          73.51565          4.205217
## 43       RI          77.05684          3.434211
## 44       SC          76.40618          3.043235
## 45       SD          78.11429          3.296667
## 46       TN          71.01467          3.538318
## 47       TX          63.81447          3.245360
## 48       UT          83.91094          1.697656
## 49       VA          74.55147          3.801525
## 50       VI          87.82000          1.640000
## 51       VT          78.19800          2.066000
## 52       WA          76.88678          3.028814
## 53       WI          78.15971          3.508143
## 54       WV          71.59532          4.870638
## 55       WY          71.93476          2.823810
##    Quality.of.patient.care.star.rating
## 1                             2.791667
## 2                             3.973214
## 3                             3.615385
## 4                             3.528037
## 5                             3.154982
## 6                             3.478022
## 7                             3.177966
## 8                             3.714286
## 9                             3.625000
## 10                            3.765625
## 11                            3.632653
## 12                            2.333333
## 13                            3.125000
## 14                            3.005556
## 15                            3.577778
## 16                            3.175585
## 17                            3.359504
## 18                            3.205882
## 19                            3.761628
## 20                            3.787975
## 21                            3.352941
## 22                            3.854167
## 23                            3.305556
## 24                            3.294466
## 25                            2.878049
## 26                            3.266949
## 27                            1.500000
## 28                            3.895349
## 29                            2.904762
## 30                            3.486755
## 31                            3.178571
## 32                            3.206522
## 33                            3.071429
## 34                            3.612500
## 35                            3.198113
## 36                            2.879310
## 37                            3.326531
## 38                            3.362222
## 39                            3.286164
## 40                            3.151163
## 41                            3.295000
## 42                            3.586957
## 43                            3.447368
## 44                            3.764706
## 45                            3.309524
## 46                            3.845794
## 47                            3.255743
## 48                            3.578125
## 49                            3.355932
## 50                            3.750000
## 51                            2.800000
## 52                            3.872881
## 53                            3.128571
## 54                            3.882979
## 55                            3.000000

5- The average of Discharge and Readmission Rates by Star Rating

aggregate_DTC_PPR_BY_starrating <- function(df) {
  agg <- aggregate(cbind(DTC.Observed.Rate , PPR.Observed.Rate)   ~ Quality.of.patient.care.star.rating  , data = df, FUN = function(x) mean(x, na.rm = TRUE))
  return(agg)
}
aggregate_DTC_PPR_BY_starrating(df)
##   Quality.of.patient.care.star.rating DTC.Observed.Rate PPR.Observed.Rate
## 1                                 1.0          59.62042          5.052083
## 2                                 1.5          64.16474          4.610949
## 3                                 2.0          67.74376          4.097921
## 4                                 2.5          72.19703          3.815586
## 5                                 3.0          74.91795          3.712014
## 6                                 3.5          75.96483          3.520150
## 7                                 4.0          75.42615          3.437095
## 8                                 4.5          74.43562          3.401247
## 9                                 5.0          73.68809          3.484733

5- The average of “How Often” questions by spending

aggregate_Q_BY_spending <- function(df) {
  agg <- aggregate(cbind(Q18,Q20,Q22,Q24,Q26,Q28,Q30,Q32,Q34,Q36,Q38,Q40 )   ~ Quality.of.patient.care.star.rating , data = df, FUN = function(x) mean(x, na.rm = TRUE))
  return(agg)
}
aggregate_Q_BY_spending(df)
##   Quality.of.patient.care.star.rating      Q18      Q20      Q22      Q24
## 1                                 1.0 83.17037 97.47778 60.92222 44.11111
## 2                                 1.5 86.08539 94.93820 60.44972 51.34017
## 3                                 2.0 91.45119 95.75596 67.45278 62.78696
## 4                                 2.5 93.23021 96.67447 70.43244 72.25619
## 5                                 3.0 94.84604 97.34586 72.78198 78.10045
## 6                                 3.5 96.68578 98.29974 74.89114 82.51261
## 7                                 4.0 97.49137 98.53592 75.90581 86.62861
## 8                                 4.5 98.47463 99.08229 76.68029 90.88669
## 9                                 5.0 99.19907 99.10862 79.70699 94.99091
##        Q26      Q28      Q30      Q32      Q34      Q36       Q38      Q40
## 1 47.50370 45.49259 37.87778 38.08148 21.93704 10.74815 1.0851852 84.65926
## 2 53.94382 53.42697 45.86124 48.43118 16.17135 10.69270 0.6415730 84.72837
## 3 65.82607 66.04229 61.91908 58.20318 14.73211 11.18331 0.5550079 88.39491
## 4 74.81104 75.86745 72.18205 66.66098 14.23534 11.56421 0.5214047 91.95786
## 5 80.82684 81.63803 79.61856 73.65842 14.05441 11.54040 0.3996360 93.47707
## 6 83.10588 85.76031 84.58876 79.32751 13.70980 11.33901 0.3412266 94.69753
## 7 86.88539 89.49525 88.52544 84.25458 13.60246 11.34199 0.2552817 95.41813
## 8 90.52621 92.95451 91.29371 89.59486 13.66719 11.53249 0.2157233 96.09979
## 9 94.73776 96.32774 94.44825 95.08601 12.31981 11.75524 0.1648019 96.57040

Counting the number of providers in each state

count_table <- table(df$ï..State)
count_table
## 
##   AK   AL   AR   AZ   CA   CO   CT   DC   DE   FL   GA   GU   HI   IA   ID   IL 
##   15  118   98  182 2183  213   80   35   23 1013  106    3   15  135   50  540 
##   IN   KS   KY   LA   MA   MD   ME   MI   MN   MO   MP   MS   MT   NC   ND   NE 
##  194  109   91  184  270   55   20  440  155  137    3   44   24  170   17   65 
##   NH   NJ   NM   NV   NY   OH   OK   OR   PA   PR   RI   SC   SD   TN   TX   UT 
##   28   41   75  170  115  791  239   53  409   37   24   73   27  130 2014   90 
##   VA   VI   VT   WA   WI   WV   WY 
##  239    2   11   68  103   52   31

Create a stacked bar chart of the means by state and variable

aggr <- aggregate_Discharge_Readmission_starrating(df)
tidy_aggr <- tidyr::pivot_longer(aggr, cols = c("DTC.Observed.Rate", "PPR.Observed.Rate"), names_to = "variable", values_to = "value")
ggplot(tidy_aggr, aes(x = ï..State, y = value, fill = variable)) +
  geom_bar(stat = "identity") +
  geom_line(aes(x = ï..State, y = Quality.of.patient.care.star.rating * 10, color = ""),
            size = 1.5, group = 1) +
  scale_fill_manual(values = c("pink", "red", "blue")) +
  scale_color_manual(values = "purple", name = "Star Rating") +
  labs(x = "State", y = "Mean rate", fill = "") +
  theme(legend.position = "bottom") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

A scatter plot shows the number of services provided around the states

# Aggregate the data
aggr <- aggregate_Services(df)

# Create a scatter plot
ggplot(aggr, aes(x = ï..State, y = Nursing_care + Phys_Ther + Occup_Ther + Speech_path + Medical_social)) +
  geom_point() +
  labs(x = "State", y = "Total Services Provided") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

Graphing “How Often” questions as line graphs in function of Star Rating

agg <- aggregate_Q_BY_spending(df)
tidy_agg <- tidyr::pivot_longer(agg, cols = c("Q18", "Q20", "Q22", "Q24", "Q26", "Q28"), names_to = "question", values_to = "average_score")

ggplot(tidy_agg, aes(x = Quality.of.patient.care.star.rating, y = average_score, color = question)) +
  geom_line(size = 1) +
  labs(x = "Star Rating", y = "Average Quality Score") +
  scale_color_brewer(palette = "Set1") +
  theme_bw()

agg <- aggregate_Q_BY_spending(df)
tidy_agg <- tidyr::pivot_longer(agg, cols = c("Q30", "Q32", "Q34", "Q36", "Q38", "Q40"), names_to = "question", values_to = "average_score")

ggplot(tidy_agg, aes(x = Quality.of.patient.care.star.rating, y = average_score, color = question)) +
  geom_line(size = 1) +
  labs(x = "Star Rating", y = "Average Quality Score") +
  scale_color_brewer(palette = "Set1") +
  theme_bw()

Graphing “How Often” questions as box-plots in function of Star Rating

# Aggregate data and convert to tidy format
aggr <- aggregate_Q_BY_spending(df)
tidy_aggr <- tidyr::pivot_longer(aggr, cols = c("Q18", "Q20", "Q22", "Q24", "Q26", "Q28", "Q30", "Q32", "Q34", "Q36", "Q38", "Q40"), names_to = "Q", values_to = "value")

# Create box plot for each Q variable
ggplot(tidy_aggr, aes(x = Quality.of.patient.care.star.rating, y = value, fill = Q)) +
  geom_boxplot() +
  scale_fill_manual(values = rep(c("pink", "red", "blue"), 4)) +
  facet_wrap(~ Q, nrow = 3) +
  labs(x = "Star rating", y = "Mean score", fill = "Q") +
  theme(legend.position = "bottom")