load(file = "providerspokane.rda")
#preview the dataset
summary(providerspokane)
##  National.Provider.Identifier
##  Min.   :1.003e+09           
##  1st Qu.:1.276e+09           
##  Median :1.509e+09           
##  Mean   :1.516e+09           
##  3rd Qu.:1.771e+09           
##  Max.   :1.993e+09           
##                              
##                         Last.Name.Organization.Name.of.the.Provider
##  PATHOLOGY ASSOCIATES MEDICAL LABORATORIES LLC:  491               
##  MUELLER                                      :  109               
##  KING                                         :  105               
##  JOHNSON                                      :  103               
##  JONES                                        :   99               
##  LEE                                          :   99               
##  (Other)                                      :17972               
##  First.Name.of.the.Provider Middle.Initial.of.the.Provider
##         :  964                     :3583                  
##  MICHAEL:  630              M      :1571                  
##  JOHN   :  469              J      :1569                  
##  DAVID  :  434              A      :1506                  
##  ROBERT :  410              L      :1299                  
##  WILLIAM:  324              D      :1273                  
##  (Other):15747              (Other):8177                  
##  Credentials.of.the.Provider Gender.of.the.Provider Entity.Type.of.the.Provider
##  MD     :7497                 :  964                I:18014                    
##  M.D.   :4871                F: 5558                O:  964                    
##         :1322                M:12456                                           
##  PA-C   : 904                                                                  
##  ARNP   : 873                                                                  
##  D.O.   : 313                                                                  
##  (Other):3198                                                                  
##  Street.Address.1.of.the.Provider Street.Address.2.of.the.Provider
##  801 S STEVENS ST: 1663                    :10058                 
##  122 W 7TH AVE   : 1194           SUITE 200:  538                 
##  400 E 5TH AVE   : 1176           SUITE 310:  421                 
##  910 W 5TH AVE   : 1038           SUITE 100:  345                 
##  101 W 8TH AVE   :  820           SUITE 700:  311                 
##  105 W 8TH AVE   :  802           SUITE 400:  306                 
##  (Other)         :12285           (Other)  : 6999                 
##  City.of.the.Provider Zip.Code.of.the.Provider State.Code.of.the.Provider
##  SPOKANE:18978        992042654: 1663          WA:18975                  
##                       992021334: 1176          WI:    2                  
##                       992042349: 1125          WY:    1                  
##                       992042966: 1062                                    
##                       992042307:  702                                    
##                       992042302:  607                                    
##                       (Other)  :12643                                    
##  Country.Code.of.the.Provider              Provider.Type  
##  US:18978                     Diagnostic Radiology: 2414  
##                               Internal Medicine   : 1809  
##                               Family Practice     : 1541  
##                               Physician Assistant : 1240  
##                               Nurse Practitioner  : 1214  
##                               Hematology/Oncology :  745  
##                               (Other)             :10015  
##  Medicare.Participation.Indicator Place.of.Service   HCPCS.Code   
##  N:    2                          F: 6157          99213  :  820  
##  Y:18976                          O:12821          99214  :  742  
##                                                    99204  :  316  
##                                                    99203  :  312  
##                                                    99232  :  286  
##                                                    99212  :  261  
##                                                    (Other):16241  
##                                                                   HCPCS.Description
##  Established patient office or other outpatient visit, typically 15 minutes:  820  
##  Established patient office or other outpatient, visit typically 25 minutes:  742  
##  New patient office or other outpatient visit, typically 45 minutes        :  316  
##  New patient office or other outpatient visit, typically 30 minutes        :  312  
##  Subsequent hospital inpatient care, typically 25 minutes per day          :  286  
##  Established patient office or other outpatient visit, typically 10 minutes:  261  
##  (Other)                                                                   :16241  
##  HCPCS.Drug.Indicator Number.of.Services Number.of.Medicare.Beneficiaries
##  N:17844              Min.   :    11.0   Min.   :   11.0                 
##  Y: 1134              1st Qu.:    20.0   1st Qu.:   17.0                 
##                       Median :    42.0   Median :   31.0                 
##                       Mean   :   291.2   Mean   :  101.8                 
##                       3rd Qu.:   115.0   3rd Qu.:   71.0                 
##                       Max.   :155972.0   Max.   :70013.0                 
##                                                                          
##  Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services
##  Min.   :    11.0                                        
##  1st Qu.:    19.0                                        
##  Median :    39.0                                        
##  Mean   :   154.1                                        
##  3rd Qu.:   101.0                                        
##  Max.   :155971.0                                        
##                                                          
##  Average.Medicare.Allowed.Amount Average.Submitted.Charge.Amount
##  Min.   :    0.019               Min.   :    0.23               
##  1st Qu.:   19.003               1st Qu.:   48.00               
##  Median :   56.170               Median :  132.00               
##  Mean   :  104.731               Mean   :  295.21               
##  3rd Qu.:  107.572               3rd Qu.:  276.00               
##  Max.   :29872.890               Max.   :45998.00               
##                                                                 
##  Average.Medicare.Payment.Amount Average.Medicare.Standardized.Amount
##  Min.   :    0.011               Min.   :    0.011                   
##  1st Qu.:   15.498               1st Qu.:   15.990                   
##  Median :   40.720               Median :   43.613                   
##  Mean   :   80.178               Mean   :   83.320                   
##  3rd Qu.:   81.984               3rd Qu.:   85.360                   
##  Max.   :23420.340               Max.   :21855.610                   
## 

If I did just ‘providerspokane’, it would print thousands of lines of code and take up most of an html file. So a summary of providerspokane provides an idea of the data.

Run Libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.3
## ✓ tidyr   1.0.0     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Turns off the scientific notation in the graphs. 
options(scipen = 999)

Throughout the project, there were several graphs that had scientific notation for the intervaling on the y-axis. This forces R to not use scientifc notation. It provides a better understanding of the graphs and the numbering scale on the y-axis, specifically.

# Fills in an blanks with NA
providerspokane1 <- providerspokane %>% mutate_all(na_if,"")

Throughout the dataset there are several blanks, especially in the Gender of the Provider field which is used throughout this report. To fix that, replace all blanks with NA instead of having to repeat the process several times for each question.

#Reference of Total Counts:

Gender of Provider

# Creates a total count of each type of gender
GenderCount <- providerspokane1 %>%
  group_by(Gender.of.the.Provider) %>%
  summarise(Number=length(Gender.of.the.Provider))
## Warning: Factor `Gender.of.the.Provider` contains implicit NA, consider using
## `forcats::fct_explicit_na`
ggplot(GenderCount,aes(reorder(Gender.of.the.Provider,Number),Number)) + geom_bar(stat="identity", fill = "grey") + 
  labs(x="Gender", y='Count') + 
  ggtitle('Total Count of Each Type of Gender')

Provider Type

# Creates a total count of each type of place of service
TypeCount <- providerspokane1 %>%
  group_by(Provider.Type) %>%
  summarise(Number=length(Provider.Type))
ggplot(TypeCount,aes(reorder(Provider.Type,Number),Number)) + geom_bar(stat="identity", fill = "grey") + 
  labs(x="Provider Type", y='Count') + 
  ggtitle('Total Count of Each Type of Provider Type') +
  theme(axis.text.x = element_text(angle = 90))

Place of Service

F: Facility O: Non-Facility

# Creates a total count of each type of place of service
PlaceServiceCount <- providerspokane1 %>%
  group_by(Place.of.Service) %>%
  summarise(Number=length(Place.of.Service))
ggplot(PlaceServiceCount,aes(reorder(Place.of.Service,Number),Number)) + geom_bar(stat="identity", fill = "grey") + 
  labs(x="Place of Service", y='Count') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Total Count of Each Type of Place of Service')

NOTE: The ‘F’ in the dataset stands for Facilities. The ‘O’ stands for Non-facilities, such as offices.

1

Study how the distributions and means of the number of services, distinct beneficiary per day services, average medicare submitted charged amounts, average medicare paid amount vary as a function of Gender, the Provider Type, and Place of Service, and type of service that is being provided.

Distribution and Means of Number of Services

Gender

ggplot(providerspokane1,aes(Gender.of.the.Provider,Number.of.Services)) + 
  geom_bar(stat="identity", fill = "mediumorchid1") +  
  labs(x="Gender", y='Number of Services') + 
  ggtitle('Gender and Total Number of Services')

This graph shows the gender as a total count of the providers as to the number of providers. In the graph, males had the highest number of services provided, whereas females had the lowest. The problem with this graphic is the high number of NA for gender, so it does not necessarily provide the most accurate account of gender verus number of services.

Gender_Services <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Gender.of.the.Provider) %>%   # the grouping variable
  summarise(mean_services = mean(Number.of.Services))  # calculates the mean of each group
## Warning: Factor `Gender.of.the.Provider` contains implicit NA, consider using
## `forcats::fct_explicit_na`
ggplot(Gender_Services,aes(Gender.of.the.Provider,mean_services)) + 
  geom_bar(stat="identity", fill = "mediumorchid1") +  
  labs(x="Gender", y='Average Number of Services') + 
  ggtitle('Gender and Average Number of Services')

This average shows the amount each individual person would have to do. The NA person has the highest average of services, where males have the lowest. Since there are so many males as seen with the count above, the average is much lower.

Note: There is a scale difference on the y-axis between the Average Number of Services and the Number of Services between the two graphs.

Provider Type

ggplot(providerspokane1,aes(Provider.Type,Number.of.Services)) + 
  geom_bar(stat="identity", fill = "mediumorchid2") +  
  labs(x="Provider Type", y='Number of Services') + 
  ggtitle('Provider Type and Total Number of Services') +
  theme(axis.text.x = element_text(angle = 90))

This is the intial graph showing the total number of services by the provider type. The problem with this graphic is the all the labels due to the amount of different provider types. If I were to break this graph apart, I would consider separating it down by focus of anatomy, such as lungs, bones, heart, etc. This would provide a more accruate showcase between the provider types. When it comes to the total number of services, Hand Surgery and Chirporactic are the two highest provider types.

Type_Services <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Provider.Type) %>%   # the grouping variable
  summarise(mean_services = mean(Number.of.Services))  # calculates the mean of each group
ggplot(Type_Services,aes(Provider.Type,mean_services)) + 
  geom_bar(stat="identity", fill = "mediumorchid2") +  
  labs(x="Provider Type", y='Average Number of Services') + 
  ggtitle('Provider Type and Average Number of Services') +
  theme(axis.text.x = element_text(angle = 90))

When it comes to the average, the Ambulance Service Supplier had the highest for the provider type.

Place of Service

ggplot(providerspokane1,aes(Place.of.Service,Number.of.Services)) + geom_bar(stat="identity", fill = "mediumorchid2") + 
  labs(x="Place of Service", y='Number of Services')  + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Total Number of Services')

F = facility and O = non-facility (office space). This count shows that the ‘O’ place of service provided a higher number of services over the ‘F’ place of service. It makes me wonder whether there is a higher number of F or O type place of service. Below I performed a total count of each type of Place of Service. When it comes to the total count, the Non-Facility (O) has a higher count total. This would explain why there is such a large difference between the total number of services at each type of place of service. Although the two graphs looks the same, it is important to note that the count interval is different between the reference graph of total count and this graph.

Place_Services <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Place.of.Service) %>%   # the grouping variable
  summarise(mean_Services = mean(Number.of.Services))  # calculates the mean of each group
ggplot(Place_Services,aes(Place.of.Service,mean_Services)) + geom_bar(stat="identity", fill = "mediumorchid2") + 
  labs(x="Place of Service", y='Average Number of Services') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) + 
  ggtitle('Place of Service and Average Number of Services')

When it comes to the total number of of service and the average number of services for the place of service, the non-facilities (O) stays at a higher rate than the F facilities. Each non-facility has a higher average number of services when compared to a facility.

Distribution and Means of Distinct Beneficiary per Day Services

A Medicare Beneficiary is an individual who is entitled to benefits under the Medicare plan.

Gender

ggplot(providerspokane1,aes(Gender.of.the.Provider,Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Gender", y='Number of Distinct Beneficiary') + 
  ggtitle('Gender and Total Number of Distinct Beneficiary')

When it comes to the number of distinct medicare benficiary per day servcies the female has the least amount, whereas the NA has the most. It is intersting to note that the NA field is the largest, which showcases that the gender of the provider does not truly affect the total number of distinct medicare benficiary per day services. The gender is irrelevant to the total. The reason the Male count is much higher could even be due to the total number of male providers being much higher than the total female providers.

Gender_Distinct <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Gender.of.the.Provider) %>%   # the grouping variable
  summarise(mean_distinct = mean(Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services))  # calculates the mean of each group
## Warning: Factor `Gender.of.the.Provider` contains implicit NA, consider using
## `forcats::fct_explicit_na`
ggplot(Gender_Distinct,aes(Gender.of.the.Provider,mean_distinct)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Gender", y='Average Number of Distinct Beneficiary') + 
  ggtitle('Gender and Average Number of Distinct Beneficiary')

The NA field for Gender has the highest average number of distinct beneficiary. It also had the highest total count.

Provider Type

ggplot(providerspokane1,aes(Provider.Type,Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Provider Type", y='Number of Distinct Beneficiary') + 
  ggtitle('Provider Type and Total Number of Distinct Beneficiary') +
  theme(axis.text.x = element_text(angle = 90))

When it comes to the number of distinct medicare benficiary per day servcies, there seems to be a relatively even spread throughout the data. There is one outlier, which has nearly a 120,000 distict medicare benficiary, which is the Chiropractic provider type. It appears that most all the other provider types are more specialized and occasional visits. The chiropractic is a more common and more regularly scheduled provider type, which would explain the higher count.

Type_Distinct <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Provider.Type) %>%   # the grouping variable
  summarise(mean_distinct = mean(Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services))  # calculates the mean of each group
ggplot(Type_Distinct,aes(Provider.Type,mean_distinct)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Provider Type", y='Average Number of Distinct Beneficiary') + 
  ggtitle('Provider Type and Average Number of Distinct Beneficiary') +
  theme(axis.text.x = element_text(angle = 90))

The Chiropractic provider type still has one of the outliers for the average number of distinct beneficiary, but does not have the highest. The Ambulance Service Supplier once again, has the highest average number.

Place of Service

ggplot(providerspokane1,aes(Place.of.Service,Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Place of Service", y='Number of Distinct Beneficiary') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Total Number of Distinct Beneficiary')

As mentioned previously, O are the non-facilities such as offices. These would host a larger number of distinct beneficiaries.

Place_Distinct <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Place.of.Service) %>%   # the grouping variable
  summarise(mean_distinct = mean(Number.of.Services))  # calculates the mean of each group
ggplot(Place_Distinct,aes(Place.of.Service,mean_distinct)) + geom_bar(stat="identity", fill = "aquamarine2") +  
  labs(x="Place of Service", y='Average Number of Distinct Beneficiary') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Average Number of Distinct Beneficiary')

The ratio seems to follow the total count of each place of service. The non-facilities (O) have a higher average of distinct medicare benficiary per day of services.

Note: There is a difference of scale on the y-axis.

Distribution and Means of Average Medicare Submitted Charged Amounts

Gender

ggplot(providerspokane1,aes(Gender.of.the.Provider,Average.Submitted.Charge.Amount)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Gender", y='Total Average Submitted Charged Amounts') + 
  ggtitle('Gender and Total Average Submitted Charged Amounts')

When it comes to the average submitted charge amount, the males have the highest total of the averages. The NA had the lowest total.

Gender_Submitted <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Gender.of.the.Provider) %>%   # the grouping variable
  summarise(mean_submitted = mean(Average.Submitted.Charge.Amount))  # calculates the mean of each group
## Warning: Factor `Gender.of.the.Provider` contains implicit NA, consider using
## `forcats::fct_explicit_na`
ggplot(Gender_Submitted,aes(Gender.of.the.Provider,mean_submitted)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Gender", y='Average Submitted Charged Amounts') + 
  ggtitle('Gender and Average Submitted Charged Amounts')

This graph shows the average of the average submitted charged amounts, which means the average each person would have to submit. The NA gender for the provider shows that they would have to have the highest average submmited charged amount due to the small total number in the dataset of NA gender providers.

Provider Type

ggplot(providerspokane1,aes(Provider.Type,Average.Submitted.Charge.Amount)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Provider Type", y='Total Average Submitted Charged Amounts') + 
  ggtitle('Provider Type and Total Average Submitted Charged Amounts') +
  theme(axis.text.x = element_text(angle = 90))

The Provider Type and the Average Submitted Charged Amounts has a wide range. Dermatology has the highest total average submitted charged amount. When looking at the reference graph of total of each provider type, dermatology is slightly above the middle of the range. So it is odd that it has such as high average. It could be the pricing strucutre of the provider type that creates such a high total average.

Type_Submitted <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Provider.Type) %>%   # the grouping variable
  summarise(mean_submitted = mean(Average.Submitted.Charge.Amount))  # calculates the mean of each group
ggplot(Type_Submitted,aes(Provider.Type,mean_submitted)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Provider Type", y='Average Submitted Charged Amounts') + 
  ggtitle('Provider Type and Average Submitted Charged Amounts') +
  theme(axis.text.x = element_text(angle = 90))

There are two noticably higher averages for provider types Ambulance Service Supplier and Ambulatory Surgical Center. When looking at the reference chart, the ambulance service supplier has a small and barely visible total. It would explain why a high average would be need in order to reach the total average in the first graph.

Place of Service

ggplot(providerspokane1,aes(Place.of.Service,Average.Submitted.Charge.Amount)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Place of Service", y='Total Average Submitted Charged Amounts') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Total Average Submitted Charged Amounts')

This is the first case where the facility place of service has a higher category. Since the facility place of service includes more medical type practices, it would explain why the total average submitted charged amounts are much higher than that of the non-facilities.

Place_Submitted <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Place.of.Service) %>%   # the grouping variable
  summarise(mean_submitted = mean(Average.Submitted.Charge.Amount))  # calculates the mean of each group
ggplot(Place_Submitted,aes(Place.of.Service,mean_submitted)) + geom_bar(stat="identity", fill = "gold2") +  
  labs(x="Place of Service", y='Average Submitted Charged Amounts') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Average Submitted Charged Amounts')

The facility place of service has the highest average submitted charged amounts due to the the fact that there are less of these as a place of service (see reference chart). If you take the total average and divide it by the total of each type, the facility will have a higher submitted charged amount.

Distribution and Means of Average Medicare Paid Amount

Gender

ggplot(providerspokane1,aes(Gender.of.the.Provider,Average.Medicare.Payment.Amount)) + geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Gender", y='Average Medicare Paid Amount') + 
  ggtitle('Gender and Average Medicare Paid Amount')

The male has the hgihest total average medicare payment amount. The NA has the smallest amount.

Gender_Paid <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Gender.of.the.Provider) %>%   # the grouping variable
  summarise(mean_paid = mean(Average.Medicare.Payment.Amount))  # calculates the mean of each group
## Warning: Factor `Gender.of.the.Provider` contains implicit NA, consider using
## `forcats::fct_explicit_na`
ggplot(Gender_Paid,aes(Gender.of.the.Provider,mean_paid)) + geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Gender", y='Average Medicare Paid Amount') + 
  ggtitle('Gender and Average of Average Medicare Paid Amount')

It is important to note that althought the NA group had the lowest total, it also has the lowest total count (see reference chart). This explains why the average has to be higher in order to meet the total. Since the male is the largest grouping, they have a smaller average because there are so many to fulfill the total average.

Provider Type

ggplot(providerspokane1,aes(Provider.Type,Average.Medicare.Payment.Amount)) + geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Provider Type", y='Total Average Medicare Paid Amount') + 
  ggtitle('Provider Type and Total Average Medicare Paid Amount') +
  theme(axis.text.x = element_text(angle = 90))

When comparing the distribution of the provider type and total average medicare paid amount, there is a much larger range. Thare are several notable peaks, such as the Ambulatory Surgical Center, Diagnostic Radiologu, and Infectious Disease.

Type_Paid <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Provider.Type) %>%   # the grouping variable
  summarise(mean_paid = mean(Average.Medicare.Payment.Amount))  # calculates the mean of each group
ggplot(Type_Paid,aes(Provider.Type,mean_paid)) + geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Provider Type", y='Average Medicare Paid Amount') + 
  ggtitle('Provider Type and Average Medicare Paid Amount') +
  theme(axis.text.x = element_text(angle = 90))

After averaging the totals, the range is more steady. The Ambulance Service Supplier and the Ambulatory Surgical Center have the two highest peaks. Both those two provider types have a smaller total count of locations. This would explain why they have higher averages paid due to the high total average.

Place of Service

ggplot(providerspokane1,aes(Place.of.Service,Average.Medicare.Payment.Amount)) + geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Place of Service", y='Total Average Medicare Paid Amount') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Total Average Medicare Paid Amount')

The total average medicare paid amount between the two types of place of service are quite close, with about a 75,000 difference. Of all the comparisons, this has been the closest.

Place_Paid <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Place.of.Service) %>%   # the grouping variable
  summarise(mean_paid = mean(Average.Medicare.Payment.Amount))  # calculates the mean of each group
ggplot(Place_Paid,aes(Place.of.Service,mean_paid)) + 
  geom_bar(stat="identity", fill = "firebrick3") +  
  labs(x="Place of Service", y='Average Medicare Paid Amount') + scale_x_discrete(labels=c("Facilities", "Non-Facilities")) +
  ggtitle('Place of Service and Average Medicare Paid Amount')

Although the totals were close, the averages is quite different between the place of services. This has to do with the total of each type of place of service (see reference). Since there are so many non-facilities (O), each has a smaller average medicare paid amount.

2

Study the relationship between average submitted charge amount and average medicare payment amount and analyze how the relationship of these variables varies as a function of the number of services, distinct beneficiary per day services, and Provider Type.

Average Submitted Charge Amount vs. Average Medicare Payment Amount

# Creates a total count average submitted charge amount
Submitted <- sum(as.numeric(providerspokane1$Average.Submitted.Charge.Amount))

Submitted
## [1] 5602506
# Creates a total count average medicare payment amount
Payment <- sum(as.numeric(providerspokane1$Average.Medicare.Payment.Amount))

Payment
## [1] 1521614
# Create vectors
averages <- c('Submitted','Payment')
totals <- c(sum(as.numeric(providerspokane1$Average.Submitted.Charge.Amount)), sum(as.numeric(providerspokane1$Average.Medicare.Payment.Amount)))
# Combine into dataframe
average.data <- data.frame(averages, totals)

average.data
##    averages  totals
## 1 Submitted 5602506
## 2   Payment 1521614
ggplot(average.data,aes(averages,totals)) + 
  geom_bar(stat="identity", fill = "Maroon") +  
  labs(x="Averages", y='Totals') + 
  ggtitle('Total Submitted vs. Payment Amount')

This graph shows the difference between the Medicare Payment Amount and the Submitted Charge Amount. This shows that the Medicare Payment Amount is minimal when compared to the actual submitted charge amount. The Medicare Payment Amount does not even cover half the Submitted Charge Amount.

Number of Services

# Selecting the columns needed
services_average <- providerspokane1 %>%
  select(Number.of.Services, Average.Submitted.Charge.Amount, Average.Medicare.Payment.Amount)
ggplot(providerspokane1, aes(x=Average.Submitted.Charge.Amount, y=Average.Medicare.Payment.Amount, color=Number.of.Services)) + geom_point(size = 1,alpha=.1) + geom_smooth(method = "loess", color ='Grey') + labs(y = "Average Medicare Payment Amount", x = "Average Submitted Charge Amount", color = "Number of Services") +ggtitle("Average Payment vs. Average Submitted by Number of Services")

Distinct Beneficiary Per Day Services

ggplot(providerspokane1, aes(x=Average.Submitted.Charge.Amount, y=Average.Medicare.Payment.Amount, color=Number.of.Distinct.Medicare.Beneficiary.Per.Day.Services)) + geom_point(size = 1,alpha=.1) + geom_smooth(method = "loess", color ='Grey') + labs(y = "Average Medicare Payment Amount", x = "Average Submitted Charge Amount", color = "Number of Distinct Medicare Beneficiary") +ggtitle("Average Payment vs. Average Submitted by Number of Distinct Beneficiary")

Provider Type

Type_Ave <- providerspokane1 %>% # the names of the new data frame and the data frame to be summarised
  group_by(Provider.Type) %>%   # the grouping variable
  summarise(Average.Paid = mean(Average.Medicare.Payment.Amount),Average.Submitted = mean(Average.Submitted.Charge.Amount))  # calculates the mean of each group
# Pivots Longer to create the one Category of Kind of Amount
Type_Ave2 <- Type_Ave %>%
  pivot_longer(c(`Average.Paid`, `Average.Submitted`), names_to = "Kind", values_to = "Average")
ggplot(Type_Ave2,aes(x=Provider.Type, y=Average, fill=Kind))  + geom_bar(stat="identity", position = "dodge") + 
  theme(axis.text.x = element_text(angle = 90))+ labs(y = "Average Medicare Payment Amount", x = "Average Submitted Charge Amount", color = "Kind") +ggtitle("Average Payment vs. Average Submitted by Number of Distinct Beneficiary")

This graph shows the comparison of the averages the Average Medicare Payment Amount and the Average Submited Charge Amount by provider type. As can be seen, the Average Submitted Charge Amount is drastically higher than the Average Medicare Payment Amount.