Descriptive Statistics
library(readxl)
# Load the RateMDs data set. The workbook's first column has no header, so
# readxl auto-names it `...1` (see the "New names" message printed below).
ratemdsfinal <- read_excel("ratemdsfinal.xlsx")
## New names:
## * `` -> ...1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(xtable)
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.2. https://CRAN.R-project.org/package=stargazer
library(knitr)
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
# Count the missing values in every column of the RateMDs data.
colSums(is.na(ratemdsfinal))
## ...1 Doctor_names
## 0 0
## Gender Years of Experience
## 0 59
## Doctor_reviews Rating
## 0 0
## Review_sentences Count of Sentiment
## 0 0
## Sum of Pos Score Count of Positive Sentiment
## 0 0
## Positive_Proportion Average pos_score
## 0 0
## Sum of Neg Score Negative_Proportion
## 0 0
## Count of Negative Sentiment Average neg_score
## 0 0
## Communication_positive Expertisepositive
## 0 0
## Timepositive Bedside_positive
## 0 0
## Officepositive Costpositive
## 0 0
## Communication_negative Expertisenegative
## 0 0
## Time_negative Bedside_negative
## 0 0
## Office_negative Cost_negative
## 0 0
## Word Count Review Count
## 0 0
## Phrase Count Average words per review
## 0 0
## Overall_score
## 0
# Impute missing `Years of Experience` with the rounded mean of the observed
# values.
missing_exp <- is.na(ratemdsfinal$`Years of Experience`)
ratemdsfinal$`Years of Experience`[missing_exp] <-
  round(mean(ratemdsfinal$`Years of Experience`, na.rm = TRUE))

# Keep only the numeric columns. vapply() guarantees a logical mask with
# exactly one entry per column.
num_cols <- vapply(ratemdsfinal, is.numeric, logical(1))
rate <- ratemdsfinal[, num_cols]

# Drop the auto-named `...1` column (presumably a spreadsheet row index --
# TODO confirm) so it is not treated as a variable.
rate[["...1"]] <- NULL
str(rate)
## tibble [793 × 28] (S3: tbl_df/tbl/data.frame)
## $ Years of Experience : num [1:793] 21 15 26 21 27 38 49 36 25 26 ...
## $ Rating : num [1:793] 4.85 4.47 4.92 4.89 4.89 5 4.44 4.75 4.93 4.78 ...
## $ Count of Sentiment : num [1:793] 14 23 9 9 9 7 19 11 7 8 ...
## $ Sum of Pos Score : num [1:793] 9.81 18.01 7.11 8.41 6.54 ...
## $ Count of Positive Sentiment: num [1:793] 12 21 8 9 9 7 15 10 7 8 ...
## $ Positive_Proportion : num [1:793] 0.857 0.913 0.889 1 1 ...
## $ Average pos_score : num [1:793] 0.817 0.858 0.889 0.934 0.726 ...
## $ Sum of Neg Score : num [1:793] -0.9394 -0.6902 -0.0018 0 0 ...
## $ Negative_Proportion : num [1:793] 0.143 0.087 0.111 0 0 ...
## $ Count of Negative Sentiment: num [1:793] 2 2 1 0 0 0 2 1 0 0 ...
## $ Average neg_score : num [1:793] -0.4697 -0.3451 -0.0018 0 0 ...
## $ Communication_positive : num [1:793] 0.0323 0.0876 0 0.0513 0.1 ...
## $ Expertisepositive : num [1:793] 0.145 0.073 0.139 0.103 0.267 ...
## $ Timepositive : num [1:793] 0.0806 0.0438 0.0556 0.1538 0.1667 ...
## $ Bedside_positive : num [1:793] 0 0.19 0.194 0.103 0.167 ...
## $ Officepositive : num [1:793] 0.0645 0.0949 0.1111 0.0513 0.0333 ...
## $ Costpositive : num [1:793] 0.0161 0.073 0.0833 0 0.0667 ...
## $ Communication_negative : num [1:793] 0.0484 0.0438 0.0278 0 0 ...
## $ Expertisenegative : num [1:793] 0.0645 0.0219 0.0278 0 0.0333 ...
## $ Time_negative : num [1:793] 0.0968 0.0511 0 0 0 ...
## $ Bedside_negative : num [1:793] 0.0161 0.0219 0 0 0 ...
## $ Office_negative : num [1:793] 0.0323 0.0365 0 0 0 ...
## $ Cost_negative : num [1:793] 0 0.0219 0 0 0 ...
## $ Word Count : num [1:793] 523 1604 288 358 280 ...
## $ Review Count : num [1:793] 14 23 9 9 9 7 19 11 7 8 ...
## $ Phrase Count : num [1:793] 67 165 38 42 48 32 133 93 41 67 ...
## $ Average words per review : num [1:793] 37 70 32 40 31 36 69 100 38 77 ...
## $ Overall_score : num [1:793] 0.348 0.513 0.887 0.934 0.726 ...
# Shorten the name of the first column (years of experience), then tabulate
# mean, sd, median, min, max and skew for every numeric RateMDs variable.
names(rate)[1] <- "Exp"
stat_cols <- c("mean", "sd", "median", "min", "max", "skew")
rate_stats <- describe(rate)[stat_cols]
kable(xtable(rate_stats))
| | mean | sd | median | min | max | skew |
|:---|---:|---:|---:|---:|---:|---:|
| Exp | 25.9394704 | 10.7023017 | 26.0000000 | 2.0000 | 56.0000 | 0.2925266 |
| Rating | 3.9887516 | 1.1456304 | 4.5000000 | 1.0000 | 5.0000 | -1.0690984 |
| Count of Sentiment | 2.9621690 | 3.2622855 | 2.0000000 | 1.0000 | 30.0000 | 3.0905276 |
| Sum of Pos Score | 1.6952861 | 2.0505289 | 0.9493000 | 0.0000 | 18.0120 | 2.9120548 |
| Count of Positive Sentiment | 2.1601513 | 2.5047079 | 1.0000000 | 0.0000 | 21.0000 | 2.8523571 |
| Positive_Proportion | 0.7329744 | 0.3628249 | 1.0000000 | 0.0000 | 1.0000 | -1.0791118 |
| Average pos_score | 0.6597428 | 0.3194366 | 0.7778000 | 0.0000 | 0.9923 | -1.1775197 |
| Sum of Neg Score | -0.4450890 | 0.8923222 | 0.0000000 | -10.8437 | 0.0000 | -4.3611618 |
| Negative_Proportion | 0.2263399 | 0.3439964 | 0.0000000 | 0.0000 | 1.0000 | 1.3315686 |
| Count of Negative Sentiment | 0.6935687 | 1.2951625 | 0.0000000 | 0.0000 | 15.0000 | 4.1368418 |
| Average neg_score | -0.2453372 | 0.3380014 | 0.0000000 | -0.9810 | 0.0000 | -0.8781404 |
| Communication_positive | 0.0519260 | 0.1105353 | 0.0000000 | 0.0000 | 1.0000 | 4.0196863 |
| Expertisepositive | 0.1172052 | 0.1680621 | 0.0666667 | 0.0000 | 1.0000 | 2.6532162 |
| Timepositive | 0.0530472 | 0.1028384 | 0.0000000 | 0.0000 | 1.0000 | 3.2624407 |
| Bedside_positive | 0.1393430 | 0.1792870 | 0.1000000 | 0.0000 | 1.0000 | 2.2700317 |
| Officepositive | 0.0567404 | 0.1138064 | 0.0000000 | 0.0000 | 1.0000 | 3.5556046 |
| Costpositive | 0.0388190 | 0.0780689 | 0.0000000 | 0.0000 | 0.5000 | 2.6948463 |
| Communication_negative | 0.0427782 | 0.0923958 | 0.0000000 | 0.0000 | 1.0000 | 3.9127936 |
| Expertisenegative | 0.0620597 | 0.1284017 | 0.0000000 | 0.0000 | 1.0000 | 3.9608482 |
| Time_negative | 0.0499659 | 0.1058386 | 0.0000000 | 0.0000 | 1.0000 | 3.8477383 |
| Bedside_negative | 0.0335470 | 0.1045708 | 0.0000000 | 0.0000 | 1.0000 | 6.1851972 |
| Office_negative | 0.0337728 | 0.0817816 | 0.0000000 | 0.0000 | 0.6000 | 3.6060950 |
| Cost_negative | 0.0140710 | 0.0598381 | 0.0000000 | 0.0000 | 1.0000 | 8.8894980 |
| Word Count | 169.6973518 | 250.4917975 | 78.0000000 | 0.0000 | 2350.0000 | 3.5365532 |
| Review Count | 2.9621690 | 3.2622855 | 2.0000000 | 1.0000 | 30.0000 | 3.0905276 |
| Phrase Count | 17.4186633 | 23.0809676 | 9.0000000 | 1.0000 | 195.0000 | 3.4406515 |
| Average words per review | 52.6557377 | 53.6781351 | 40.0000000 | 0.0000 | 737.0000 | 5.8080336 |
| Overall_score | 0.4144056 | 0.5385908 | 0.6249000 | -0.9810 | 0.9914 | -0.9302287 |
# Checking for Normality
#library(ggpubr)
#ggqqplot(rate$Rating)
#shapiro.test(rate$Rating)
#ggqqplot(rate$Positive_Proportion)
#shapiro.test(rate$Positive_Proportion)
#ggqqplot(rate$Negative_Proportion)
#shapiro.test(rate$Negative_Proportion)
#ggqqplot(rate$`Average pos_score`)
#shapiro.test(rate$`Average pos_score`)
#ggqqplot(rate$`Average neg_score`)
#shapiro.test(rate$`Average neg_score`)
## Multiple t-tests for RateMDs
# Pooled-variance two-sample t-test of one numeric column against doctor
# gender.
compare_by_gender <- function(x) {
  t.test(x ~ ratemdsfinal$Gender, var.equal = TRUE)
}
# Run the test on every column of `rate`. No multiple-comparison adjustment is
# applied to the resulting p-values.
a <- lapply(rate, compare_by_gender)
# T-Test for RateMDs with Gender
print(a)
## $Exp
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -3.8797, df = 791, p-value = 0.0001133
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.406922 -1.445720
## sample estimates:
## mean in group Female mean in group Male
## 24.54458 27.47090
##
##
## $Rating
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -1.1734, df = 791, p-value = 0.241
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.25540488 0.06430129
## sample estimates:
## mean in group Female mean in group Male
## 3.943205 4.038757
##
##
## $`Count of Sentiment`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -2.2132, df = 791, p-value = 0.02717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.96627433 -0.05789856
## sample estimates:
## mean in group Female mean in group Male
## 2.718072 3.230159
##
##
## $`Sum of Pos Score`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -2.6979, df = 791, p-value = 0.007126
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.6768438 -0.1067308
## sample estimates:
## mean in group Female mean in group Male
## 1.508533 1.900320
##
##
## $`Count of Positive Sentiment`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -2.7495, df = 791, p-value = 0.006104
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.8357665 -0.1394997
## sample estimates:
## mean in group Female mean in group Male
## 1.927711 2.415344
##
##
## $Positive_Proportion
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.34504, df = 791, p-value = 0.7302
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05957205 0.04176041
## sample estimates:
## mean in group Female mean in group Male
## 0.7287292 0.7376351
##
##
## $`Average pos_score`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 1.9515e-05, df = 791, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04461023 0.04461111
## sample estimates:
## mean in group Female mean in group Male
## 0.6597430 0.6597426
##
##
## $`Sum of Neg Score`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.28433, df = 791, p-value = 0.7762
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1426599 0.1065606
## sample estimates:
## mean in group Female mean in group Male
## -0.4536928 -0.4356431
##
##
## $Negative_Proportion
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 0.71821, df = 791, p-value = 0.4728
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03045368 0.06559611
## sample estimates:
## mean in group Female mean in group Male
## 0.2347156 0.2171444
##
##
## $`Count of Negative Sentiment`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 0.0092704, df = 791, p-value = 0.9926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1800207 0.1817291
## sample estimates:
## mean in group Female mean in group Male
## 0.6939759 0.6931217
##
##
## $`Average neg_score`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.036431, df = 791, p-value = 0.9709
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04807935 0.04632724
## sample estimates:
## mean in group Female mean in group Male
## -0.2457548 -0.2448787
##
##
## $Communication_positive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.84318, df = 791, p-value = 0.3994
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.022057602 0.008801988
## sample estimates:
## mean in group Female mean in group Male
## 0.04876669 0.05539450
##
##
## $Expertisepositive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.21835, df = 791, p-value = 0.8272
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02608057 0.02085918
## sample estimates:
## mean in group Female mean in group Male
## 0.1159607 0.1185714
##
##
## $Timepositive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 0.75394, df = 791, p-value = 0.4511
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.008842558 0.019870758
## sample estimates:
## mean in group Female mean in group Male
## 0.05567561 0.05016151
##
##
## $Bedside_positive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 1.1781, df = 791, p-value = 0.2391
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01000301 0.04002949
## sample estimates:
## mean in group Female mean in group Male
## 0.1464993 0.1314861
##
##
## $Officepositive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.5259, df = 791, p-value = 0.5991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02014804 0.01163348
## sample estimates:
## mean in group Female mean in group Male
## 0.05471113 0.05896841
##
##
## $Costpositive
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 1.7647, df = 791, p-value = 0.078
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.001099019 0.020663489
## sample estimates:
## mean in group Female mean in group Male
## 0.04348194 0.03369971
##
##
## $Communication_negative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.09706, df = 791, p-value = 0.9227
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01354141 0.01226538
## sample estimates:
## mean in group Female mean in group Male
## 0.04247408 0.04311209
##
##
## $Expertisenegative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -1.4673, df = 791, p-value = 0.1427
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.031293214 0.004521759
## sample estimates:
## mean in group Female mean in group Male
## 0.05567915 0.06906487
##
##
## $Time_negative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -1.547, df = 791, p-value = 0.1223
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.026389721 0.003127285
## sample estimates:
## mean in group Female mean in group Male
## 0.04442160 0.05605282
##
##
## $Bedside_negative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 1.3822, df = 791, p-value = 0.1673
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004315428 0.024856885
## sample estimates:
## mean in group Female mean in group Male
## 0.03844281 0.02817208
##
##
## $Office_negative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 0.2261, df = 791, p-value = 0.8212
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01010528 0.01273628
## sample estimates:
## mean in group Female mean in group Male
## 0.03439982 0.03308432
##
##
## $Cost_negative
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = 0.66558, df = 791, p-value = 0.5059
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005521611 0.011187012
## sample estimates:
## mean in group Female mean in group Male
## 0.01542129 0.01258859
##
##
## $`Word Count`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -1.8513, df = 791, p-value = 0.0645
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -67.827806 1.985592
## sample estimates:
## mean in group Female mean in group Male
## 154.0048 186.9259
##
##
## $`Review Count`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -2.2132, df = 791, p-value = 0.02717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.96627433 -0.05789856
## sample estimates:
## mean in group Female mean in group Male
## 2.718072 3.230159
##
##
## $`Phrase Count`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -2.0798, df = 791, p-value = 0.03787
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.6204551 -0.1912998
## sample estimates:
## mean in group Female mean in group Male
## 15.79518 19.20106
##
##
## $`Average words per review`
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.1802, df = 791, p-value = 0.857
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.184387 6.808063
## sample estimates:
## mean in group Female mean in group Male
## 52.32771 53.01587
##
##
## $Overall_score
##
## Two Sample t-test
##
## data: x by ratemdsfinal$Gender
## t = -0.022851, df = 791, p-value = 0.9818
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07609207 0.07434085
## sample estimates:
## mean in group Female mean in group Male
## 0.4139883 0.4148639
Statistics based on Gender
# str(ratemdsfinal)
ratemdsfinal$Gender <- factor(ratemdsfinal$Gender)

# Density of Rating by gender. Gender is mapped from the plotted data frame
# rather than through `$` inside aes(), which ggplot2 discourages. The fill
# colours are pinned so they agree with the colour key in the title: the
# legend is hidden, and ggplot2's default hue palette would give its first
# (reddish) colour to the first level, Female.
ratemdsfinal %>%
  ggplot(aes(x = Rating, fill = Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Rating by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key

# Density of Positive_Proportion by gender. Gender is mapped from the plotted
# data frame rather than through `$` inside aes(), and the fill colours are
# pinned so they agree with the colour key in the title (the legend is hidden;
# ggplot2's default hue palette would draw the first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = Positive_Proportion, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Positive Proportion by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "Positive_Proportion",
    y = "Density of Positive Proportion"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key

# Density of Negative_Proportion by gender. Gender is mapped from the plotted
# data frame rather than through `$` inside aes(), and the fill colours are
# pinned so they agree with the colour key in the title (the legend is hidden;
# ggplot2's default hue palette would draw the first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = Negative_Proportion, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Negative_Proportion by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "Negative_Proportion",
    y = "Density of Negative_Proportion"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key

# Density of `Average neg_score` by gender. Gender is mapped from the plotted
# data frame rather than through `$` inside aes(), and the fill colours are
# pinned so they agree with the colour key in the title (the legend is hidden;
# ggplot2's default hue palette would draw the first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = `Average neg_score`, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of `Average neg_score` by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "`Average neg_score`",
    y = "Density of `Average neg_score`"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key

# Density of `Average pos_score` by gender. Gender is mapped from the plotted
# data frame rather than through `$` inside aes(), and the fill colours are
# pinned so they agree with the colour key in the title (the legend is hidden;
# ggplot2's default hue palette would draw the first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = `Average pos_score`, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of `Average pos_score` by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "`Average pos_score`",
    y = "Density of `Average pos_score`"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key

# Density of Years of Experience by gender. `fill = Gender` refers to the
# column of the piped data frame; writing `ratemdsfinal$Gender` inside aes()
# makes ggplot2 emit a "Use of `ratemdsfinal$Gender` is discouraged" warning.
# The fill colours are pinned so they agree with the colour key in the title
# (the legend is hidden; ggplot2's default hue palette would draw the first
# level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = `Years of Experience`, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Years of Experience by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "Years of Experience - in years",
    y = "Density of Years of Experience"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key
## Warning: Use of `ratemdsfinal$Gender` is discouraged. Use `Gender` instead.

# Density of Overall_score by gender. Gender is mapped from the plotted data
# frame rather than through `$` inside aes(), and the fill colours are pinned
# so they agree with the colour key in the title (the legend is hidden;
# ggplot2's default hue palette would draw the first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = Overall_score, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Overall_score by Gender on RateMDs\n(blue=Female, red=Male)",
    x = "Overall_score",
    y = "Density of Overall_score"
  ) +
  theme(legend.position = "none") # legend hidden; the title carries the key
## Statistics by Number of Years of Experience
# Quartiles, mean and range of the experience column (still numeric here).
summary(ratemdsfinal$`Years of Experience`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 19.00 26.00 25.94 33.00 56.00
# Bin years of experience into four ordered bands. cut() works on the numeric
# values directly, so a value outside the hard-coded 0-56 range (or a
# non-integer value) still lands in a band instead of silently becoming NA.
# Intervals are right-closed: (-Inf, 10], (10, 20], (20, 30], (30, Inf).
# NOTE(review): the first band includes exactly 10 years although its label
# reads "Less than 10 years", and the third label carries a trailing space.
# Both labels are kept unchanged so existing tables and plots keep their names.
ratemdsfinal$`Years of Experience` <- cut(
  ratemdsfinal$`Years of Experience`,
  breaks = c(-Inf, 10, 20, 30, Inf),
  labels = c(
    "Less than 10 years",
    "11 to 20 years",
    "21 to 30 years ",
    "31 years and above"
  )
)
# Number of doctors in each band.
summary(ratemdsfinal$`Years of Experience`)
## Less than 10 years 11 to 20 years 21 to 30 years 31 years and above
## 61 174 310 248
# Horizontal notched boxplot of Rating for each experience band.
boxplot(
  Rating ~ `Years of Experience`,
  data = ratemdsfinal,
  col = c("orange", "thistle", "lightgreen", "lightblue"), # one per band
  main = "Rating over Years of Experience on RateMDs", # chart title
  xlab = "Rating", # label of the value axis (drawn horizontally)
  ylab = "", # no label on the group axis
  las = 1, # draw all axis tick labels horizontally
  whisklty = 1, # whisker line type; 1 = solid line
  staplelty = 0, # staple (line at whisker end) type; 0 = none
  outpch = 16, # symbol for outliers; 16 = filled circle
  outcol = "slategray3", # colour for outliers
  notch = TRUE, # draw notches around the medians
  horizontal = TRUE # boxes run left to right
)

# One-way ANOVA: test whether mean Rating differs across the experience bands.
# NOTE(review): the formula already reaches into ratemdsfinal via `$`, so the
# `data =` argument is redundant. The call is left untouched because its text
# is echoed in the printed TukeyHSD results.
years.aov1 <- aov(ratemdsfinal$Rating~ratemdsfinal$`Years of Experience`, data = ratemdsfinal)
summary(years.aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## ratemdsfinal$`Years of Experience` 3 8.3 2.771 2.12 0.0963 .
## Residuals 789 1031.2 1.307
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD post-hoc comparison of mean Rating between every pair of bands.
TukeyHSD(years.aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = ratemdsfinal$Rating ~ ratemdsfinal$`Years of Experience`, data = ratemdsfinal)
##
## $`ratemdsfinal$`Years of Experience``
## diff lwr upr
## 11 to 20 years-Less than 10 years 0.236710948 -0.20122948 0.6746514
## 21 to 30 years -Less than 10 years 0.359677948 -0.05257317 0.7719291
## 31 years and above-Less than 10 years 0.365290851 -0.05534776 0.7859295
## 21 to 30 years -11 to 20 years 0.122967000 -0.15582985 0.4017638
## 31 years and above-11 to 20 years 0.128579904 -0.16247600 0.4196358
## 31 years and above-21 to 30 years 0.005612903 -0.24513139 0.2563572
## p adj
## 11 to 20 years-Less than 10 years 0.5050019
## 21 to 30 years -Less than 10 years 0.1118890
## 31 years and above-Less than 10 years 0.1145611
## 21 to 30 years -11 to 20 years 0.6676319
## 31 years and above-11 to 20 years 0.6664955
## 31 years and above-21 to 30 years 0.9999313
# Thus, no pairwise difference in mean Rating between experience bands is significant at the 5% level (all adjusted p-values exceed 0.1).
Plots based on Years of Experience over Dependent Variables
# Horizontal notched boxplot of Rating for each experience band, with the
# group axis labelled.
boxplot(
  Rating ~ `Years of Experience`,
  data = ratemdsfinal,
  col = c("orange", "thistle", "lightgreen", "lightblue"), # one per band
  main = "Rating over Years of Experience on RateMDs", # chart title
  xlab = "Rating", # label of the value axis (drawn horizontally)
  ylab = "Years of Experience", # label of the group axis
  las = 1, # draw all axis tick labels horizontally
  whisklty = 1, # whisker line type; 1 = solid line
  staplelty = 0, # staple (line at whisker end) type; 0 = none
  outpch = 16, # symbol for outliers; 16 = filled circle
  outcol = "slategray3", # colour for outliers
  notch = TRUE, # draw notches around the medians
  horizontal = TRUE # boxes run left to right
)

# Density of Rating by gender, one panel per experience band. Both the fill
# and the facet variable are taken from the plotted data frame: `fill = Gender`
# instead of `$` inside aes(), and vars() instead of handing facet_wrap() a raw
# vector. The fill colours are pinned so they agree with the colour key in the
# title (the legend is hidden; ggplot2's default hue palette would draw the
# first level, Female, in red).
ratemdsfinal %>%
  ggplot(aes(x = Rating, fill = Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Rating over Gender spread across Years of Experience on RateMDs\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") + # legend hidden; the title carries the key
  facet_wrap(vars(`Years of Experience`))

Healthgrades
library(readxl)
# Load the Healthgrades data set and inspect its columns and their types.
healthgrades <- read_excel("healthgradesfinal.xlsx")
str(healthgrades)
## tibble [247 × 33] (S3: tbl_df/tbl/data.frame)
## $ DoctorName : chr [1:247] "Dr. Amy Williams, MD" "Dr. Roma Franzia, MD" "Dr. Elizabeth Manjooran, MD" "Dr. Jason Canel, MD" ...
## $ Age : chr [1:247] "• Age 42" "• Age 50" "• Age 59" "• Age 48" ...
## $ Gender : chr [1:247] "Female" "Female" "Female" "Male" ...
## $ Speciality : chr [1:247] "Pediatrics" "Pediatrics" "Pediatrics" "Pediatrics" ...
## $ Years of Experience : num [1:247] 12 21 28 11 43 24 42 39 16 41 ...
## $ Biography : chr [1:247] "Dr. Amy Williams, MD is a pediatrics specialist in Chicago, IL. She specializes in pediatrics." "Dr. Roma Franzia, MD is a pediatrics specialist in Winnetka, IL and has been practicing for 21 years. She gradu"| __truncated__ "Dr. Elizabeth Manjooran, MD is a pediatrics specialist in Des Plaines, IL. She specializes in pediatrics." "Dr. Jason Canel, MD is a pediatrics specialist in Glenview, IL and has been practicing for 11 years. He graduat"| __truncated__ ...
## $ Rating : num [1:247] 4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
## $ Reviews : chr [1:247] "[\"I've been seeing Dr. Williams for years and through 2 pregnancies. She is the absolute best. Caring, compass"| __truncated__ "[\"Dr. Roma Franzia is the best pediatrician ever! So caring about my kids and family as a whole, always availa"| __truncated__ "['She’s not only a very good doctor but also a wonderful person! Also her office staff is very professional, wa"| __truncated__ "[\"We were assigned Dr Canel at my daughter's birth. I am so glad he was on call that day! My daughter is now t"| __truncated__ ...
## $ Review_cleaned : chr [1:247] "['[\"I\\'ve been seeing Dr', ' Williams for years ', ' through 2 pregnancies', ' She is the absolute best', ' C"| __truncated__ "['[\"Dr', ' Roma Franzia is the best pediatrician ever', ' So caring about my kids ', ' family as a whole', ' a"| __truncated__ "[\"['She’s not only a very good doctor \", ' also a wonderful person', ' Also her office staff is very professi"| __truncated__ "['[\"We were assigned Dr Canel at my daughter\\'s birth', ' I am so glad he was on call that day', ' My daughte"| __truncated__ ...
## $ Word Count : num [1:247] 614 793 176 199 84 182 144 690 50 68 ...
## $ Review Count : num [1:247] 21 11 4 5 2 3 5 17 3 2 ...
## $ Phrase Count : num [1:247] 166 143 35 47 16 42 41 152 11 11 ...
## $ Total Count of Sentiment: num [1:247] 21 11 4 5 2 3 5 17 3 2 ...
## $ Positive_Proportion : num [1:247] 1 1 1 1 1 ...
## $ Count of pos Sentiment : num [1:247] 21 11 4 5 2 3 5 16 3 2 ...
## $ Average pos_score : num [1:247] 0.824 0.792 0.916 0.897 0.936 ...
## $ Negative_Proportion : num [1:247] 0 0 0 0 0 ...
## $ Count of neg Sentiment : num [1:247] 0 0 0 0 0 0 0 1 0 0 ...
## $ Average neg_score : num [1:247] 0 0 0 0 0 0 0 -0.612 0 0 ...
## $ Communication_positive : num [1:247] 0.0676 0.0286 0.1 0.1538 0.3333 ...
## $ Expertisepositive : num [1:247] 0.189 0.114 0.1 0.154 0.167 ...
## $ Timepositive : num [1:247] 0.1216 0.0714 0 0.1154 0 ...
## $ Bedside_positive : num [1:247] 0.108 0.186 0.35 0.115 0.333 ...
## $ Officepositive : num [1:247] 0.027 0.0429 0.15 0.0769 0 ...
## $ Costpositive : num [1:247] 0.0541 0.0286 0 0.0385 0 ...
## $ Communication_negative : num [1:247] 0 0.0143 0 0 0 ...
## $ Expertisenegative : num [1:247] 0.027 0.0143 0 0 0 ...
## $ Time_negative : num [1:247] 0 0.0429 0 0 0 ...
## $ Bedside_negative : num [1:247] 0 0.0429 0 0 0 ...
## $ Office_negative : num [1:247] 0 0.0143 0 0 0 ...
## $ Cost_negative : num [1:247] 0 0 0 0 0 0 0 0 0 0 ...
## $ Average words per review: num [1:247] 29 72 44 40 42 61 29 41 17 34 ...
## $ Overall_score : num [1:247] 0.824 0.792 0.916 0.897 0.936 ...
# Treat Gender as a categorical variable (it is read in as character).
healthgrades$Gender<- factor(healthgrades$Gender)
# Count the missing values in every column of the Healthgrades data.
colSums(is.na(healthgrades))
## DoctorName Age Gender
## 0 0 0
## Speciality Years of Experience Biography
## 0 4 0
## Rating Reviews Review_cleaned
## 0 0 0
## Word Count Review Count Phrase Count
## 0 0 0
## Total Count of Sentiment Positive_Proportion Count of pos Sentiment
## 0 0 0
## Average pos_score Negative_Proportion Count of neg Sentiment
## 0 0 0
## Average neg_score Communication_positive Expertisepositive
## 0 0 0
## Timepositive Bedside_positive Officepositive
## 0 0 0
## Costpositive Communication_negative Expertisenegative
## 0 0 0
## Time_negative Bedside_negative Office_negative
## 0 0 0
## Cost_negative Average words per review Overall_score
## 0 0 0
# Impute missing `Years of Experience` with the rounded mean of the observed
# values.
missing_exp <- is.na(healthgrades$`Years of Experience`)
healthgrades$`Years of Experience`[missing_exp] <-
  round(mean(healthgrades$`Years of Experience`, na.rm = TRUE))

# Keep only the numeric columns. vapply() guarantees a logical mask with
# exactly one entry per column.
num_cols <- vapply(healthgrades, is.numeric, logical(1))
health <- healthgrades[, num_cols]

# Print descriptive statistics for Healthgrades: mean, sd, median, min, max
# and skew, selected by name so the table does not depend on column order.
stat_cols <- c("mean", "sd", "median", "min", "max", "skew")
kable(xtable(describe(health)[stat_cols]))
| | mean | sd | median | min | max | skew |
|:---|---:|---:|---:|---:|---:|---:|
| Years of Experience | 25.8623482 | 10.8210349 | 26.0000000 | 2.0000 | 53.0000000 | 0.1554778 |
| Rating | 4.5979757 | 0.3479848 | 4.6000000 | 3.5000 | 5.0000000 | -0.5336239 |
| Word Count | 194.0971660 | 270.5785255 | 124.0000000 | 11.0000 | 2018.0000000 | 4.1629431 |
| Review Count | 4.4089069 | 6.7522054 | 3.0000000 | 1.0000 | 54.0000000 | 4.9852808 |
| Phrase Count | 39.4777328 | 56.4946083 | 23.0000000 | 3.0000 | 432.0000000 | 4.4631481 |
| Total Count of Sentiment | 4.7692308 | 9.5512835 | 3.0000000 | 1.0000 | 90.0000000 | 6.8203153 |
| Positive_Proportion | 0.9075924 | 0.1964834 | 1.0000000 | 0.0000 | 1.0000000 | -2.6376345 |
| Count of pos Sentiment | 4.2995951 | 9.0171088 | 2.0000000 | 0.0000 | 86.0000000 | 7.0230331 |
| Average pos_score | 0.7945390 | 0.1824017 | 0.8376093 | 0.0000 | 0.9857000 | -2.5855164 |
| Negative_Proportion | 0.0924076 | 0.1964834 | 0.0000000 | 0.0000 | 1.0000000 | 2.6376345 |
| Count of neg Sentiment | 0.4696356 | 1.0997455 | 0.0000000 | 0.0000 | 9.0000000 | 4.0034054 |
| Average neg_score | -0.1475103 | 0.2808211 | 0.0000000 | -0.9471 | 0.0000000 | -1.6245998 |
| Communication_positive | 0.1074784 | 0.1175589 | 0.0769231 | 0.0000 | 0.7500000 | 1.5961346 |
| Expertisepositive | 0.1554605 | 0.1370798 | 0.1428571 | 0.0000 | 1.0000000 | 1.6127717 |
| Timepositive | 0.0940978 | 0.1083136 | 0.0666667 | 0.0000 | 0.5000000 | 1.4619393 |
| Bedside_positive | 0.1563275 | 0.1493452 | 0.1428571 | 0.0000 | 1.0000000 | 1.9710141 |
| Officepositive | 0.0472850 | 0.0756680 | 0.0000000 | 0.0000 | 0.3333333 | 1.7511825 |
| Costpositive | 0.0276534 | 0.0562286 | 0.0000000 | 0.0000 | 0.3333333 | 2.7729653 |
| Communication_negative | 0.0310048 | 0.0852062 | 0.0000000 | 0.0000 | 1.0000000 | 6.8384268 |
| Expertisenegative | 0.0308365 | 0.0874027 | 0.0000000 | 0.0000 | 1.0000000 | 6.7252228 |
| Time_negative | 0.0365831 | 0.0713895 | 0.0000000 | 0.0000 | 0.5000000 | 2.8473511 |
| Bedside_negative | 0.0183953 | 0.0478848 | 0.0000000 | 0.0000 | 0.3333333 | 3.7851412 |
| Office_negative | 0.0151664 | 0.0443944 | 0.0000000 | 0.0000 | 0.3333333 | 4.4347950 |
| Cost_negative | 0.0052563 | 0.0278533 | 0.0000000 | 0.0000 | 0.3333333 | 8.2021183 |
| Average words per review | 45.6032389 | 19.9854741 | 44.0000000 | 8.0000 | 135.0000000 | 0.7046350 |
| Overall_score | 0.6470287 | 0.3555858 | 0.7937000 | -0.8750 | 0.9857000 | -1.5541893 |
## Multiple t-tests for Healthgrades
# Pooled-variance two-sample t-test of every numeric Healthgrades column
# against doctor gender. No multiple-comparison adjustment is applied to the
# resulting p-values.
a <- lapply(health, function(x) t.test(x ~ healthgrades$Gender, var.equal = TRUE))
print(a)
## $`Years of Experience`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -4.4111, df = 245, p-value = 1.541e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -8.499717 -3.252163
## sample estimates:
## mean in group Female mean in group Male
## 23.15038 29.02632
##
##
## $Rating
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.42814, df = 245, p-value = 0.6689
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0685821 0.1066773
## sample estimates:
## mean in group Female mean in group Male
## 4.606767 4.587719
##
##
## $`Word Count`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.2259, df = 245, p-value = 0.2214
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -25.66264 110.24660
## sample estimates:
## mean in group Female mean in group Male
## 213.6165 171.3246
##
##
## $`Review Count`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.1465, df = 245, p-value = 0.2527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7089661 2.6839034
## sample estimates:
## mean in group Female mean in group Male
## 4.864662 3.877193
##
##
## $`Phrase Count`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.1268, df = 245, p-value = 0.2609
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.074789 22.315390
## sample estimates:
## mean in group Female mean in group Male
## 43.22556 35.10526
##
##
## $`Total Count of Sentiment`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.3748, df = 245, p-value = 0.1705
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.7239489 4.0698136
## sample estimates:
## mean in group Female mean in group Male
## 5.541353 3.868421
##
##
## $Positive_Proportion
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -0.16404, df = 245, p-value = 0.8698
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05361620 0.04537245
## sample estimates:
## mean in group Female mean in group Male
## 0.9056900 0.9098119
##
##
## $`Count of pos Sentiment`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.5925, df = 245, p-value = 0.1126
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4328089 4.0869442
## sample estimates:
## mean in group Female mean in group Male
## 5.142857 3.315789
##
##
## $`Average pos_score`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -0.22508, df = 245, p-value = 0.8221
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05119520 0.04069462
## sample estimates:
## mean in group Female mean in group Male
## 0.7921158 0.7973661
##
##
## $Negative_Proportion
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.16404, df = 245, p-value = 0.8698
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04537245 0.05361620
## sample estimates:
## mean in group Female mean in group Male
## 0.09431001 0.09018813
##
##
## $`Count of neg Sentiment`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -1.0986, df = 245, p-value = 0.273
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.4304976 0.1222269
## sample estimates:
## mean in group Female mean in group Male
## 0.3984962 0.5526316
##
##
## $`Average neg_score`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.73812, df = 245, p-value = 0.4611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04418382 0.09714504
## sample estimates:
## mean in group Female mean in group Male
## -0.1352885 -0.1617691
##
##
## $Communication_positive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.6974, df = 245, p-value = 0.09089
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004069804 0.054814609
## sample estimates:
## mean in group Female mean in group Male
## 0.11918876 0.09381636
##
##
## $Expertisepositive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.38903, df = 245, p-value = 0.6976
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0277035 0.0413400
## sample estimates:
## mean in group Female mean in group Male
## 0.1586074 0.1517891
##
##
## $Timepositive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -0.8634, df = 245, p-value = 0.3888
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03918680 0.01530196
## sample estimates:
## mean in group Female mean in group Male
## 0.08858589 0.10052831
##
##
## $Bedside_positive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.34221, df = 245, p-value = 0.7325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03107836 0.04414818
## sample estimates:
## mean in group Female mean in group Male
## 0.1593436 0.1528087
##
##
## $Officepositive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.7565, df = 245, p-value = 0.4501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.0117271 0.0263522
## sample estimates:
## mean in group Female mean in group Male
## 0.05066005 0.04334750
##
##
## $Costpositive
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 1.0907, df = 245, p-value = 0.2765
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.006306086 0.021955029
## sample estimates:
## mean in group Female mean in group Male
## 0.03126467 0.02344020
##
##
## $Communication_negative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.16922, df = 245, p-value = 0.8658
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01961945 0.02330742
## sample estimates:
## mean in group Female mean in group Male
## 0.03185587 0.03001188
##
##
## $Expertisenegative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.49546, df = 245, p-value = 0.6207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01647136 0.02754264
## sample estimates:
## mean in group Female mean in group Male
## 0.03339144 0.02785580
##
##
## $Time_negative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.94808, df = 245, p-value = 0.344
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.009310659 0.026591689
## sample estimates:
## mean in group Female mean in group Male
## 0.04057103 0.03193051
##
##
## $Bedside_negative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -1.275, df = 245, p-value = 0.2035
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01980555 0.00424055
## sample estimates:
## mean in group Female mean in group Male
## 0.01480339 0.02258589
##
##
## $Office_negative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.21281, df = 245, p-value = 0.8317
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.009974398 0.012390719
## sample estimates:
## mean in group Female mean in group Male
## 0.01572405 0.01451588
##
##
## $Cost_negative
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = -0.54196, df = 245, p-value = 0.5883
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.008941925 0.005082995
## sample estimates:
## mean in group Female mean in group Male
## 0.004365786 0.006295251
##
##
## $`Average words per review`
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.13875, df = 245, p-value = 0.8898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.679800 5.389073
## sample estimates:
## mean in group Female mean in group Male
## 45.76692 45.41228
##
##
## $Overall_score
##
## Two Sample t-test
##
## data: x by healthgrades$Gender
## t = 0.46704, df = 245, p-value = 0.6409
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.06830712 0.11076776
## sample estimates:
## mean in group Female mean in group Male
## 0.6568273 0.6355970
Statistics based on Gender
## Plots with Gender
# Density plots of selected HealthGrades measures, one curve per gender.
# The measures come from `health`; the grouping vector is read from
# healthgrades$Gender.
#
# The legend is hidden and every title carries the colour key
# "(blue=Female, red=Male)" instead, so the fill colours are pinned to that
# key here. With ggplot2's default discrete palette the first level (Female)
# would be drawn in a red tone and the second (Male) in a blue-green tone,
# i.e. the opposite of what the titles state.
gender_colours <- c(Female = "blue", Male = "red")

health %>% # Rating by gender
  ggplot(aes(x = Rating, fill = healthgrades$Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Rating by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # Positive_Proportion by gender
  ggplot(aes(x = Positive_Proportion, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Positive Proportion by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "Positive_Proportion",
    y = "Density of Positive Proportion"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # Negative_Proportion by gender
  ggplot(aes(x = Negative_Proportion, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Negative_Proportion by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "Negative_Proportion",
    y = "Density of Negative_Proportion"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # `Average neg_score` by gender
  ggplot(aes(x = `Average neg_score`, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of `Average neg_score` by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "`Average neg_score`",
    y = "Density of `Average neg_score`"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # `Average pos_score` by gender
  ggplot(aes(x = `Average pos_score`, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of `Average pos_score` by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "`Average pos_score`",
    y = "Density of `Average pos_score`"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # Years of Experience by gender
  ggplot(aes(x = `Years of Experience`, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Years of Experience by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "Years of Experience - in years",
    y = "Density of Years of Experience"
  ) +
  theme(legend.position = "none") # key is given in the title

health %>% # Overall_score by gender
  ggplot(aes(x = Overall_score, fill = healthgrades$Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Overall_score by Gender on HealthGrades\n(blue=Female, red=Male)",
    x = "Overall_score",
    y = "Density of Overall_score"
  ) +
  theme(legend.position = "none") # key is given in the title

Statistics based on Years of Experience in HealthGrades
summary(healthgrades$`Years of Experience`)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.00 18.00 26.00 25.86 34.00 53.00
# Bin years of experience into four bands: up to 10, 11-20, 21-30, 31+.
# cut() is used instead of enumerating 0:10, 11:20, 21:30, 31:56 as factor
# levels, so a value above 56 or a non-integer value lands in its band
# rather than silently becoming NA. For the integer range seen above (2-53)
# the result is the same four-level factor.
# The labels are kept exactly as before, including the trailing space in
# "21 to 30 years ", because the combined HealthGrades + RateMDs factor built
# later has only these four levels, so the labels on both sides must match.
healthgrades$`Years of Experience` <- cut(
  healthgrades$`Years of Experience`,
  breaks = c(-Inf, 10, 20, 30, Inf), # right-closed: (-Inf,10], (10,20], ...
  labels = c(
    "Less than 10 years",
    "11 to 20 years",
    "21 to 30 years ",
    "31 years and above"
  )
)
summary(healthgrades$`Years of Experience`)
## Less than 10 years 11 to 20 years 21 to 30 years 31 years and above
## 22 58 86 81
# Horizontal, notched boxplot of HealthGrades Rating for each experience band.
boxplot(
  healthgrades$Rating ~ healthgrades$`Years of Experience`, # response ~ grouping factor
  horizontal = TRUE, # boxes run left to right, so Rating is on the x-axis
  notch = TRUE, # draw a notch around each median
  col = c("orange", "thistle", "lightgreen", "lightblue", "gray", "yellow"), # box fill colours
  main = "Rating over Years of Experience on Healthgrades", # chart title
  xlab = "Rating", # x-axis label
  ylab = "Years of Experience", # y-axis label
  las = 1, # axis tick labels always horizontal
  whisklty = 1, # whisker line type; 1 = solid line
  staplelty = 0, # staple (line at whisker end) type; 0 = none
  outpch = 16, # symbol for outliers; 16 = filled circle
  outcol = "slategray3" # colour for outliers
)

# One-way ANOVA of HealthGrades Rating across the four experience bands,
# followed by Tukey HSD pairwise comparisons between the bands.
years.aov1 <- aov(healthgrades$Rating~healthgrades$`Years of Experience`, data = healthgrades)
summary(years.aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## healthgrades$`Years of Experience` 3 0.404 0.1346 1.113 0.344
## Residuals 243 29.385 0.1209
TukeyHSD(years.aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = healthgrades$Rating ~ healthgrades$`Years of Experience`, data = healthgrades)
##
## $`healthgrades$`Years of Experience``
## diff lwr upr
## 11 to 20 years-Less than 10 years -0.04059561 -0.26584306 0.18465184
## 21 to 30 years -Less than 10 years -0.12167019 -0.33659747 0.09325709
## 31 years and above-Less than 10 years -0.05583614 -0.27211047 0.16043820
## 21 to 30 years -11 to 20 years -0.08107458 -0.23392205 0.07177289
## 31 years and above-11 to 20 years -0.01524053 -0.16997644 0.13949538
## 31 years and above-21 to 30 years 0.06583405 -0.07345171 0.20511981
## p adj
## 11 to 20 years-Less than 10 years 0.9663542
## 21 to 30 years -Less than 10 years 0.4606505
## 31 years and above-Less than 10 years 0.9090996
## 21 to 30 years -11 to 20 years 0.5180421
## 31 years and above-11 to 20 years 0.9941852
## 31 years and above-21 to 30 years 0.6130291
# Rating does not differ significantly between experience bands: the overall
# F-test gives p = 0.344 and every Tukey-adjusted pairwise p-value is > 0.46.
Plots for Years of Experience on HealthGrades
# Density of HealthGrades Rating by gender, one panel per experience band.
# The legend is hidden and the title states "(blue=Female, red=Male)", so the
# fill colours are set explicitly to match; ggplot2's default palette would
# colour Female (first level) red-toned and Male blue-green, contradicting
# the title.
health %>%
  ggplot(aes(x = Rating, fill = healthgrades$Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Rating over Gender spread across Years of Experience on HealthGrades\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") + # key is given in the title
  facet_wrap(~healthgrades$`Years of Experience`) # one panel per band

Combined
# Combined data set: HealthGrades rows stacked on top of RateMDs rows.
options(scipen = 999) # strongly prefer fixed over scientific notation in output

# Each source is subset by column position. The two index vectors differ
# because the sources order their columns differently; both are meant to
# pick the same 21 variables (gender, experience band, the 12 aspect
# proportions, average words per review, and the six outcome measures).
hcommon <- data.frame(healthgrades[, c(3, 5, 20:32, 7, 14, 17, 16, 19, 33)])
rcommon <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 6, 11, 14, 12, 16, 33)])
dcombine <- data.frame(rbind(hcommon, rcommon))
str(dcombine)
## 'data.frame': 1040 obs. of 21 variables:
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
## $ Years.of.Experience : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
## $ Communication_positive : num 0.0676 0.0286 0.1 0.1538 0.3333 ...
## $ Expertisepositive : num 0.189 0.114 0.1 0.154 0.167 ...
## $ Timepositive : num 0.1216 0.0714 0 0.1154 0 ...
## $ Bedside_positive : num 0.108 0.186 0.35 0.115 0.333 ...
## $ Officepositive : num 0.027 0.0429 0.15 0.0769 0 ...
## $ Costpositive : num 0.0541 0.0286 0 0.0385 0 ...
## $ Communication_negative : num 0 0.0143 0 0 0 ...
## $ Expertisenegative : num 0.027 0.0143 0 0 0 ...
## $ Time_negative : num 0 0.0429 0 0 0 ...
## $ Bedside_negative : num 0 0.0429 0 0 0 ...
## $ Office_negative : num 0 0.0143 0 0 0 ...
## $ Cost_negative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Average.words.per.review: num 29 72 44 40 42 61 29 41 17 34 ...
## $ Rating : num 4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
## $ Positive_Proportion : num 1 1 1 1 1 ...
## $ Negative_Proportion : num 0 0 0 0 0 ...
## $ Average.pos_score : num 0.824 0.792 0.916 0.897 0.936 ...
## $ Average.neg_score : num 0 0 0 0 0 0 0 -0.612 0 0 ...
## $ Overall_score : num 0.824 0.792 0.916 0.897 0.936 ...
# Descriptive statistics for the combined data, keeping only the
# mean, sd, median, min, max and skew columns of psych::describe().
kable(
  xtable(
    describe(dcombine)[c("mean", "sd", "median", "min", "max", "skew")]
  )
)
|
|
mean
|
sd
|
median
|
min
|
max
|
skew
|
|
Gender*
|
1.4730769
|
0.4995148
|
1.0000000
|
1.000
|
2.0000
|
0.1076933
|
|
Years.of.Experience*
|
2.9336538
|
0.9247022
|
3.0000000
|
1.000
|
4.0000
|
-0.4741071
|
|
Communication_positive
|
0.0651197
|
0.1146517
|
0.0000000
|
0.000
|
1.0000
|
3.1973305
|
|
Expertisepositive
|
0.1262908
|
0.1620018
|
0.0909091
|
0.000
|
1.0000
|
2.4456190
|
|
Timepositive
|
0.0627967
|
0.1055687
|
0.0000000
|
0.000
|
1.0000
|
2.7014127
|
|
Bedside_positive
|
0.1433768
|
0.1727294
|
0.1111111
|
0.000
|
1.0000
|
2.2205717
|
|
Officepositive
|
0.0544948
|
0.1060410
|
0.0000000
|
0.000
|
1.0000
|
3.5336472
|
|
Costpositive
|
0.0361672
|
0.0736005
|
0.0000000
|
0.000
|
0.5000
|
2.7902871
|
|
Communication_negative
|
0.0399820
|
0.0908382
|
0.0000000
|
0.000
|
1.0000
|
4.4908606
|
|
Expertisenegative
|
0.0546442
|
0.1206358
|
0.0000000
|
0.000
|
1.0000
|
4.3331605
|
|
Time_negative
|
0.0467875
|
0.0988836
|
0.0000000
|
0.000
|
1.0000
|
3.8977334
|
|
Bedside_negative
|
0.0299485
|
0.0944457
|
0.0000000
|
0.000
|
1.0000
|
6.6026214
|
|
Office_negative
|
0.0293538
|
0.0750176
|
0.0000000
|
0.000
|
0.6000
|
3.8922822
|
|
Cost_negative
|
0.0119775
|
0.0541032
|
0.0000000
|
0.000
|
1.0000
|
9.5204150
|
|
Average.words.per.review
|
50.9807692
|
47.9577896
|
41.0000000
|
0.000
|
737.0000
|
6.3078409
|
|
Rating
|
4.1334423
|
1.0470945
|
4.5000000
|
1.000
|
5.0000
|
-1.3967250
|
|
Positive_Proportion
|
0.7744462
|
0.3391379
|
1.0000000
|
0.000
|
1.0000
|
-1.3426505
|
|
Negative_Proportion
|
0.1945310
|
0.3203035
|
0.0000000
|
0.000
|
1.0000
|
1.5719105
|
|
Average.pos_score
|
0.6917569
|
0.2982497
|
0.7988333
|
0.000
|
0.9923
|
-1.4246449
|
|
Average.neg_score
|
-0.2221033
|
0.3278596
|
0.0000000
|
-0.981
|
0.0000
|
-1.0272195
|
|
Overall_score
|
0.4696536
|
0.5107502
|
0.6907250
|
-0.981
|
0.9914
|
-1.1026585
|
## Gender t-tests for the combined data (HealthGrades + RateMDs): one
## pooled-variance two-sample t-test per column of `dcombine`, skipping the
## first two columns (Gender and Years.of.Experience).
a <- lapply(
  dcombine[, -(1:2)],
  function(x) t.test(x ~ dcombine$Gender, var.equal = TRUE)
)
print(a)
## $Communication_positive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.21913, df = 1038, p-value = 0.8266
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01241801 0.01554012
## sample estimates:
## mean in group Female mean in group Male
## 0.06585818 0.06429713
##
##
## $Expertisepositive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.0042608, df = 1038, p-value = 0.9966
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01970985 0.01979564
## sample estimates:
## mean in group Female mean in group Male
## 0.1263111 0.1262682
##
##
## $Timepositive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.27915, df = 1038, p-value = 0.7802
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01104033 0.01470251
## sample estimates:
## mean in group Female mean in group Male
## 0.06366296 0.06183186
##
##
## $Bedside_positive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 1.2298, df = 1038, p-value = 0.219
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.007855475 0.034235379
## sample estimates:
## mean in group Female mean in group Male
## 0.1496166 0.1364267
##
##
## $Officepositive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -0.24602, df = 1038, p-value = 0.8057
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01455012 0.01130812
## sample estimates:
## mean in group Female mean in group Male
## 0.05372793 0.05534893
##
##
## $Costpositive
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 2.0143, df = 1038, p-value = 0.04423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.0002377392 0.0181508551
## sample estimates:
## mean in group Female mean in group Male
## 0.04051680 0.03132251
##
##
## $Communication_negative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -0.031827, df = 1038, p-value = 0.9746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.01125547 0.01089617
## sample estimates:
## mean in group Female mean in group Male
## 0.03989703 0.04007668
##
##
## $Expertisenegative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -1.2344, df = 1038, p-value = 0.2173
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.023944766 0.005451714
## sample estimates:
## mean in group Female mean in group Male
## 0.05026990 0.05951643
##
##
## $Time_negative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -1.1361, df = 1038, p-value = 0.2562
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.019025744 0.005072864
## sample estimates:
## mean in group Female mean in group Male
## 0.04348706 0.05046350
##
##
## $Bedside_negative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.99352, df = 1038, p-value = 0.3207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.005682433 0.017338003
## sample estimates:
## mean in group Female mean in group Male
## 0.03270550 0.02687772
##
##
## $Office_negative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.23284, df = 1038, p-value = 0.8159
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.008061275 0.010231912
## sample estimates:
## mean in group Female mean in group Male
## 0.02986720 0.02878188
##
##
## $Cost_negative
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.47828, df = 1038, p-value = 0.6325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.004988305 0.008203773
## sample estimates:
## mean in group Female mean in group Male
## 0.01273811 0.01113038
##
##
## $Average.words.per.review
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -0.17405, df = 1038, p-value = 0.8619
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.366033 5.328706
## sample estimates:
## mean in group Female mean in group Male
## 50.73540 51.25407
##
##
## $Rating
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -0.94877, df = 1038, p-value = 0.343
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.18931953 0.06591261
## sample estimates:
## mean in group Female mean in group Male
## 4.104252 4.165955
##
##
## $Positive_Proportion
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = -0.27771, df = 1038, p-value = 0.7813
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.04720122 0.03549734
## sample estimates:
## mean in group Female mean in group Male
## 0.7716777 0.7775297
##
##
## $Negative_Proportion
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.64885, df = 1038, p-value = 0.5166
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.02613506 0.05195780
## sample estimates:
## mean in group Female mean in group Male
## 0.2006391 0.1877277
##
##
## $Average.pos_score
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.012898, df = 1038, p-value = 0.9897
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03612631 0.03660439
## sample estimates:
## mean in group Female mean in group Male
## 0.691870 0.691631
##
##
## $Average.neg_score
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.32777, df = 1038, p-value = 0.7432
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03329650 0.04665067
## sample estimates:
## mean in group Female mean in group Male
## -0.2189445 -0.2256216
##
##
## $Overall_score
##
## Two Sample t-test
##
## data: x by dcombine$Gender
## t = 0.21793, df = 1038, p-value = 0.8275
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.05535781 0.06919006
## sample estimates:
## mean in group Female mean in group Male
## 0.4729255 0.4660093
# One-way ANOVA of Rating by Gender on the combined data, followed by
# Tukey HSD. Note: the fit is stored in `years.aov1` even though the factor
# here is Gender, not years of experience.
years.aov1 <- aov(dcombine$Rating~dcombine$Gender, data = dcombine)
summary(years.aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## dcombine$Gender 1 1 0.987 0.9 0.343
## Residuals 1038 1138 1.097
TukeyHSD(years.aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dcombine$Rating ~ dcombine$Gender, data = dcombine)
##
## $`dcombine$Gender`
## diff lwr upr p adj
## Male-Female 0.06170346 -0.06591261 0.1893195 0.3429607
# The Male-Female difference in Rating is not significant (p = 0.343).
# One-way ANOVA of Rating across the experience bands on the combined data,
# followed by Tukey HSD pairwise comparisons. This overwrites the
# `years.aov1` object from the Gender ANOVA.
years.aov1 <- aov(dcombine$Rating~dcombine$Years.of.Experience, data = dcombine)
summary(years.aov1)
## Df Sum Sq Mean Sq F value Pr(>F)
## dcombine$Years.of.Experience 3 4.4 1.458 1.331 0.263
## Residuals 1036 1134.8 1.095
TukeyHSD(years.aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dcombine$Rating ~ dcombine$Years.of.Experience, data = dcombine)
##
## $`dcombine$Years.of.Experience`
## diff lwr upr
## 11 to 20 years-Less than 10 years 0.15253168 -0.19191755 0.4969809
## 21 to 30 years -Less than 10 years 0.20791438 -0.11719844 0.5330272
## 31 years and above-Less than 10 years 0.24300948 -0.08778982 0.5738088
## 21 to 30 years -11 to 20 years 0.05538271 -0.16727679 0.2780422
## 31 years and above-11 to 20 years 0.09047781 -0.14040547 0.3213611
## 31 years and above-21 to 30 years 0.03509510 -0.16580328 0.2359935
## p adj
## 11 to 20 years-Less than 10 years 0.6651255
## 21 to 30 years -Less than 10 years 0.3534725
## 31 years and above-Less than 10 years 0.2327867
## 21 to 30 years -11 to 20 years 0.9190063
## 31 years and above-11 to 20 years 0.7445574
## 31 years and above-21 to 30 years 0.9697224
# Rating does not differ significantly between experience bands: the overall
# F-test gives p = 0.263 and every Tukey-adjusted pairwise p-value is > 0.23.
Plotting for Combined results
# Density plots of the combined (HealthGrades + RateMDs) measures, one curve
# per gender.
#
# The legend is hidden and every title carries the colour key
# "(blue=Female, red=Male)" instead, so the fill colours are pinned to that
# key here. With ggplot2's default discrete palette the first level (Female)
# would be drawn in a red tone and the second (Male) in a blue-green tone,
# i.e. the opposite of what the titles state.
gender_colours <- c(Female = "blue", Male = "red")

dcombine %>% # Rating by gender
  ggplot(aes(x = Rating, fill = Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Rating by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") # key is given in the title

# `fill = Gender` (not `dcombine$Gender`) so the aesthetic is resolved inside
# the plot data; the `$` form made ggplot2 warn that its use is discouraged.
dcombine %>% # Positive_Proportion by gender
  ggplot(aes(x = Positive_Proportion, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Positive Proportion by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "Positive_Proportion",
    y = "Density of Positive Proportion"
  ) +
  theme(legend.position = "none") # key is given in the title

dcombine %>% # Negative_Proportion by gender
  ggplot(aes(x = Negative_Proportion, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Negative_Proportion by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "Negative_Proportion",
    y = "Density of Negative_Proportion"
  ) +
  theme(legend.position = "none") # key is given in the title

dcombine %>% # Average.neg_score by gender
  ggplot(aes(x = Average.neg_score, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of `Average neg_score` by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "`Average neg_score`",
    y = "Density of `Average neg_score`"
  ) +
  theme(legend.position = "none") # key is given in the title

dcombine %>% # Average.pos_score by gender
  ggplot(aes(x = Average.pos_score, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of `Average pos_score` by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "`Average pos_score`",
    y = "Density of `Average pos_score`"
  ) +
  theme(legend.position = "none") # key is given in the title

dcombine %>% # Overall_score by gender
  ggplot(aes(x = Overall_score, fill = Gender)) +
  geom_density(alpha = .5, position = "identity") +
  scale_fill_manual(values = gender_colours) +
  labs(
    title = "Density of Overall_score by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",
    x = "Overall_score",
    y = "Density of Overall_score"
  ) +
  theme(legend.position = "none") # key is given in the title

###### Checking ######
# Vertical, un-notched boxplot of Rating for each experience band on the
# combined data.
boxplot(
  dcombine$Rating ~ dcombine$Years.of.Experience, # response ~ grouping factor
  horizontal = FALSE, # boxes are vertical, so Rating is on the y-axis
  notch = FALSE, # plain boxes, no notch around the median
  col = c("orange", "thistle", "lightgreen", "lightblue", "gray", "yellow"), # box fill colours
  main = "Rating over Years of Experience on Healthgrades and RateMDs", # chart title
  xlab = "Years of Experience", # x-axis label
  ylab = "Rating", # y-axis label
  las = 1, # axis tick labels always horizontal
  whisklty = 1, # whisker line type; 1 = solid line
  staplelty = 0, # staple (line at whisker end) type; 0 = none
  outpch = 16, # symbol for outliers; 16 = filled circle
  outcol = "slategray3" # colour for outliers
)

#######Checking
### Years of experience on both websites
# Density of Rating by gender on the combined data, one panel per experience
# band. The legend is hidden and the title states "(blue=Female, red=Male)",
# so the fill colours are set explicitly to match; ggplot2's default palette
# would colour Female (first level) red-toned and Male blue-green,
# contradicting the title.
dcombine %>%
  ggplot(aes(x = Rating, fill = Gender)) +
  geom_density(alpha = .4, position = "identity") +
  scale_fill_manual(values = c(Female = "blue", Male = "red")) +
  labs(
    title = "Density of Rating over Gender spread across Years of Experience on HealthGrades and RateMDs\n(blue=Female, red=Male)",
    x = "Rating",
    y = "Density of Rating"
  ) +
  theme(legend.position = "none") + # key is given in the title
  facet_wrap(~Years.of.Experience) # column of the plot data, one panel per band

Linear Regression Modeling
library(corrplot)
## corrplot 0.84 loaded
## Healthgrades
#str(ratemdsfinal)
#str(healthgrades)
# Six modelling frames for HealthGrades. All share the same 15 predictor
# columns (gender, experience band, 12 aspect proportions, average words per
# review) and differ only in the response column appended last.
hg_predictor_cols <- c(3, 5, 20:32)

hdff1 <- data.frame(healthgrades[, c(hg_predictor_cols, 7)]) # response: Rating
names(hdff1)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Rating"
hdff2 <- data.frame(healthgrades[, c(hg_predictor_cols, 14)]) # response: Positive_Proportion
names(hdff2)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Positive_Proportion"
hdff3 <- data.frame(healthgrades[, c(hg_predictor_cols, 17)]) # response: Negative_Proportion
names(hdff3)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Negative_Proportion"
hdff4 <- data.frame(healthgrades[, c(hg_predictor_cols, 16)]) # response: Average.pos_score
names(hdff4)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Average.pos_score"
hdff5 <- data.frame(healthgrades[, c(hg_predictor_cols, 19)]) # response: Average.neg_score
names(hdff5)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Average.neg_score"
hdff6 <- data.frame(healthgrades[, c(hg_predictor_cols, 33)]) # response: Overall_score
names(hdff6)
## [1] "Gender" "Years.of.Experience"
## [3] "Communication_positive" "Expertisepositive"
## [5] "Timepositive" "Bedside_positive"
## [7] "Officepositive" "Costpositive"
## [9] "Communication_negative" "Expertisenegative"
## [11] "Time_negative" "Bedside_negative"
## [13] "Office_negative" "Cost_negative"
## [15] "Average.words.per.review" "Overall_score"
# Regression (Healthgrades) ----
# Model 1: Rating regressed on every other column of hdff1.
lh1 <- lm(Rating ~ ., data = hdff1)
summary(lh1)
##
## Call:
## lm(formula = Rating ~ ., data = hdff1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.99217 -0.22596 0.02488 0.26852 0.57756
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.756741 0.106861 44.513
## GenderMale -0.031296 0.044540 -0.703
## Years.of.Experience11 to 20 years -0.030114 0.084181 -0.358
## Years.of.Experience21 to 30 years -0.063216 0.082504 -0.766
## Years.of.Experience31 years and above -0.014015 0.083938 -0.167
## Communication_positive -0.016877 0.185336 -0.091
## Expertisepositive -0.059499 0.165435 -0.360
## Timepositive 0.102207 0.203954 0.501
## Bedside_positive 0.163074 0.144485 1.129
## Officepositive -0.699218 0.290933 -2.403
## Costpositive 0.437641 0.386192 1.133
## Communication_negative -1.016354 0.361890 -2.808
## Expertisenegative 0.215638 0.339537 0.635
## Time_negative -0.447024 0.329563 -1.356
## Bedside_negative 0.520510 0.492825 1.056
## Office_negative -1.143112 0.531891 -2.149
## Cost_negative -0.498560 0.843224 -0.591
## Average.words.per.review -0.001362 0.001112 -1.226
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.48298
## Years.of.Experience11 to 20 years 0.72088
## Years.of.Experience21 to 30 years 0.44434
## Years.of.Experience31 years and above 0.86754
## Communication_positive 0.92752
## Expertisepositive 0.71944
## Timepositive 0.61676
## Bedside_positive 0.26022
## Officepositive 0.01704 *
## Costpositive 0.25831
## Communication_negative 0.00541 **
## Expertisenegative 0.52600
## Time_negative 0.17630
## Bedside_negative 0.29200
## Office_negative 0.03267 *
## Cost_negative 0.55493
## Average.words.per.review 0.22156
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3301 on 229 degrees of freedom
## Multiple R-squared: 0.1624, Adjusted R-squared: 0.1002
## F-statistic: 2.612 on 17 and 229 DF, p-value: 0.0006955
# Model 2: Positive_Proportion regressed on every other column of hdff2.
lh2 <- lm(Positive_Proportion ~ ., data = hdff2)
summary(lh2)
##
## Call:
## lm(formula = Positive_Proportion ~ ., data = hdff2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.62532 -0.02850 0.01704 0.05916 0.41045
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.9436522 0.0462024 20.424
## GenderMale -0.0108854 0.0192572 -0.565
## Years.of.Experience11 to 20 years -0.0330841 0.0363966 -0.909
## Years.of.Experience21 to 30 years 0.0018360 0.0356717 0.051
## Years.of.Experience31 years and above 0.0071221 0.0362916 0.196
## Communication_positive 0.0192247 0.0801322 0.240
## Expertisepositive 0.0960011 0.0715279 1.342
## Timepositive 0.0234726 0.0881817 0.266
## Bedside_positive 0.1454767 0.0624697 2.329
## Officepositive 0.0411917 0.1257880 0.327
## Costpositive 0.2346136 0.1669745 1.405
## Communication_negative -0.5411209 0.1564674 -3.458
## Expertisenegative -0.4561526 0.1468029 -3.107
## Time_negative -1.0377078 0.1424904 -7.283
## Bedside_negative 0.4154977 0.2130787 1.950
## Office_negative -0.1205638 0.2299691 -0.524
## Cost_negative -1.6477666 0.3645774 -4.520
## Average.words.per.review -0.0001089 0.0004806 -0.227
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.572447
## Years.of.Experience11 to 20 years 0.364312
## Years.of.Experience21 to 30 years 0.958996
## Years.of.Experience31 years and above 0.844592
## Communication_positive 0.810613
## Expertisepositive 0.180877
## Timepositive 0.790337
## Bedside_positive 0.020743 *
## Officepositive 0.743612
## Costpositive 0.161351
## Communication_negative 0.000648 ***
## Expertisenegative 0.002127 **
## Time_negative 0.00000000000522 ***
## Bedside_negative 0.052400 .
## Office_negative 0.600604
## Cost_negative 0.00000993081246 ***
## Average.words.per.review 0.820935
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1427 on 229 degrees of freedom
## Multiple R-squared: 0.5089, Adjusted R-squared: 0.4724
## F-statistic: 13.96 on 17 and 229 DF, p-value: < 0.00000000000000022
# Model 3: Negative_Proportion regressed on every other column of hdff3.
lh3 <- lm(Negative_Proportion ~ ., data = hdff3)
summary(lh3)
##
## Call:
## lm(formula = Negative_Proportion ~ ., data = hdff3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.41045 -0.05916 -0.01704 0.02850 0.62532
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.0563478 0.0462024 1.220
## GenderMale 0.0108854 0.0192572 0.565
## Years.of.Experience11 to 20 years 0.0330841 0.0363966 0.909
## Years.of.Experience21 to 30 years -0.0018360 0.0356717 -0.051
## Years.of.Experience31 years and above -0.0071221 0.0362916 -0.196
## Communication_positive -0.0192247 0.0801322 -0.240
## Expertisepositive -0.0960011 0.0715279 -1.342
## Timepositive -0.0234726 0.0881817 -0.266
## Bedside_positive -0.1454767 0.0624697 -2.329
## Officepositive -0.0411917 0.1257880 -0.327
## Costpositive -0.2346136 0.1669745 -1.405
## Communication_negative 0.5411209 0.1564674 3.458
## Expertisenegative 0.4561526 0.1468029 3.107
## Time_negative 1.0377078 0.1424904 7.283
## Bedside_negative -0.4154977 0.2130787 -1.950
## Office_negative 0.1205638 0.2299691 0.524
## Cost_negative 1.6477666 0.3645774 4.520
## Average.words.per.review 0.0001089 0.0004806 0.227
## Pr(>|t|)
## (Intercept) 0.223877
## GenderMale 0.572447
## Years.of.Experience11 to 20 years 0.364312
## Years.of.Experience21 to 30 years 0.958996
## Years.of.Experience31 years and above 0.844592
## Communication_positive 0.810613
## Expertisepositive 0.180877
## Timepositive 0.790337
## Bedside_positive 0.020743 *
## Officepositive 0.743612
## Costpositive 0.161351
## Communication_negative 0.000648 ***
## Expertisenegative 0.002127 **
## Time_negative 0.00000000000522 ***
## Bedside_negative 0.052400 .
## Office_negative 0.600604
## Cost_negative 0.00000993081246 ***
## Average.words.per.review 0.820935
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1427 on 229 degrees of freedom
## Multiple R-squared: 0.5089, Adjusted R-squared: 0.4724
## F-statistic: 13.96 on 17 and 229 DF, p-value: < 0.00000000000000022
# Model 4: Average.pos_score regressed on every other column of hdff4.
lh4 <- lm(Average.pos_score ~ ., data = hdff4)
summary(lh4)
##
## Call:
## lm(formula = Average.pos_score ~ ., data = hdff4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.76388 -0.04567 0.02221 0.08125 0.29915
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.7740361 0.0486756 15.902
## GenderMale 0.0003497 0.0202880 0.017
## Years.of.Experience11 to 20 years -0.0421178 0.0383449 -1.098
## Years.of.Experience21 to 30 years -0.0010852 0.0375812 -0.029
## Years.of.Experience31 years and above -0.0235270 0.0382343 -0.615
## Communication_positive -0.1782681 0.0844216 -2.112
## Expertisepositive 0.1135756 0.0753566 1.507
## Timepositive 0.0284462 0.0929019 0.306
## Bedside_positive -0.0243370 0.0658136 -0.370
## Officepositive 0.1036617 0.1325212 0.782
## Costpositive 0.1865724 0.1759124 1.061
## Communication_negative -0.5388122 0.1648428 -3.269
## Expertisenegative -0.5458443 0.1546610 -3.529
## Time_negative -0.1629900 0.1501176 -1.086
## Bedside_negative -0.0548634 0.2244844 -0.244
## Office_negative -0.0048442 0.2422789 -0.020
## Cost_negative -1.3876293 0.3840926 -3.613
## Average.words.per.review 0.0017273 0.0005063 3.412
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.986263
## Years.of.Experience11 to 20 years 0.273186
## Years.of.Experience21 to 30 years 0.976989
## Years.of.Experience31 years and above 0.538943
## Communication_positive 0.035801 *
## Expertisepositive 0.133144
## Timepositive 0.759733
## Bedside_positive 0.711883
## Officepositive 0.434889
## Costpositive 0.289990
## Communication_negative 0.001247 **
## Expertisenegative 0.000504 ***
## Time_negative 0.278732
## Bedside_negative 0.807142
## Office_negative 0.984065
## Cost_negative 0.000372 ***
## Average.words.per.review 0.000763 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1504 on 229 degrees of freedom
## Multiple R-squared: 0.3675, Adjusted R-squared: 0.3205
## F-statistic: 7.826 on 17 and 229 DF, p-value: 0.000000000000002658
# Model 5: Average.neg_score regressed on every other column of hdff5.
lh5 <- lm(Average.neg_score ~ ., data = hdff5)
summary(lh5)
##
## Call:
## lm(formula = Average.neg_score ~ ., data = hdff5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85154 -0.01423 0.05662 0.10557 0.53916
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0559592 0.0778001 -0.719 0.47271
## GenderMale -0.0453889 0.0324271 -1.400 0.16295
## Years.of.Experience11 to 20 years -0.0017304 0.0612881 -0.028 0.97750
## Years.of.Experience21 to 30 years 0.0108923 0.0600674 0.181 0.85627
## Years.of.Experience31 years and above 0.0415669 0.0611113 0.680 0.49708
## Communication_positive 0.0702209 0.1349343 0.520 0.60328
## Expertisepositive 0.0269866 0.1204455 0.224 0.82291
## Timepositive -0.0317881 0.1484887 -0.214 0.83068
## Bedside_positive 0.0930283 0.1051924 0.884 0.37743
## Officepositive 0.0314480 0.2118138 0.148 0.88210
## Costpositive 0.2896671 0.2811677 1.030 0.30399
## Communication_negative -0.7033954 0.2634748 -2.670 0.00814
## Expertisenegative 0.0319602 0.2472007 0.129 0.89724
## Time_negative -1.2347404 0.2399389 -5.146 0.000000572
## Bedside_negative 0.6061279 0.3588023 1.689 0.09252
## Office_negative -1.2752564 0.3872439 -3.293 0.00115
## Cost_negative -1.5581905 0.6139103 -2.538 0.01181
## Average.words.per.review -0.0008341 0.0008093 -1.031 0.30379
##
## (Intercept)
## GenderMale
## Years.of.Experience11 to 20 years
## Years.of.Experience21 to 30 years
## Years.of.Experience31 years and above
## Communication_positive
## Expertisepositive
## Timepositive
## Bedside_positive
## Officepositive
## Costpositive
## Communication_negative **
## Expertisenegative
## Time_negative ***
## Bedside_negative .
## Office_negative **
## Cost_negative *
## Average.words.per.review
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2403 on 229 degrees of freedom
## Multiple R-squared: 0.3183, Adjusted R-squared: 0.2677
## F-statistic: 6.289 on 17 and 229 DF, p-value: 0.000000000004951
# Model 6: Overall_score regressed on every other column of hdff6.
lh6 <- lm(Overall_score ~ ., data = hdff6)
summary(lh6)
##
## Call:
## lm(formula = Overall_score ~ ., data = hdff6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1626 -0.1082 0.0530 0.1568 0.6340
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.7180769 0.0875710 8.200
## GenderMale -0.0450392 0.0364996 -1.234
## Years.of.Experience11 to 20 years -0.0438482 0.0689853 -0.636
## Years.of.Experience21 to 30 years 0.0098071 0.0676113 0.145
## Years.of.Experience31 years and above 0.0180400 0.0687863 0.262
## Communication_positive -0.1080472 0.1518808 -0.711
## Expertisepositive 0.1405622 0.1355723 1.037
## Timepositive -0.0033418 0.1671375 -0.020
## Bedside_positive 0.0686913 0.1184036 0.580
## Officepositive 0.1351097 0.2384156 0.567
## Costpositive 0.4762394 0.3164796 1.505
## Communication_negative -1.2422075 0.2965646 -4.189
## Expertisenegative -0.5138841 0.2782467 -1.847
## Time_negative -1.3977304 0.2700730 -5.175
## Bedside_negative 0.5512645 0.4038644 1.365
## Office_negative -1.2801005 0.4358780 -2.937
## Cost_negative -2.9458199 0.6910115 -4.263
## Average.words.per.review 0.0008933 0.0009109 0.981
## Pr(>|t|)
## (Intercept) 0.0000000000000172 ***
## GenderMale 0.21848
## Years.of.Experience11 to 20 years 0.52566
## Years.of.Experience21 to 30 years 0.88480
## Years.of.Experience31 years and above 0.79336
## Communication_positive 0.47756
## Expertisepositive 0.30092
## Timepositive 0.98407
## Bedside_positive 0.56239
## Officepositive 0.57147
## Costpositive 0.13375
## Communication_negative 0.0000400594001271 ***
## Expertisenegative 0.06606 .
## Time_negative 0.0000004969127672 ***
## Bedside_negative 0.17360
## Office_negative 0.00365 **
## Cost_negative 0.0000294868836949 ***
## Average.words.per.review 0.32780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2705 on 229 degrees of freedom
## Multiple R-squared: 0.4613, Adjusted R-squared: 0.4213
## F-statistic: 11.54 on 17 and 229 DF, p-value: < 0.00000000000000022
## Correlation Plot
# Show the structure of hdff1 (column names and types) before plotting.
str(hdff1)
## 'data.frame': 247 obs. of 16 variables:
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
## $ Years.of.Experience : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
## $ Communication_positive : num 0.0676 0.0286 0.1 0.1538 0.3333 ...
## $ Expertisepositive : num 0.189 0.114 0.1 0.154 0.167 ...
## $ Timepositive : num 0.1216 0.0714 0 0.1154 0 ...
## $ Bedside_positive : num 0.108 0.186 0.35 0.115 0.333 ...
## $ Officepositive : num 0.027 0.0429 0.15 0.0769 0 ...
## $ Costpositive : num 0.0541 0.0286 0 0.0385 0 ...
## $ Communication_negative : num 0 0.0143 0 0 0 ...
## $ Expertisenegative : num 0.027 0.0143 0 0 0 ...
## $ Time_negative : num 0 0.0429 0 0 0 ...
## $ Bedside_negative : num 0 0.0429 0 0 0 ...
## $ Office_negative : num 0 0.0143 0 0 0 ...
## $ Cost_negative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Average.words.per.review: num 29 72 44 40 42 61 29 41 17 34 ...
## $ Rating : num 4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
# One correlation plot per Healthgrades model frame, in order hdff1..hdff6.
# The first two columns (Gender, Years.of.Experience) are factors and are
# dropped before computing correlations.
invisible(lapply(
  list(hdff1, hdff2, hdff3, hdff4, hdff5, hdff6),
  function(model_frame) {
    corrplot(
      cor(model_frame[, -(1:2)]),
      method = "circle",
      addCoef.col = "black",
      diag = FALSE,
      type = "upper"
    )
  }
))

# RateMDs ----
# Six model frames built from ratemdsfinal. Each shares the predictor columns
# 3, 4, 17-28 and 32 and differs only in the response, which is placed last.
rdff1 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 6)])   # column 6
rdff2 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 11)])  # column 11
rdff3 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 14)])  # column 14
rdff4 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 12)])  # column 12
rdff5 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 16)])  # column 16
rdff6 <- data.frame(ratemdsfinal[, c(3, 4, 17:28, 32, 33)])  # column 33
# Correlation plots (RateMDs) ----
# One plot per model frame, in order rdff1..rdff6; the first two columns are
# excluded before computing correlations.
invisible(lapply(
  list(rdff1, rdff2, rdff3, rdff4, rdff5, rdff6),
  function(model_frame) {
    corrplot(
      cor(model_frame[, -(1:2)]),
      method = "circle",
      addCoef.col = "black",
      diag = FALSE,
      type = "upper"
    )
  }
))

# Regression (RateMDs) ----
# Model 1: Rating regressed on every other column of rdff1.
lr1 <- lm(Rating ~ ., data = rdff1)
summary(lr1)
##
## Call:
## lm(formula = Rating ~ ., data = rdff1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4987 -0.4319 0.1945 0.5221 4.0045
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.2584145 0.1313193 32.428
## GenderMale 0.1209880 0.0659969 1.833
## Years.of.Experience11 to 20 years 0.1474356 0.1381748 1.067
## Years.of.Experience21 to 30 years 0.1810661 0.1303980 1.389
## Years.of.Experience31 years and above 0.1751600 0.1337864 1.309
## Communication_positive 0.0579462 0.3074916 0.188
## Expertisepositive 0.3666430 0.1991385 1.841
## Timepositive -0.4788978 0.3337779 -1.435
## Bedside_positive 0.3259224 0.1892757 1.722
## Officepositive 0.4865397 0.3001795 1.621
## Costpositive 1.4084574 0.4267645 3.300
## Communication_negative -1.7267337 0.3863956 -4.469
## Expertisenegative -1.7076915 0.2783143 -6.136
## Time_negative -1.6155999 0.3399253 -4.753
## Bedside_negative -1.2639660 0.3176705 -3.979
## Office_negative -2.4548218 0.4412688 -5.563
## Cost_negative -2.6605126 0.5515298 -4.824
## Average.words.per.review -0.0039921 0.0006375 -6.262
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.06715 .
## Years.of.Experience11 to 20 years 0.28629
## Years.of.Experience21 to 30 years 0.16536
## Years.of.Experience31 years and above 0.19084
## Communication_positive 0.85057
## Expertisepositive 0.06598 .
## Timepositive 0.15175
## Bedside_positive 0.08548 .
## Officepositive 0.10546
## Costpositive 0.00101 **
## Communication_negative 0.000009037762 ***
## Expertisenegative 0.000000001350 ***
## Time_negative 0.000002391209 ***
## Bedside_negative 0.000075742470 ***
## Office_negative 0.000000036508 ***
## Cost_negative 0.000001695418 ***
## Average.words.per.review 0.000000000629 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9128 on 775 degrees of freedom
## Multiple R-squared: 0.3787, Adjusted R-squared: 0.3651
## F-statistic: 27.79 on 17 and 775 DF, p-value: < 0.00000000000000022
# Model 2: Positive_Proportion regressed on every other column of rdff2.
lr2 <- lm(Positive_Proportion ~ ., data = rdff2)
summary(lr2)
##
## Call:
## lm(formula = Positive_Proportion ~ ., data = rdff2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.96778 -0.09982 0.06330 0.16630 0.84169
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.7231908 0.0399668 18.095
## GenderMale 0.0247285 0.0200860 1.231
## Years.of.Experience11 to 20 years 0.1146939 0.0420532 2.727
## Years.of.Experience21 to 30 years 0.1170368 0.0396864 2.949
## Years.of.Experience31 years and above 0.0817680 0.0407176 2.008
## Communication_positive -0.1391590 0.0935845 -1.487
## Expertisepositive 0.1948552 0.0606074 3.215
## Timepositive -0.0362635 0.1015847 -0.357
## Bedside_positive 0.2661089 0.0576057 4.619
## Officepositive 0.2353906 0.0913590 2.577
## Costpositive 0.5013061 0.1298850 3.860
## Communication_negative -0.3858624 0.1175988 -3.281
## Expertisenegative -0.7255680 0.0847044 -8.566
## Time_negative -0.6723595 0.1034556 -6.499
## Bedside_negative -0.4786142 0.0966824 -4.950
## Office_negative -0.4120777 0.1342993 -3.068
## Cost_negative -1.0191855 0.1678570 -6.072
## Average.words.per.review -0.0008097 0.0001940 -4.173
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.218647
## Years.of.Experience11 to 20 years 0.006529 **
## Years.of.Experience21 to 30 years 0.003283 **
## Years.of.Experience31 years and above 0.044972 *
## Communication_positive 0.137425
## Expertisepositive 0.001358 **
## Timepositive 0.721205
## Bedside_positive 0.000004503490 ***
## Officepositive 0.010163 *
## Costpositive 0.000123 ***
## Communication_negative 0.001080 **
## Expertisenegative < 0.0000000000000002 ***
## Time_negative 0.000000000145 ***
## Bedside_negative 0.000000909286 ***
## Office_negative 0.002227 **
## Cost_negative 0.000000001980 ***
## Average.words.per.review 0.000033429711 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2778 on 775 degrees of freedom
## Multiple R-squared: 0.4263, Adjusted R-squared: 0.4137
## F-statistic: 33.87 on 17 and 775 DF, p-value: < 0.00000000000000022
# Model 3: Negative_Proportion regressed on every other column of rdff3.
lr3 <- lm(Negative_Proportion ~ ., data = rdff3)
summary(lr3)
##
## Call:
## lm(formula = Negative_Proportion ~ ., data = rdff3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.00751 -0.11376 -0.04056 0.07096 0.99419
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.1898280 0.0350736 5.412
## GenderMale -0.0280136 0.0176269 -1.589
## Years.of.Experience11 to 20 years -0.1256970 0.0369046 -3.406
## Years.of.Experience21 to 30 years -0.1311036 0.0348276 -3.764
## Years.of.Experience31 years and above -0.1170448 0.0357326 -3.276
## Communication_positive -0.0405123 0.0821269 -0.493
## Expertisepositive -0.0928168 0.0531872 -1.745
## Timepositive 0.1089283 0.0891476 1.222
## Bedside_positive -0.1422816 0.0505530 -2.815
## Officepositive -0.0764590 0.0801739 -0.954
## Costpositive -0.3962814 0.1139831 -3.477
## Communication_negative 0.5028461 0.1032011 4.872
## Expertisenegative 0.7743013 0.0743340 10.417
## Time_negative 0.6877894 0.0907895 7.576
## Bedside_negative 0.4440841 0.0848455 5.234
## Office_negative 0.4822172 0.1178570 4.092
## Cost_negative 1.1186023 0.1473062 7.594
## Average.words.per.review 0.0011622 0.0001703 6.826
## Pr(>|t|)
## (Intercept) 0.0000000830252621 ***
## GenderMale 0.112411
## Years.of.Experience11 to 20 years 0.000693 ***
## Years.of.Experience21 to 30 years 0.000180 ***
## Years.of.Experience31 years and above 0.001101 **
## Communication_positive 0.621948
## Expertisepositive 0.081365 .
## Timepositive 0.222122
## Bedside_positive 0.005009 **
## Officepositive 0.340551
## Costpositive 0.000536 ***
## Communication_negative 0.0000013366680680 ***
## Expertisenegative < 0.0000000000000002 ***
## Time_negative 0.0000000000001019 ***
## Bedside_negative 0.0000002137397275 ***
## Office_negative 0.0000473381478164 ***
## Cost_negative 0.0000000000000895 ***
## Average.words.per.review 0.0000000000176135 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2438 on 775 degrees of freedom
## Multiple R-squared: 0.5085, Adjusted R-squared: 0.4977
## F-statistic: 47.16 on 17 and 775 DF, p-value: < 0.00000000000000022
# Model 4: Average.pos_score regressed on every other column of rdff4.
lr4 <- lm(Average.pos_score ~ ., data = rdff4)
summary(lr4)
##
## Call:
## lm(formula = Average.pos_score ~ ., data = rdff4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.79655 -0.13758 0.06478 0.17862 0.51776
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.5794739 0.0381635 15.184
## GenderMale 0.0078624 0.0191797 0.410
## Years.of.Experience11 to 20 years 0.1249562 0.0401558 3.112
## Years.of.Experience21 to 30 years 0.1444395 0.0378958 3.811
## Years.of.Experience31 years and above 0.1420179 0.0388805 3.653
## Communication_positive -0.1937978 0.0893620 -2.169
## Expertisepositive 0.0760436 0.0578729 1.314
## Timepositive 0.0382756 0.0970012 0.395
## Bedside_positive 0.1923630 0.0550066 3.497
## Officepositive 0.2520744 0.0872370 2.890
## Costpositive 0.5517505 0.1240246 4.449
## Communication_negative -0.3427398 0.1122928 -3.052
## Expertisenegative -0.5175899 0.0808826 -6.399
## Time_negative -0.5548255 0.0987878 -5.616
## Bedside_negative -0.4153887 0.0923201 -4.499
## Office_negative -0.2496353 0.1282398 -1.947
## Cost_negative -0.8607826 0.1602834 -5.370
## Average.words.per.review -0.0001177 0.0001853 -0.635
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.681967
## Years.of.Experience11 to 20 years 0.001928 **
## Years.of.Experience21 to 30 years 0.000149 ***
## Years.of.Experience31 years and above 0.000277 ***
## Communication_positive 0.030410 *
## Expertisepositive 0.189243
## Timepositive 0.693255
## Bedside_positive 0.000497 ***
## Officepositive 0.003966 **
## Costpositive 0.00000990309 ***
## Communication_negative 0.002349 **
## Expertisenegative 0.00000000027 ***
## Time_negative 0.00000002719 ***
## Bedside_negative 0.00000785777 ***
## Office_negative 0.051940 .
## Cost_negative 0.00000010395 ***
## Average.words.per.review 0.525436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2653 on 775 degrees of freedom
## Multiple R-squared: 0.3251, Adjusted R-squared: 0.3103
## F-statistic: 21.96 on 17 and 775 DF, p-value: < 0.00000000000000022
# Model 5: Average.neg_score regressed on every other column of rdff5.
lr5 <- lm(Average.neg_score ~ ., data = rdff5)
summary(lr5)
##
## Call:
## lm(formula = Average.neg_score ~ ., data = rdff5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78415 -0.17673 0.07616 0.14458 0.95304
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.1109611 0.0385817 -2.876
## GenderMale 0.0145985 0.0193899 0.753
## Years.of.Experience11 to 20 years 0.0203760 0.0405959 0.502
## Years.of.Experience21 to 30 years -0.0036386 0.0383111 -0.095
## Years.of.Experience31 years and above -0.0154383 0.0393066 -0.393
## Communication_positive 0.1330288 0.0903414 1.473
## Expertisepositive 0.1199794 0.0585071 2.051
## Timepositive -0.0742197 0.0980643 -0.757
## Bedside_positive 0.1445197 0.0556094 2.599
## Officepositive 0.0778611 0.0881930 0.883
## Costpositive 0.2327128 0.1253839 1.856
## Communication_negative -0.6848186 0.1135234 -6.032
## Expertisenegative -0.5429282 0.0817690 -6.640
## Time_negative -0.5489313 0.0998704 -5.496
## Bedside_negative -0.3096607 0.0933319 -3.318
## Office_negative -0.5648870 0.1296452 -4.357
## Cost_negative -0.5082322 0.1620400 -3.136
## Average.words.per.review -0.0011992 0.0001873 -6.403
## Pr(>|t|)
## (Intercept) 0.004138 **
## GenderMale 0.451744
## Years.of.Experience11 to 20 years 0.615864
## Years.of.Experience21 to 30 years 0.924359
## Years.of.Experience31 years and above 0.694599
## Communication_positive 0.141288
## Expertisepositive 0.040634 *
## Timepositive 0.449371
## Bedside_positive 0.009532 **
## Officepositive 0.377592
## Costpositive 0.063833 .
## Communication_negative 0.0000000025006 ***
## Expertisenegative 0.0000000000591 ***
## Time_negative 0.0000000526198 ***
## Bedside_negative 0.000949 ***
## Office_negative 0.0000149485286 ***
## Cost_negative 0.001775 **
## Average.words.per.review 0.0000000002647 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2682 on 775 degrees of freedom
## Multiple R-squared: 0.3839, Adjusted R-squared: 0.3704
## F-statistic: 28.41 on 17 and 775 DF, p-value: < 0.00000000000000022
# Model 6 (RateMDs): Overall_score regressed on every other column of rdff6.
# Fixes a copy-paste slip: the model was previously fitted on hdff6 and the
# summary printed was lh6, so the Healthgrades model 6 was reported a second
# time in place of the RateMDs one.
# NOTE(review): the printed output that follows was produced by the old code
# (it shows `data = hdff6` and 229 residual DF); re-knit to regenerate it.
lr6 <- lm(Overall_score ~ ., data = rdff6)
summary(lr6)
##
## Call:
## lm(formula = Overall_score ~ ., data = hdff6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1626 -0.1082 0.0530 0.1568 0.6340
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.7180769 0.0875710 8.200
## GenderMale -0.0450392 0.0364996 -1.234
## Years.of.Experience11 to 20 years -0.0438482 0.0689853 -0.636
## Years.of.Experience21 to 30 years 0.0098071 0.0676113 0.145
## Years.of.Experience31 years and above 0.0180400 0.0687863 0.262
## Communication_positive -0.1080472 0.1518808 -0.711
## Expertisepositive 0.1405622 0.1355723 1.037
## Timepositive -0.0033418 0.1671375 -0.020
## Bedside_positive 0.0686913 0.1184036 0.580
## Officepositive 0.1351097 0.2384156 0.567
## Costpositive 0.4762394 0.3164796 1.505
## Communication_negative -1.2422075 0.2965646 -4.189
## Expertisenegative -0.5138841 0.2782467 -1.847
## Time_negative -1.3977304 0.2700730 -5.175
## Bedside_negative 0.5512645 0.4038644 1.365
## Office_negative -1.2801005 0.4358780 -2.937
## Cost_negative -2.9458199 0.6910115 -4.263
## Average.words.per.review 0.0008933 0.0009109 0.981
## Pr(>|t|)
## (Intercept) 0.0000000000000172 ***
## GenderMale 0.21848
## Years.of.Experience11 to 20 years 0.52566
## Years.of.Experience21 to 30 years 0.88480
## Years.of.Experience31 years and above 0.79336
## Communication_positive 0.47756
## Expertisepositive 0.30092
## Timepositive 0.98407
## Bedside_positive 0.56239
## Officepositive 0.57147
## Costpositive 0.13375
## Communication_negative 0.0000400594001271 ***
## Expertisenegative 0.06606 .
## Time_negative 0.0000004969127672 ***
## Bedside_negative 0.17360
## Office_negative 0.00365 **
## Cost_negative 0.0000294868836949 ***
## Average.words.per.review 0.32780
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2705 on 229 degrees of freedom
## Multiple R-squared: 0.4613, Adjusted R-squared: 0.4213
## F-statistic: 11.54 on 17 and 229 DF, p-value: < 0.00000000000000022
Combined Linear Modeling
# Six model frames from the combined data (dcombine). Columns 16-21 hold the
# candidate responses; each frame keeps exactly one of them and drops the
# other five, so the response ends up as the 16th (last) column.
comdff1 <- data.frame(dcombine[, -(17:21)])          # keeps column 16
comdff2 <- data.frame(dcombine[, -c(16, 18:21)])     # keeps column 17
comdff3 <- data.frame(dcombine[, -c(16:17, 19:21)])  # keeps column 18
comdff4 <- data.frame(dcombine[, -c(16:18, 20:21)])  # keeps column 19
comdff5 <- data.frame(dcombine[, -c(16:19, 21)])     # keeps column 20
comdff6 <- data.frame(dcombine[, -(16:20)])          # keeps column 21
str(comdff6)
## 'data.frame': 1040 obs. of 16 variables:
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
## $ Years.of.Experience : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
## $ Communication_positive : num 0.0676 0.0286 0.1 0.1538 0.3333 ...
## $ Expertisepositive : num 0.189 0.114 0.1 0.154 0.167 ...
## $ Timepositive : num 0.1216 0.0714 0 0.1154 0 ...
## $ Bedside_positive : num 0.108 0.186 0.35 0.115 0.333 ...
## $ Officepositive : num 0.027 0.0429 0.15 0.0769 0 ...
## $ Costpositive : num 0.0541 0.0286 0 0.0385 0 ...
## $ Communication_negative : num 0 0.0143 0 0 0 ...
## $ Expertisenegative : num 0.027 0.0143 0 0 0 ...
## $ Time_negative : num 0 0.0429 0 0 0 ...
## $ Bedside_negative : num 0 0.0429 0 0 0 ...
## $ Office_negative : num 0 0.0143 0 0 0 ...
## $ Cost_negative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Average.words.per.review: num 29 72 44 40 42 61 29 41 17 34 ...
## $ Overall_score : num 0.824 0.792 0.916 0.897 0.936 ...
# Regression: Rating on every other column of comdff1 (combined data set).
lc1 <- lm(formula = Rating ~ ., data = comdff1)
summary(lc1)
##
## Call:
## lm(formula = Rating ~ ., data = comdff1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.5548 -0.3300 0.1813 0.4663 3.4430
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 4.4194784 0.1069837 41.310
## GenderMale 0.0804925 0.0535586 1.503
## Years.of.Experience11 to 20 years 0.0910572 0.1096967 0.830
## Years.of.Experience21 to 30 years 0.1089694 0.1039991 1.048
## Years.of.Experience31 years and above 0.1143336 0.1064307 1.074
## Communication_positive 0.3987009 0.2374853 1.679
## Expertisepositive 0.3171896 0.1668446 1.901
## Timepositive -0.0787030 0.2589872 -0.304
## Bedside_positive 0.2579257 0.1573460 1.639
## Officepositive 0.1341886 0.2565245 0.523
## Costpositive 1.0833569 0.3646001 2.971
## Communication_negative -1.2309834 0.3211376 -3.833
## Expertisenegative -1.5766878 0.2424676 -6.503
## Time_negative -1.4641938 0.2926774 -5.003
## Bedside_negative -1.2131718 0.2855508 -4.249
## Office_negative -2.6646053 0.3892143 -6.846
## Cost_negative -2.6459299 0.4945366 -5.350
## Average.words.per.review -0.0042471 0.0005748 -7.388
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.133177
## Years.of.Experience11 to 20 years 0.406687
## Years.of.Experience21 to 30 years 0.294982
## Years.of.Experience31 years and above 0.282962
## Communication_positive 0.093488 .
## Expertisepositive 0.057569 .
## Timepositive 0.761275
## Bedside_positive 0.101474
## Officepositive 0.601016
## Costpositive 0.003034 **
## Communication_negative 0.000134 ***
## Expertisenegative 0.000000000123221 ***
## Time_negative 0.000000665002823 ***
## Bedside_negative 0.000023481775074 ***
## Office_negative 0.000000000013070 ***
## Cost_negative 0.000000108358355 ***
## Average.words.per.review 0.000000000000308 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8471 on 1022 degrees of freedom
## Multiple R-squared: 0.3562, Adjusted R-squared: 0.3455
## F-statistic: 33.26 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Positive_Proportion regressed on every other column of comdff2.
lc2 <- lm(formula = Positive_Proportion ~ ., data = comdff2)
summary(lc2)
##
## Call:
## lm(formula = Positive_Proportion ~ ., data = comdff2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.98048 -0.07733 0.05426 0.14137 0.82984
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.7810386 0.0325769 23.975
## GenderMale 0.0175599 0.0163088 1.077
## Years.of.Experience11 to 20 years 0.0745600 0.0334030 2.232
## Years.of.Experience21 to 30 years 0.0846441 0.0316681 2.673
## Years.of.Experience31 years and above 0.0598625 0.0324085 1.847
## Communication_positive -0.0057390 0.0723151 -0.079
## Expertisepositive 0.1973434 0.0508048 3.884
## Timepositive 0.0676514 0.0788625 0.858
## Bedside_positive 0.2351209 0.0479124 4.907
## Officepositive 0.1569701 0.0781126 2.010
## Costpositive 0.4012060 0.1110220 3.614
## Communication_negative -0.3681654 0.0977875 -3.765
## Expertisenegative -0.7218612 0.0738322 -9.777
## Time_negative -0.7071751 0.0891213 -7.935
## Bedside_negative -0.4477315 0.0869512 -5.149
## Office_negative -0.4311245 0.1185171 -3.638
## Cost_negative -1.0948320 0.1505881 -7.270
## Average.words.per.review -0.0008207 0.0001750 -4.689
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.281863
## Years.of.Experience11 to 20 years 0.025822 *
## Years.of.Experience21 to 30 years 0.007641 **
## Years.of.Experience31 years and above 0.065018 .
## Communication_positive 0.936761
## Expertisepositive 0.000109 ***
## Timepositive 0.391182
## Bedside_positive 0.00000107400834509 ***
## Officepositive 0.044743 *
## Costpositive 0.000316 ***
## Communication_negative 0.000176 ***
## Expertisenegative < 0.0000000000000002 ***
## Time_negative 0.00000000000000551 ***
## Bedside_negative 0.00000031367070663 ***
## Office_negative 0.000289 ***
## Cost_negative 0.00000000000071172 ***
## Average.words.per.review 0.00000311940283111 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.258 on 1022 degrees of freedom
## Multiple R-squared: 0.4309, Adjusted R-squared: 0.4215
## F-statistic: 45.53 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Negative_Proportion regressed on every other column of comdff3.
lc3 <- lm(formula = Negative_Proportion ~ ., data = comdff3)
summary(lc3)
##
## Call:
## lm(formula = Negative_Proportion ~ ., data = comdff3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.02412 -0.10052 -0.03151 0.05972 0.99927
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.146617 0.028666 5.115
## GenderMale -0.020651 0.014351 -1.439
## Years.of.Experience11 to 20 years -0.083203 0.029392 -2.831
## Years.of.Experience21 to 30 years -0.097930 0.027866 -3.514
## Years.of.Experience31 years and above -0.087611 0.028517 -3.072
## Communication_positive -0.089456 0.063633 -1.406
## Expertisepositive -0.100977 0.044705 -2.259
## Timepositive 0.029154 0.069394 0.420
## Bedside_positive -0.137290 0.042160 -3.256
## Officepositive -0.050707 0.068734 -0.738
## Costpositive -0.336372 0.097692 -3.443
## Communication_negative 0.460933 0.086047 5.357
## Expertisenegative 0.749351 0.064968 11.534
## Time_negative 0.722739 0.078421 9.216
## Bedside_negative 0.407485 0.076511 5.326
## Office_negative 0.482666 0.104287 4.628
## Cost_negative 1.168428 0.132508 8.818
## Average.words.per.review 0.001158 0.000154 7.520
## Pr(>|t|)
## (Intercept) 0.00000037503111 ***
## GenderMale 0.150444
## Years.of.Experience11 to 20 years 0.004735 **
## Years.of.Experience21 to 30 years 0.000460 ***
## Years.of.Experience31 years and above 0.002181 **
## Communication_positive 0.160081
## Expertisepositive 0.024110 *
## Timepositive 0.674480
## Bedside_positive 0.001165 **
## Officepositive 0.460853
## Costpositive 0.000598 ***
## Communication_negative 0.00000010466500 ***
## Expertisenegative < 0.0000000000000002 ***
## Time_negative < 0.0000000000000002 ***
## Bedside_negative 0.00000012359151 ***
## Office_negative 0.00000416197762 ***
## Cost_negative < 0.0000000000000002 ***
## Average.words.per.review 0.00000000000012 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.227 on 1022 degrees of freedom
## Multiple R-squared: 0.506, Adjusted R-squared: 0.4978
## F-statistic: 61.59 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Average.pos_score regressed on every other column of comdff4.
lc4 <- lm(formula = Average.pos_score ~ ., data = comdff4)
summary(lc4)
##
## Call:
## lm(formula = Average.pos_score ~ ., data = comdff4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.81656 -0.09927 0.05738 0.15959 0.51416
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.64692434 0.03132822 20.650
## GenderMale 0.00400675 0.01568365 0.255
## Years.of.Experience11 to 20 years 0.07488439 0.03212267 2.331
## Years.of.Experience21 to 30 years 0.10002820 0.03045422 3.285
## Years.of.Experience31 years and above 0.09487822 0.03116626 3.044
## Communication_positive -0.09722930 0.06954321 -1.398
## Expertisepositive 0.09417639 0.04885739 1.928
## Timepositive 0.10823997 0.07583964 1.427
## Bedside_positive 0.15062062 0.04607589 3.269
## Officepositive 0.18792158 0.07511848 2.502
## Costpositive 0.46411366 0.10676642 4.347
## Communication_negative -0.37281616 0.09403924 -3.964
## Expertisenegative -0.56256603 0.07100217 -7.923
## Time_negative -0.48335656 0.08570519 -5.640
## Bedside_negative -0.40464353 0.08361829 -4.839
## Office_negative -0.27630470 0.11397424 -2.424
## Cost_negative -0.93228006 0.14481594 -6.438
## Average.words.per.review -0.00006264 0.00016833 -0.372
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.79841
## Years.of.Experience11 to 20 years 0.01994 *
## Years.of.Experience21 to 30 years 0.00106 **
## Years.of.Experience31 years and above 0.00239 **
## Communication_positive 0.16238
## Expertisepositive 0.05418 .
## Timepositive 0.15382
## Bedside_positive 0.00112 **
## Officepositive 0.01252 *
## Costpositive 0.00001518079388590 ***
## Communication_negative 0.00007867743822599 ***
## Expertisenegative 0.00000000000000602 ***
## Time_negative 0.00000002203201016 ***
## Bedside_negative 0.00000150464080230 ***
## Office_negative 0.01551 *
## Cost_negative 0.00000000018625477 ***
## Average.words.per.review 0.70986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2481 on 1022 degrees of freedom
## Multiple R-squared: 0.3195, Adjusted R-squared: 0.3082
## F-statistic: 28.23 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Average.neg_score regressed on every other column of comdff5.
lc5 <- lm(formula = Average.neg_score ~ ., data = comdff5)
summary(lc5)
##
## Call:
## lm(formula = Average.neg_score ~ ., data = comdff5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78930 -0.14500 0.07364 0.13895 0.98492
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.1036872 0.0332653 -3.117
## GenderMale 0.0039772 0.0166534 0.239
## Years.of.Experience11 to 20 years 0.0163092 0.0341089 0.478
## Years.of.Experience21 to 30 years 0.0021139 0.0323373 0.065
## Years.of.Experience31 years and above 0.0009758 0.0330934 0.029
## Communication_positive 0.1444302 0.0738433 1.956
## Expertisepositive 0.1084058 0.0518784 2.090
## Timepositive -0.0273260 0.0805290 -0.339
## Bedside_positive 0.1305719 0.0489249 2.669
## Officepositive 0.0685017 0.0797633 0.859
## Costpositive 0.2057099 0.1133681 1.815
## Communication_negative -0.6225700 0.0998540 -6.235
## Expertisenegative -0.4726412 0.0753924 -6.269
## Time_negative -0.6435948 0.0910046 -7.072
## Bedside_negative -0.2828426 0.0887887 -3.186
## Office_negative -0.6325603 0.1210216 -5.227
## Cost_negative -0.5624687 0.1537703 -3.658
## Average.words.per.review -0.0012106 0.0001787 -6.773
## Pr(>|t|)
## (Intercept) 0.001878 **
## GenderMale 0.811292
## Years.of.Experience11 to 20 years 0.632646
## Years.of.Experience21 to 30 years 0.947891
## Years.of.Experience31 years and above 0.976481
## Communication_positive 0.050749 .
## Expertisepositive 0.036899 *
## Timepositive 0.734430
## Bedside_positive 0.007732 **
## Officepositive 0.390645
## Costpositive 0.069889 .
## Communication_negative 0.00000000066078 ***
## Expertisenegative 0.00000000053486 ***
## Time_negative 0.00000000000283 ***
## Bedside_negative 0.001488 **
## Office_negative 0.00000020901385 ***
## Cost_negative 0.000267 ***
## Average.words.per.review 0.00000000002127 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2634 on 1022 degrees of freedom
## Multiple R-squared: 0.3651, Adjusted R-squared: 0.3545
## F-statistic: 34.57 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Overall_score regressed on every other column of comdff6.
lc6 <- lm(formula = Overall_score ~ ., data = comdff6)
summary(lc6)
##
## Call:
## lm(formula = Overall_score ~ ., data = comdff6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.45537 -0.21441 0.07354 0.23620 1.30553
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 0.5432371 0.0465767 11.663
## GenderMale 0.0079839 0.0233174 0.342
## Years.of.Experience11 to 20 years 0.0911935 0.0477578 1.910
## Years.of.Experience21 to 30 years 0.1021421 0.0452773 2.256
## Years.of.Experience31 years and above 0.0958541 0.0463359 2.069
## Communication_positive 0.0472009 0.1033921 0.457
## Expertisepositive 0.2025822 0.0726379 2.789
## Timepositive 0.0809140 0.1127532 0.718
## Bedside_positive 0.2811925 0.0685025 4.105
## Officepositive 0.2564233 0.1116811 2.296
## Costpositive 0.6698235 0.1587331 4.220
## Communication_negative -0.9953862 0.1398112 -7.120
## Expertisenegative -1.0352072 0.1055612 -9.807
## Time_negative -1.1269513 0.1274207 -8.844
## Bedside_negative -0.6874861 0.1243180 -5.530
## Office_negative -0.9088650 0.1694492 -5.364
## Cost_negative -1.4947488 0.2153025 -6.943
## Average.words.per.review -0.0012732 0.0002503 -5.088
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## GenderMale 0.73212
## Years.of.Experience11 to 20 years 0.05648 .
## Years.of.Experience21 to 30 years 0.02429 *
## Years.of.Experience31 years and above 0.03883 *
## Communication_positive 0.64811
## Expertisepositive 0.00539 **
## Timepositive 0.47316
## Bedside_positive 0.00004368341531 ***
## Officepositive 0.02188 *
## Costpositive 0.00002662383106 ***
## Communication_negative 0.00000000000204 ***
## Expertisenegative < 0.0000000000000002 ***
## Time_negative < 0.0000000000000002 ***
## Bedside_negative 0.00000004064894 ***
## Office_negative 0.00000010086286 ***
## Cost_negative 0.00000000000684 ***
## Average.words.per.review 0.00000043142775 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3688 on 1022 degrees of freedom
## Multiple R-squared: 0.4871, Adjusted R-squared: 0.4786
## F-statistic: 57.1 on 17 and 1022 DF, p-value: < 0.00000000000000022
# Correlation plots
# Inspect the column types first: the plots below drop the first two columns
# before calling cor().
str(comdff1)
## 'data.frame': 1040 obs. of 16 variables:
## $ Gender : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
## $ Years.of.Experience : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
## $ Communication_positive : num 0.0676 0.0286 0.1 0.1538 0.3333 ...
## $ Expertisepositive : num 0.189 0.114 0.1 0.154 0.167 ...
## $ Timepositive : num 0.1216 0.0714 0 0.1154 0 ...
## $ Bedside_positive : num 0.108 0.186 0.35 0.115 0.333 ...
## $ Officepositive : num 0.027 0.0429 0.15 0.0769 0 ...
## $ Costpositive : num 0.0541 0.0286 0 0.0385 0 ...
## $ Communication_negative : num 0 0.0143 0 0 0 ...
## $ Expertisenegative : num 0.027 0.0143 0 0 0 ...
## $ Time_negative : num 0 0.0429 0 0 0 ...
## $ Bedside_negative : num 0 0.0429 0 0 0 ...
## $ Office_negative : num 0 0.0143 0 0 0 ...
## $ Cost_negative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ Average.words.per.review: num 29 72 44 40 42 61 29 41 17 34 ...
## $ Rating : num 4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
# One upper-triangle correlation plot per combined data set, with the
# coefficients overlaid and the diagonal hidden. The first two columns
# (Gender, Years.of.Experience) are factors, so they are dropped before cor().
# `diag = FALSE` is spelled out because the shorthand `F` is an ordinary
# variable that can be reassigned.
for (comdff in list(comdff1, comdff2, comdff3, comdff4, comdff5, comdff6)) {
  corrplot(
    cor(comdff[, -c(1:2)]),
    method = "circle", addCoef.col = "black", diag = FALSE, type = "upper"
  )
}

# Stargazer: side-by-side text table of the six healthgrades models, also
# written to a text file.
library(stargazer)
stargazer(
  lh1, lh2, lh3, lh4, lh5, lh6,
  type = "text",
  out = "Stargazer_healthgrades.txt"
)
##
## =========================================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------
## Rating Positive_Proportion Negative_Proportion Average.pos_score Average.neg_score Overall_score
## (1) (2) (3) (4) (5) (6)
## -----------------------------------------------------------------------------------------------------------------------------------------
## GenderMale -0.031 -0.011 0.011 0.0003 -0.045 -0.045
## (0.045) (0.019) (0.019) (0.020) (0.032) (0.036)
##
## Years.of.Experience11 to 20 years -0.030 -0.033 0.033 -0.042 -0.002 -0.044
## (0.084) (0.036) (0.036) (0.038) (0.061) (0.069)
##
## Years.of.Experience21 to 30 years -0.063 0.002 -0.002 -0.001 0.011 0.010
## (0.083) (0.036) (0.036) (0.038) (0.060) (0.068)
##
## Years.of.Experience31 years and above -0.014 0.007 -0.007 -0.024 0.042 0.018
## (0.084) (0.036) (0.036) (0.038) (0.061) (0.069)
##
## Communication_positive -0.017 0.019 -0.019 -0.178** 0.070 -0.108
## (0.185) (0.080) (0.080) (0.084) (0.135) (0.152)
##
## Expertisepositive -0.059 0.096 -0.096 0.114 0.027 0.141
## (0.165) (0.072) (0.072) (0.075) (0.120) (0.136)
##
## Timepositive 0.102 0.023 -0.023 0.028 -0.032 -0.003
## (0.204) (0.088) (0.088) (0.093) (0.148) (0.167)
##
## Bedside_positive 0.163 0.145** -0.145** -0.024 0.093 0.069
## (0.144) (0.062) (0.062) (0.066) (0.105) (0.118)
##
## Officepositive -0.699** 0.041 -0.041 0.104 0.031 0.135
## (0.291) (0.126) (0.126) (0.133) (0.212) (0.238)
##
## Costpositive 0.438 0.235 -0.235 0.187 0.290 0.476
## (0.386) (0.167) (0.167) (0.176) (0.281) (0.316)
##
## Communication_negative -1.016*** -0.541*** 0.541*** -0.539*** -0.703*** -1.242***
## (0.362) (0.156) (0.156) (0.165) (0.263) (0.297)
##
## Expertisenegative 0.216 -0.456*** 0.456*** -0.546*** 0.032 -0.514*
## (0.340) (0.147) (0.147) (0.155) (0.247) (0.278)
##
## Time_negative -0.447 -1.038*** 1.038*** -0.163 -1.235*** -1.398***
## (0.330) (0.142) (0.142) (0.150) (0.240) (0.270)
##
## Bedside_negative 0.521 0.415* -0.415* -0.055 0.606* 0.551
## (0.493) (0.213) (0.213) (0.224) (0.359) (0.404)
##
## Office_negative -1.143** -0.121 0.121 -0.005 -1.275*** -1.280***
## (0.532) (0.230) (0.230) (0.242) (0.387) (0.436)
##
## Cost_negative -0.499 -1.648*** 1.648*** -1.388*** -1.558** -2.946***
## (0.843) (0.365) (0.365) (0.384) (0.614) (0.691)
##
## Average.words.per.review -0.001 -0.0001 0.0001 0.002*** -0.001 0.001
## (0.001) (0.0005) (0.0005) (0.001) (0.001) (0.001)
##
## Constant 4.757*** 0.944*** 0.056 0.774*** -0.056 0.718***
## (0.107) (0.046) (0.046) (0.049) (0.078) (0.088)
##
## -----------------------------------------------------------------------------------------------------------------------------------------
## Observations 247 247 247 247 247 247
## R2 0.162 0.509 0.509 0.367 0.318 0.461
## Adjusted R2 0.100 0.472 0.472 0.321 0.268 0.421
## Residual Std. Error (df = 229) 0.330 0.143 0.143 0.150 0.240 0.270
## F Statistic (df = 17; 229) 2.612*** 13.958*** 13.958*** 7.826*** 6.289*** 11.536***
## =========================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the six ratemds models, also written to a file.
# NOTE(review): in the printed table, column (6) reports 247 observations and
# matches the healthgrades Overall_score column value for value, while
# columns (1)-(5) report 793. lr6 looks like it was fit on the healthgrades
# data -- check where lr6 is defined.
stargazer(
  lr1, lr2, lr3, lr4, lr5, lr6,
  type = "text",
  out = "stargazer_ratemds.txt"
)
##
## ===========================================================================================================================================================================================
## Dependent variable:
## -----------------------------------------------------------------------------------------------------------------------------------------------------
## Rating Positive_Proportion Negative_Proportion Average.pos_score Average.neg_score Overall_score
## (1) (2) (3) (4) (5) (6)
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## GenderMale 0.121* 0.025 -0.028 0.008 0.015 -0.045
## (0.066) (0.020) (0.018) (0.019) (0.019) (0.036)
##
## Years.of.Experience11 to 20 years 0.147 0.115*** -0.126*** 0.125*** 0.020 -0.044
## (0.138) (0.042) (0.037) (0.040) (0.041) (0.069)
##
## Years.of.Experience21 to 30 years 0.181 0.117*** -0.131*** 0.144*** -0.004 0.010
## (0.130) (0.040) (0.035) (0.038) (0.038) (0.068)
##
## Years.of.Experience31 years and above 0.175 0.082** -0.117*** 0.142*** -0.015 0.018
## (0.134) (0.041) (0.036) (0.039) (0.039) (0.069)
##
## Communication_positive 0.058 -0.139 -0.041 -0.194** 0.133 -0.108
## (0.307) (0.094) (0.082) (0.089) (0.090) (0.152)
##
## Expertisepositive 0.367* 0.195*** -0.093* 0.076 0.120** 0.141
## (0.199) (0.061) (0.053) (0.058) (0.059) (0.136)
##
## Timepositive -0.479 -0.036 0.109 0.038 -0.074 -0.003
## (0.334) (0.102) (0.089) (0.097) (0.098) (0.167)
##
## Bedside_positive 0.326* 0.266*** -0.142*** 0.192*** 0.145*** 0.069
## (0.189) (0.058) (0.051) (0.055) (0.056) (0.118)
##
## Officepositive 0.487 0.235** -0.076 0.252*** 0.078 0.135
## (0.300) (0.091) (0.080) (0.087) (0.088) (0.238)
##
## Costpositive 1.408*** 0.501*** -0.396*** 0.552*** 0.233* 0.476
## (0.427) (0.130) (0.114) (0.124) (0.125) (0.316)
##
## Communication_negative -1.727*** -0.386*** 0.503*** -0.343*** -0.685*** -1.242***
## (0.386) (0.118) (0.103) (0.112) (0.114) (0.297)
##
## Expertisenegative -1.708*** -0.726*** 0.774*** -0.518*** -0.543*** -0.514*
## (0.278) (0.085) (0.074) (0.081) (0.082) (0.278)
##
## Time_negative -1.616*** -0.672*** 0.688*** -0.555*** -0.549*** -1.398***
## (0.340) (0.103) (0.091) (0.099) (0.100) (0.270)
##
## Bedside_negative -1.264*** -0.479*** 0.444*** -0.415*** -0.310*** 0.551
## (0.318) (0.097) (0.085) (0.092) (0.093) (0.404)
##
## Office_negative -2.455*** -0.412*** 0.482*** -0.250* -0.565*** -1.280***
## (0.441) (0.134) (0.118) (0.128) (0.130) (0.436)
##
## Cost_negative -2.661*** -1.019*** 1.119*** -0.861*** -0.508*** -2.946***
## (0.552) (0.168) (0.147) (0.160) (0.162) (0.691)
##
## Average.words.per.review -0.004*** -0.001*** 0.001*** -0.0001 -0.001*** 0.001
## (0.001) (0.0002) (0.0002) (0.0002) (0.0002) (0.001)
##
## Constant 4.258*** 0.723*** 0.190*** 0.579*** -0.111*** 0.718***
## (0.131) (0.040) (0.035) (0.038) (0.039) (0.088)
##
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations 793 793 793 793 793 247
## R2 0.379 0.426 0.508 0.325 0.384 0.461
## Adjusted R2 0.365 0.414 0.498 0.310 0.370 0.421
## Residual Std. Error 0.913 (df = 775) 0.278 (df = 775) 0.244 (df = 775) 0.265 (df = 775) 0.268 (df = 775) 0.270 (df = 229)
## F Statistic 27.792*** (df = 17; 775) 33.871*** (df = 17; 775) 47.158*** (df = 17; 775) 21.961*** (df = 17; 775) 28.410*** (df = 17; 775) 11.536*** (df = 17; 229)
## ===========================================================================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
# Side-by-side text table of the six combined-data models, also written to a
# file.
stargazer(
  lc1, lc2, lc3, lc4, lc5, lc6,
  type = "text",
  out = "stargazer_combined.txt"
)
##
## =========================================================================================================================================
## Dependent variable:
## ---------------------------------------------------------------------------------------------------
## Rating Positive_Proportion Negative_Proportion Average.pos_score Average.neg_score Overall_score
## (1) (2) (3) (4) (5) (6)
## -----------------------------------------------------------------------------------------------------------------------------------------
## GenderMale 0.080 0.018 -0.021 0.004 0.004 0.008
## (0.054) (0.016) (0.014) (0.016) (0.017) (0.023)
##
## Years.of.Experience11 to 20 years 0.091 0.075** -0.083*** 0.075** 0.016 0.091*
## (0.110) (0.033) (0.029) (0.032) (0.034) (0.048)
##
## Years.of.Experience21 to 30 years 0.109 0.085*** -0.098*** 0.100*** 0.002 0.102**
## (0.104) (0.032) (0.028) (0.030) (0.032) (0.045)
##
## Years.of.Experience31 years and above 0.114 0.060* -0.088*** 0.095*** 0.001 0.096**
## (0.106) (0.032) (0.029) (0.031) (0.033) (0.046)
##
## Communication_positive 0.399* -0.006 -0.089 -0.097 0.144* 0.047
## (0.237) (0.072) (0.064) (0.070) (0.074) (0.103)
##
## Expertisepositive 0.317* 0.197*** -0.101** 0.094* 0.108** 0.203***
## (0.167) (0.051) (0.045) (0.049) (0.052) (0.073)
##
## Timepositive -0.079 0.068 0.029 0.108 -0.027 0.081
## (0.259) (0.079) (0.069) (0.076) (0.081) (0.113)
##
## Bedside_positive 0.258 0.235*** -0.137*** 0.151*** 0.131*** 0.281***
## (0.157) (0.048) (0.042) (0.046) (0.049) (0.069)
##
## Officepositive 0.134 0.157** -0.051 0.188** 0.069 0.256**
## (0.257) (0.078) (0.069) (0.075) (0.080) (0.112)
##
## Costpositive 1.083*** 0.401*** -0.336*** 0.464*** 0.206* 0.670***
## (0.365) (0.111) (0.098) (0.107) (0.113) (0.159)
##
## Communication_negative -1.231*** -0.368*** 0.461*** -0.373*** -0.623*** -0.995***
## (0.321) (0.098) (0.086) (0.094) (0.100) (0.140)
##
## Expertisenegative -1.577*** -0.722*** 0.749*** -0.563*** -0.473*** -1.035***
## (0.242) (0.074) (0.065) (0.071) (0.075) (0.106)
##
## Time_negative -1.464*** -0.707*** 0.723*** -0.483*** -0.644*** -1.127***
## (0.293) (0.089) (0.078) (0.086) (0.091) (0.127)
##
## Bedside_negative -1.213*** -0.448*** 0.407*** -0.405*** -0.283*** -0.687***
## (0.286) (0.087) (0.077) (0.084) (0.089) (0.124)
##
## Office_negative -2.665*** -0.431*** 0.483*** -0.276** -0.633*** -0.909***
## (0.389) (0.119) (0.104) (0.114) (0.121) (0.169)
##
## Cost_negative -2.646*** -1.095*** 1.168*** -0.932*** -0.562*** -1.495***
## (0.495) (0.151) (0.133) (0.145) (0.154) (0.215)
##
## Average.words.per.review -0.004*** -0.001*** 0.001*** -0.0001 -0.001*** -0.001***
## (0.001) (0.0002) (0.0002) (0.0002) (0.0002) (0.0003)
##
## Constant 4.419*** 0.781*** 0.147*** 0.647*** -0.104*** 0.543***
## (0.107) (0.033) (0.029) (0.031) (0.033) (0.047)
##
## -----------------------------------------------------------------------------------------------------------------------------------------
## Observations 1,040 1,040 1,040 1,040 1,040 1,040
## R2 0.356 0.431 0.506 0.320 0.365 0.487
## Adjusted R2 0.345 0.421 0.498 0.308 0.355 0.479
## Residual Std. Error (df = 1022) 0.847 0.258 0.227 0.248 0.263 0.369
## F Statistic (df = 17; 1022) 33.261*** 45.526*** 61.589*** 28.231*** 34.572*** 57.100***
## =========================================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01