EDA on Text

Descriptive Statistics

library(readxl)
ratemdsfinal <- read_excel("ratemdsfinal.xlsx")

## New names:
## * `` -> ...1

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(xtable)
library(stargazer)

## 
## Please cite as:

##  Hlavac, Marek (2018). stargazer: Well-Formatted Regression and Summary Statistics Tables.

##  R package version 5.2.2. https://CRAN.R-project.org/package=stargazer

library(knitr)
library(psych)
library(ggplot2)

## 
## Attaching package: 'ggplot2'

## The following objects are masked from 'package:psych':
## 
##     %+%, alpha

colSums(is.na(ratemdsfinal))

##                        ...1                Doctor_names 
##                           0                           0 
##                      Gender         Years of Experience 
##                           0                          59 
##              Doctor_reviews                      Rating 
##                           0                           0 
##            Review_sentences          Count of Sentiment 
##                           0                           0 
##            Sum of Pos Score Count of Positive Sentiment 
##                           0                           0 
##         Positive_Proportion           Average pos_score 
##                           0                           0 
##            Sum of Neg Score         Negative_Proportion 
##                           0                           0 
## Count of Negative Sentiment           Average neg_score 
##                           0                           0 
##      Communication_positive           Expertisepositive 
##                           0                           0 
##                Timepositive            Bedside_positive 
##                           0                           0 
##              Officepositive                Costpositive 
##                           0                           0 
##      Communication_negative           Expertisenegative 
##                           0                           0 
##               Time_negative            Bedside_negative 
##                           0                           0 
##             Office_negative               Cost_negative 
##                           0                           0 
##                  Word Count                Review Count 
##                           0                           0 
##                Phrase Count    Average words per review 
##                           0                           0 
##               Overall_score 
##                           0

ratemdsfinal$`Years of Experience`[is.na(ratemdsfinal$`Years of Experience`)]=round(mean(ratemdsfinal$`Years of Experience`,na.rm = T))

# ghetting only the numeric columsn 
num_cols <- unlist(lapply(ratemdsfinal, is.numeric))  
rate <- ratemdsfinal[, num_cols]

rate$...1<- NULL
str(rate)

## tibble [793 × 28] (S3: tbl_df/tbl/data.frame)
##  $ Years of Experience        : num [1:793] 21 15 26 21 27 38 49 36 25 26 ...
##  $ Rating                     : num [1:793] 4.85 4.47 4.92 4.89 4.89 5 4.44 4.75 4.93 4.78 ...
##  $ Count of Sentiment         : num [1:793] 14 23 9 9 9 7 19 11 7 8 ...
##  $ Sum of Pos Score           : num [1:793] 9.81 18.01 7.11 8.41 6.54 ...
##  $ Count of Positive Sentiment: num [1:793] 12 21 8 9 9 7 15 10 7 8 ...
##  $ Positive_Proportion        : num [1:793] 0.857 0.913 0.889 1 1 ...
##  $ Average pos_score          : num [1:793] 0.817 0.858 0.889 0.934 0.726 ...
##  $ Sum of Neg Score           : num [1:793] -0.9394 -0.6902 -0.0018 0 0 ...
##  $ Negative_Proportion        : num [1:793] 0.143 0.087 0.111 0 0 ...
##  $ Count of Negative Sentiment: num [1:793] 2 2 1 0 0 0 2 1 0 0 ...
##  $ Average neg_score          : num [1:793] -0.4697 -0.3451 -0.0018 0 0 ...
##  $ Communication_positive     : num [1:793] 0.0323 0.0876 0 0.0513 0.1 ...
##  $ Expertisepositive          : num [1:793] 0.145 0.073 0.139 0.103 0.267 ...
##  $ Timepositive               : num [1:793] 0.0806 0.0438 0.0556 0.1538 0.1667 ...
##  $ Bedside_positive           : num [1:793] 0 0.19 0.194 0.103 0.167 ...
##  $ Officepositive             : num [1:793] 0.0645 0.0949 0.1111 0.0513 0.0333 ...
##  $ Costpositive               : num [1:793] 0.0161 0.073 0.0833 0 0.0667 ...
##  $ Communication_negative     : num [1:793] 0.0484 0.0438 0.0278 0 0 ...
##  $ Expertisenegative          : num [1:793] 0.0645 0.0219 0.0278 0 0.0333 ...
##  $ Time_negative              : num [1:793] 0.0968 0.0511 0 0 0 ...
##  $ Bedside_negative           : num [1:793] 0.0161 0.0219 0 0 0 ...
##  $ Office_negative            : num [1:793] 0.0323 0.0365 0 0 0 ...
##  $ Cost_negative              : num [1:793] 0 0.0219 0 0 0 ...
##  $ Word Count                 : num [1:793] 523 1604 288 358 280 ...
##  $ Review Count               : num [1:793] 14 23 9 9 9 7 19 11 7 8 ...
##  $ Phrase Count               : num [1:793] 67 165 38 42 48 32 133 93 41 67 ...
##  $ Average words per review   : num [1:793] 37 70 32 40 31 36 69 100 38 77 ...
##  $ Overall_score              : num [1:793] 0.348 0.513 0.887 0.934 0.726 ...

names(rate)[1]<- "Exp"

# Print Descriptive Statistics for RateMDs
kable(xtable(describe(rate)[c(3,4,5,8,9,11)]))

	mean	sd	median	min	max	skew
Exp	25.9394704	10.7023017	26.0000000	2.0000	56.0000	0.2925266
Rating	3.9887516	1.1456304	4.5000000	1.0000	5.0000	-1.0690984
Count of Sentiment	2.9621690	3.2622855	2.0000000	1.0000	30.0000	3.0905276
Sum of Pos Score	1.6952861	2.0505289	0.9493000	0.0000	18.0120	2.9120548
Count of Positive Sentiment	2.1601513	2.5047079	1.0000000	0.0000	21.0000	2.8523571
Positive_Proportion	0.7329744	0.3628249	1.0000000	0.0000	1.0000	-1.0791118
Average pos_score	0.6597428	0.3194366	0.7778000	0.0000	0.9923	-1.1775197
Sum of Neg Score	-0.4450890	0.8923222	0.0000000	-10.8437	0.0000	-4.3611618
Negative_Proportion	0.2263399	0.3439964	0.0000000	0.0000	1.0000	1.3315686
Count of Negative Sentiment	0.6935687	1.2951625	0.0000000	0.0000	15.0000	4.1368418
Average neg_score	-0.2453372	0.3380014	0.0000000	-0.9810	0.0000	-0.8781404
Communication_positive	0.0519260	0.1105353	0.0000000	0.0000	1.0000	4.0196863
Expertisepositive	0.1172052	0.1680621	0.0666667	0.0000	1.0000	2.6532162
Timepositive	0.0530472	0.1028384	0.0000000	0.0000	1.0000	3.2624407
Bedside_positive	0.1393430	0.1792870	0.1000000	0.0000	1.0000	2.2700317
Officepositive	0.0567404	0.1138064	0.0000000	0.0000	1.0000	3.5556046
Costpositive	0.0388190	0.0780689	0.0000000	0.0000	0.5000	2.6948463
Communication_negative	0.0427782	0.0923958	0.0000000	0.0000	1.0000	3.9127936
Expertisenegative	0.0620597	0.1284017	0.0000000	0.0000	1.0000	3.9608482
Time_negative	0.0499659	0.1058386	0.0000000	0.0000	1.0000	3.8477383
Bedside_negative	0.0335470	0.1045708	0.0000000	0.0000	1.0000	6.1851972
Office_negative	0.0337728	0.0817816	0.0000000	0.0000	0.6000	3.6060950
Cost_negative	0.0140710	0.0598381	0.0000000	0.0000	1.0000	8.8894980
Word Count	169.6973518	250.4917975	78.0000000	0.0000	2350.0000	3.5365532
Review Count	2.9621690	3.2622855	2.0000000	1.0000	30.0000	3.0905276
Phrase Count	17.4186633	23.0809676	9.0000000	1.0000	195.0000	3.4406515
Average words per review	52.6557377	53.6781351	40.0000000	0.0000	737.0000	5.8080336
Overall_score	0.4144056	0.5385908	0.6249000	-0.9810	0.9914	-0.9302287

# Checking for Normality 
#library(ggpubr)
#ggqqplot(rate$Rating)
#shapiro.test(rate$Rating)

#ggqqplot(rate$Positive_Proportion)
#shapiro.test(rate$Positive_Proportion)

#ggqqplot(rate$Negative_Proportion)
#shapiro.test(rate$Negative_Proportion)

#ggqqplot(rate$`Average pos_score`)
#shapiro.test(rate$`Average pos_score`)

#ggqqplot(rate$`Average neg_score`)
#shapiro.test(rate$`Average neg_score`)


## Multiple t-tests for RateMDs
a<- lapply(rate, function(x) t.test(x ~ ratemdsfinal$Gender, var.equal = TRUE))
# T-Test for RateMDs with Gender 
print(a)

## $Exp
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -3.8797, df = 791, p-value = 0.0001133
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.406922 -1.445720
## sample estimates:
## mean in group Female   mean in group Male 
##             24.54458             27.47090 
## 
## 
## $Rating
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -1.1734, df = 791, p-value = 0.241
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.25540488  0.06430129
## sample estimates:
## mean in group Female   mean in group Male 
##             3.943205             4.038757 
## 
## 
## $`Count of Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -2.2132, df = 791, p-value = 0.02717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.96627433 -0.05789856
## sample estimates:
## mean in group Female   mean in group Male 
##             2.718072             3.230159 
## 
## 
## $`Sum of Pos Score`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -2.6979, df = 791, p-value = 0.007126
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.6768438 -0.1067308
## sample estimates:
## mean in group Female   mean in group Male 
##             1.508533             1.900320 
## 
## 
## $`Count of Positive Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -2.7495, df = 791, p-value = 0.006104
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8357665 -0.1394997
## sample estimates:
## mean in group Female   mean in group Male 
##             1.927711             2.415344 
## 
## 
## $Positive_Proportion
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.34504, df = 791, p-value = 0.7302
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05957205  0.04176041
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7287292            0.7376351 
## 
## 
## $`Average pos_score`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 1.9515e-05, df = 791, p-value = 1
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04461023  0.04461111
## sample estimates:
## mean in group Female   mean in group Male 
##            0.6597430            0.6597426 
## 
## 
## $`Sum of Neg Score`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.28433, df = 791, p-value = 0.7762
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1426599  0.1065606
## sample estimates:
## mean in group Female   mean in group Male 
##           -0.4536928           -0.4356431 
## 
## 
## $Negative_Proportion
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 0.71821, df = 791, p-value = 0.4728
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03045368  0.06559611
## sample estimates:
## mean in group Female   mean in group Male 
##            0.2347156            0.2171444 
## 
## 
## $`Count of Negative Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 0.0092704, df = 791, p-value = 0.9926
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1800207  0.1817291
## sample estimates:
## mean in group Female   mean in group Male 
##            0.6939759            0.6931217 
## 
## 
## $`Average neg_score`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.036431, df = 791, p-value = 0.9709
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04807935  0.04632724
## sample estimates:
## mean in group Female   mean in group Male 
##           -0.2457548           -0.2448787 
## 
## 
## $Communication_positive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.84318, df = 791, p-value = 0.3994
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.022057602  0.008801988
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04876669           0.05539450 
## 
## 
## $Expertisepositive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.21835, df = 791, p-value = 0.8272
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02608057  0.02085918
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1159607            0.1185714 
## 
## 
## $Timepositive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 0.75394, df = 791, p-value = 0.4511
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.008842558  0.019870758
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05567561           0.05016151 
## 
## 
## $Bedside_positive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 1.1781, df = 791, p-value = 0.2391
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01000301  0.04002949
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1464993            0.1314861 
## 
## 
## $Officepositive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.5259, df = 791, p-value = 0.5991
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02014804  0.01163348
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05471113           0.05896841 
## 
## 
## $Costpositive
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 1.7647, df = 791, p-value = 0.078
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.001099019  0.020663489
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04348194           0.03369971 
## 
## 
## $Communication_negative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.09706, df = 791, p-value = 0.9227
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01354141  0.01226538
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04247408           0.04311209 
## 
## 
## $Expertisenegative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -1.4673, df = 791, p-value = 0.1427
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.031293214  0.004521759
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05567915           0.06906487 
## 
## 
## $Time_negative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -1.547, df = 791, p-value = 0.1223
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.026389721  0.003127285
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04442160           0.05605282 
## 
## 
## $Bedside_negative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 1.3822, df = 791, p-value = 0.1673
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004315428  0.024856885
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03844281           0.02817208 
## 
## 
## $Office_negative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 0.2261, df = 791, p-value = 0.8212
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01010528  0.01273628
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03439982           0.03308432 
## 
## 
## $Cost_negative
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = 0.66558, df = 791, p-value = 0.5059
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005521611  0.011187012
## sample estimates:
## mean in group Female   mean in group Male 
##           0.01542129           0.01258859 
## 
## 
## $`Word Count`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -1.8513, df = 791, p-value = 0.0645
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -67.827806   1.985592
## sample estimates:
## mean in group Female   mean in group Male 
##             154.0048             186.9259 
## 
## 
## $`Review Count`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -2.2132, df = 791, p-value = 0.02717
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.96627433 -0.05789856
## sample estimates:
## mean in group Female   mean in group Male 
##             2.718072             3.230159 
## 
## 
## $`Phrase Count`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -2.0798, df = 791, p-value = 0.03787
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.6204551 -0.1912998
## sample estimates:
## mean in group Female   mean in group Male 
##             15.79518             19.20106 
## 
## 
## $`Average words per review`
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.1802, df = 791, p-value = 0.857
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.184387  6.808063
## sample estimates:
## mean in group Female   mean in group Male 
##             52.32771             53.01587 
## 
## 
## $Overall_score
## 
##  Two Sample t-test
## 
## data:  x by ratemdsfinal$Gender
## t = -0.022851, df = 791, p-value = 0.9818
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07609207  0.07434085
## sample estimates:
## mean in group Female   mean in group Male 
##            0.4139883            0.4148639

Statistics based on Gender

#str(ratemdsfinal)
ratemdsfinal$Gender<- factor(ratemdsfinal$Gender)

rate %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating by Gender on RateMDs\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

rate %>% #plot Positive_Proportion over Gender 
  ggplot(aes(x=Positive_Proportion,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Positive Proportion by Gender on RateMDs\n(blue=Female, red=Male)",x="Positive_Proportion",y = "Density of Positive Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

rate %>% #plot Negative_Proportion over Gender 
  ggplot(aes(x=Negative_Proportion,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Negative_Proportion by Gender on RateMDs\n(blue=Female, red=Male)",x="Negative_Proportion",y = "Density of Negative_Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

rate %>% #plot `Average neg_score` over Gender 
  ggplot(aes(x=`Average neg_score`,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average neg_score` by Gender on RateMDs\n(blue=Female, red=Male)",x="`Average neg_score`",y = "Density of `Average neg_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

rate %>% #plot `Average pos_score` over Gender 
  ggplot(aes(x=`Average pos_score`,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average pos_score` by Gender on RateMDs\n(blue=Female, red=Male)",x="`Average pos_score`",y = "Density of `Average pos_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

ratemdsfinal %>% #plot Years of Experience over Gender 
  ggplot(aes(x=`Years of Experience`,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Years of Experience by Gender on RateMDs\n(blue=Female, red=Male)",x="Years of Experience - in years",y = "Density of Years of Experience")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

## Warning: Use of `ratemdsfinal$Gender` is discouraged. Use `Gender` instead.

rate %>% #plot Overall_score over Gender 
  ggplot(aes(x=Overall_score,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Overall_score by Gender on RateMDs\n(blue=Female, red=Male)",x="Overall_score",y = "Density of Overall_score")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

## Statistics by No of Years of Experience

summary(ratemdsfinal$`Years of Experience`)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   19.00   26.00   25.94   33.00   56.00

ratemdsfinal$`Years of Experience` <- as.factor(ratemdsfinal$`Years of Experience`)
levels(ratemdsfinal$`Years of Experience`) = list("Less than 10 years" = c(0:10),
                                       "11 to 20 years" =c(11:20),
                                       "21 to 30 years " = c(21:30), 
                                       "31 years and above" = c(31:56))
summary(ratemdsfinal$`Years of Experience`)

## Less than 10 years     11 to 20 years    21 to 30 years  31 years and above 
##                 61                174                310                248

boxplot(ratemdsfinal$Rating~ratemdsfinal$`Years of Experience`, # Specify the variables to graph, quantitative variable first
        col=c("orange","thistle","lightgreen","lightblue","gray","yellow"), # Specify the data set that contains the variables
        main = "Rating over Years of Experience on RateMDs", #Create the chart title
        xlab = "Rating",  #Create the x-axis label
        ylab = "",
        las=1,  # Width of box as proportion of original
        whisklty = 1,  # Whisker line type; 1 = solid line
        staplelty = 0,  # Staple (line at end) type; 0 = none
        outpch = 16,  # Symbols for outliers; 16 = filled circle
        outcol = "slategray3", 
        notch=T,horizontal = T# Color for outliers
) #Create the y-axis label

# Checking the Significance Level of the Factors created
years.aov1 <- aov(ratemdsfinal$Rating~ratemdsfinal$`Years of Experience`, data = ratemdsfinal)
summary(years.aov1)

##                                     Df Sum Sq Mean Sq F value Pr(>F)  
## ratemdsfinal$`Years of Experience`   3    8.3   2.771    2.12 0.0963 .
## Residuals                          789 1031.2   1.307                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

TukeyHSD(years.aov1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = ratemdsfinal$Rating ~ ratemdsfinal$`Years of Experience`, data = ratemdsfinal)
## 
## $`ratemdsfinal$`Years of Experience``
##                                              diff         lwr       upr
## 11 to 20 years-Less than 10 years     0.236710948 -0.20122948 0.6746514
## 21 to 30 years -Less than 10 years    0.359677948 -0.05257317 0.7719291
## 31 years and above-Less than 10 years 0.365290851 -0.05534776 0.7859295
## 21 to 30 years -11 to 20 years        0.122967000 -0.15582985 0.4017638
## 31 years and above-11 to 20 years     0.128579904 -0.16247600 0.4196358
## 31 years and above-21 to 30 years     0.005612903 -0.24513139 0.2563572
##                                           p adj
## 11 to 20 years-Less than 10 years     0.5050019
## 21 to 30 years -Less than 10 years    0.1118890
## 31 years and above-Less than 10 years 0.1145611
## 21 to 30 years -11 to 20 years        0.6676319
## 31 years and above-11 to 20 years     0.6664955
## 31 years and above-21 to 30 years     0.9999313

# Thus, there is not much significance

Plots based on Years of Experience over Dependent Variables

boxplot(ratemdsfinal$Rating~ratemdsfinal$`Years of Experience`, # Specify the variables to graph, quantitative variable first
        col=c("orange","thistle","lightgreen","lightblue","gray","yellow"), # Specify the data set that contains the variables
        main = "Rating over Years of Experience on RateMDs", #Create the chart title
        xlab = "Rating",  #Create the x-axis label
        ylab = "Years of Experience",
        las=1,  # Width of box as proportion of original
        whisklty = 1,  # Whisker line type; 1 = solid line
        staplelty = 0,  # Staple (line at end) type; 0 = none
        outpch = 16,  # Symbols for outliers; 16 = filled circle
        outcol = "slategray3", 
        notch=T,horizontal = T# Color for outliers
) #Create the y-axis label

rate %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=ratemdsfinal$Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating over Gender spread across Years of Experience on RateMDs\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")+#hide the side signal of fill parameter
 facet_wrap(ratemdsfinal$`Years of Experience`)

Healthgrades

library(readxl)
healthgrades <- read_excel("healthgradesfinal.xlsx")

str(healthgrades)

## tibble [247 × 33] (S3: tbl_df/tbl/data.frame)
##  $ DoctorName              : chr [1:247] "Dr. Amy Williams, MD" "Dr. Roma Franzia, MD" "Dr. Elizabeth Manjooran, MD" "Dr. Jason Canel, MD" ...
##  $ Age                     : chr [1:247] "• Age 42" "• Age 50" "• Age 59" "• Age 48" ...
##  $ Gender                  : chr [1:247] "Female" "Female" "Female" "Male" ...
##  $ Speciality              : chr [1:247] "Pediatrics" "Pediatrics" "Pediatrics" "Pediatrics" ...
##  $ Years of Experience     : num [1:247] 12 21 28 11 43 24 42 39 16 41 ...
##  $ Biography               : chr [1:247] "Dr. Amy Williams, MD is a pediatrics specialist in Chicago, IL. She specializes in pediatrics." "Dr. Roma Franzia, MD is a pediatrics specialist in Winnetka, IL and has been practicing for 21 years. She gradu"| __truncated__ "Dr. Elizabeth Manjooran, MD is a pediatrics specialist in Des Plaines, IL. She specializes in pediatrics." "Dr. Jason Canel, MD is a pediatrics specialist in Glenview, IL and has been practicing for 11 years. He graduat"| __truncated__ ...
##  $ Rating                  : num [1:247] 4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
##  $ Reviews                 : chr [1:247] "[\"I've been seeing Dr. Williams for years and through 2 pregnancies. She is the absolute best. Caring, compass"| __truncated__ "[\"Dr. Roma Franzia is the best pediatrician ever! So caring about my kids and family as a whole, always availa"| __truncated__ "['She’s not only a very good doctor but also a wonderful person! Also her office staff is very professional, wa"| __truncated__ "[\"We were assigned Dr Canel at my daughter's birth. I am so glad he was on call that day! My daughter is now t"| __truncated__ ...
##  $ Review_cleaned          : chr [1:247] "['[\"I\\'ve been seeing Dr', ' Williams for years ', ' through 2 pregnancies', ' She is the absolute best', ' C"| __truncated__ "['[\"Dr', ' Roma Franzia is the best pediatrician ever', ' So caring about my kids ', ' family as a whole', ' a"| __truncated__ "[\"['She’s not only a very good doctor \", ' also a wonderful person', ' Also her office staff is very professi"| __truncated__ "['[\"We were assigned Dr Canel at my daughter\\'s birth', ' I am so glad he was on call that day', ' My daughte"| __truncated__ ...
##  $ Word Count              : num [1:247] 614 793 176 199 84 182 144 690 50 68 ...
##  $ Review Count            : num [1:247] 21 11 4 5 2 3 5 17 3 2 ...
##  $ Phrase Count            : num [1:247] 166 143 35 47 16 42 41 152 11 11 ...
##  $ Total Count of Sentiment: num [1:247] 21 11 4 5 2 3 5 17 3 2 ...
##  $ Positive_Proportion     : num [1:247] 1 1 1 1 1 ...
##  $ Count of pos Sentiment  : num [1:247] 21 11 4 5 2 3 5 16 3 2 ...
##  $ Average pos_score       : num [1:247] 0.824 0.792 0.916 0.897 0.936 ...
##  $ Negative_Proportion     : num [1:247] 0 0 0 0 0 ...
##  $ Count of neg Sentiment  : num [1:247] 0 0 0 0 0 0 0 1 0 0 ...
##  $ Average neg_score       : num [1:247] 0 0 0 0 0 0 0 -0.612 0 0 ...
##  $ Communication_positive  : num [1:247] 0.0676 0.0286 0.1 0.1538 0.3333 ...
##  $ Expertisepositive       : num [1:247] 0.189 0.114 0.1 0.154 0.167 ...
##  $ Timepositive            : num [1:247] 0.1216 0.0714 0 0.1154 0 ...
##  $ Bedside_positive        : num [1:247] 0.108 0.186 0.35 0.115 0.333 ...
##  $ Officepositive          : num [1:247] 0.027 0.0429 0.15 0.0769 0 ...
##  $ Costpositive            : num [1:247] 0.0541 0.0286 0 0.0385 0 ...
##  $ Communication_negative  : num [1:247] 0 0.0143 0 0 0 ...
##  $ Expertisenegative       : num [1:247] 0.027 0.0143 0 0 0 ...
##  $ Time_negative           : num [1:247] 0 0.0429 0 0 0 ...
##  $ Bedside_negative        : num [1:247] 0 0.0429 0 0 0 ...
##  $ Office_negative         : num [1:247] 0 0.0143 0 0 0 ...
##  $ Cost_negative           : num [1:247] 0 0 0 0 0 0 0 0 0 0 ...
##  $ Average words per review: num [1:247] 29 72 44 40 42 61 29 41 17 34 ...
##  $ Overall_score           : num [1:247] 0.824 0.792 0.916 0.897 0.936 ...

healthgrades$Gender<- factor(healthgrades$Gender)

colSums(is.na(healthgrades))

##               DoctorName                      Age                   Gender 
##                        0                        0                        0 
##               Speciality      Years of Experience                Biography 
##                        0                        4                        0 
##                   Rating                  Reviews           Review_cleaned 
##                        0                        0                        0 
##               Word Count             Review Count             Phrase Count 
##                        0                        0                        0 
## Total Count of Sentiment      Positive_Proportion   Count of pos Sentiment 
##                        0                        0                        0 
##        Average pos_score      Negative_Proportion   Count of neg Sentiment 
##                        0                        0                        0 
##        Average neg_score   Communication_positive        Expertisepositive 
##                        0                        0                        0 
##             Timepositive         Bedside_positive           Officepositive 
##                        0                        0                        0 
##             Costpositive   Communication_negative        Expertisenegative 
##                        0                        0                        0 
##            Time_negative         Bedside_negative          Office_negative 
##                        0                        0                        0 
##            Cost_negative Average words per review            Overall_score 
##                        0                        0                        0

healthgrades$`Years of Experience`[is.na(healthgrades$`Years of Experience`)]=round(mean(healthgrades$`Years of Experience`,na.rm = T))

# ghetting only the numeric columsn 
num_cols <- unlist(lapply(healthgrades, is.numeric))  
health <- healthgrades[, num_cols]

# Print Descriptive Statistics for RateMDs
kable(xtable(describe(health)[c(3,4,5,8,9,11)]))

	mean	sd	median	min	max	skew
Years of Experience	25.8623482	10.8210349	26.0000000	2.0000	53.0000000	0.1554778
Rating	4.5979757	0.3479848	4.6000000	3.5000	5.0000000	-0.5336239
Word Count	194.0971660	270.5785255	124.0000000	11.0000	2018.0000000	4.1629431
Review Count	4.4089069	6.7522054	3.0000000	1.0000	54.0000000	4.9852808
Phrase Count	39.4777328	56.4946083	23.0000000	3.0000	432.0000000	4.4631481
Total Count of Sentiment	4.7692308	9.5512835	3.0000000	1.0000	90.0000000	6.8203153
Positive_Proportion	0.9075924	0.1964834	1.0000000	0.0000	1.0000000	-2.6376345
Count of pos Sentiment	4.2995951	9.0171088	2.0000000	0.0000	86.0000000	7.0230331
Average pos_score	0.7945390	0.1824017	0.8376093	0.0000	0.9857000	-2.5855164
Negative_Proportion	0.0924076	0.1964834	0.0000000	0.0000	1.0000000	2.6376345
Count of neg Sentiment	0.4696356	1.0997455	0.0000000	0.0000	9.0000000	4.0034054
Average neg_score	-0.1475103	0.2808211	0.0000000	-0.9471	0.0000000	-1.6245998
Communication_positive	0.1074784	0.1175589	0.0769231	0.0000	0.7500000	1.5961346
Expertisepositive	0.1554605	0.1370798	0.1428571	0.0000	1.0000000	1.6127717
Timepositive	0.0940978	0.1083136	0.0666667	0.0000	0.5000000	1.4619393
Bedside_positive	0.1563275	0.1493452	0.1428571	0.0000	1.0000000	1.9710141
Officepositive	0.0472850	0.0756680	0.0000000	0.0000	0.3333333	1.7511825
Costpositive	0.0276534	0.0562286	0.0000000	0.0000	0.3333333	2.7729653
Communication_negative	0.0310048	0.0852062	0.0000000	0.0000	1.0000000	6.8384268
Expertisenegative	0.0308365	0.0874027	0.0000000	0.0000	1.0000000	6.7252228
Time_negative	0.0365831	0.0713895	0.0000000	0.0000	0.5000000	2.8473511
Bedside_negative	0.0183953	0.0478848	0.0000000	0.0000	0.3333333	3.7851412
Office_negative	0.0151664	0.0443944	0.0000000	0.0000	0.3333333	4.4347950
Cost_negative	0.0052563	0.0278533	0.0000000	0.0000	0.3333333	8.2021183
Average words per review	45.6032389	19.9854741	44.0000000	8.0000	135.0000000	0.7046350
Overall_score	0.6470287	0.3555858	0.7937000	-0.8750	0.9857000	-1.5541893

## Multiple t-tests for RateMDs
a<- lapply(health[], function(x) t.test(x ~ healthgrades$Gender, var.equal = TRUE))
print(a)

## $`Years of Experience`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -4.4111, df = 245, p-value = 1.541e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -8.499717 -3.252163
## sample estimates:
## mean in group Female   mean in group Male 
##             23.15038             29.02632 
## 
## 
## $Rating
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.42814, df = 245, p-value = 0.6689
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0685821  0.1066773
## sample estimates:
## mean in group Female   mean in group Male 
##             4.606767             4.587719 
## 
## 
## $`Word Count`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.2259, df = 245, p-value = 0.2214
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -25.66264 110.24660
## sample estimates:
## mean in group Female   mean in group Male 
##             213.6165             171.3246 
## 
## 
## $`Review Count`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.1465, df = 245, p-value = 0.2527
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7089661  2.6839034
## sample estimates:
## mean in group Female   mean in group Male 
##             4.864662             3.877193 
## 
## 
## $`Phrase Count`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.1268, df = 245, p-value = 0.2609
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.074789 22.315390
## sample estimates:
## mean in group Female   mean in group Male 
##             43.22556             35.10526 
## 
## 
## $`Total Count of Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.3748, df = 245, p-value = 0.1705
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.7239489  4.0698136
## sample estimates:
## mean in group Female   mean in group Male 
##             5.541353             3.868421 
## 
## 
## $Positive_Proportion
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -0.16404, df = 245, p-value = 0.8698
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05361620  0.04537245
## sample estimates:
## mean in group Female   mean in group Male 
##            0.9056900            0.9098119 
## 
## 
## $`Count of pos Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.5925, df = 245, p-value = 0.1126
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4328089  4.0869442
## sample estimates:
## mean in group Female   mean in group Male 
##             5.142857             3.315789 
## 
## 
## $`Average pos_score`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -0.22508, df = 245, p-value = 0.8221
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05119520  0.04069462
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7921158            0.7973661 
## 
## 
## $Negative_Proportion
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.16404, df = 245, p-value = 0.8698
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04537245  0.05361620
## sample estimates:
## mean in group Female   mean in group Male 
##           0.09431001           0.09018813 
## 
## 
## $`Count of neg Sentiment`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -1.0986, df = 245, p-value = 0.273
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.4304976  0.1222269
## sample estimates:
## mean in group Female   mean in group Male 
##            0.3984962            0.5526316 
## 
## 
## $`Average neg_score`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.73812, df = 245, p-value = 0.4611
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04418382  0.09714504
## sample estimates:
## mean in group Female   mean in group Male 
##           -0.1352885           -0.1617691 
## 
## 
## $Communication_positive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.6974, df = 245, p-value = 0.09089
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004069804  0.054814609
## sample estimates:
## mean in group Female   mean in group Male 
##           0.11918876           0.09381636 
## 
## 
## $Expertisepositive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.38903, df = 245, p-value = 0.6976
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0277035  0.0413400
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1586074            0.1517891 
## 
## 
## $Timepositive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -0.8634, df = 245, p-value = 0.3888
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03918680  0.01530196
## sample estimates:
## mean in group Female   mean in group Male 
##           0.08858589           0.10052831 
## 
## 
## $Bedside_positive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.34221, df = 245, p-value = 0.7325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03107836  0.04414818
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1593436            0.1528087 
## 
## 
## $Officepositive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.7565, df = 245, p-value = 0.4501
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.0117271  0.0263522
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05066005           0.04334750 
## 
## 
## $Costpositive
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 1.0907, df = 245, p-value = 0.2765
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006306086  0.021955029
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03126467           0.02344020 
## 
## 
## $Communication_negative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.16922, df = 245, p-value = 0.8658
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01961945  0.02330742
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03185587           0.03001188 
## 
## 
## $Expertisenegative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.49546, df = 245, p-value = 0.6207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01647136  0.02754264
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03339144           0.02785580 
## 
## 
## $Time_negative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.94808, df = 245, p-value = 0.344
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009310659  0.026591689
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04057103           0.03193051 
## 
## 
## $Bedside_negative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -1.275, df = 245, p-value = 0.2035
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01980555  0.00424055
## sample estimates:
## mean in group Female   mean in group Male 
##           0.01480339           0.02258589 
## 
## 
## $Office_negative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.21281, df = 245, p-value = 0.8317
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009974398  0.012390719
## sample estimates:
## mean in group Female   mean in group Male 
##           0.01572405           0.01451588 
## 
## 
## $Cost_negative
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = -0.54196, df = 245, p-value = 0.5883
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.008941925  0.005082995
## sample estimates:
## mean in group Female   mean in group Male 
##          0.004365786          0.006295251 
## 
## 
## $`Average words per review`
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.13875, df = 245, p-value = 0.8898
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -4.679800  5.389073
## sample estimates:
## mean in group Female   mean in group Male 
##             45.76692             45.41228 
## 
## 
## $Overall_score
## 
##  Two Sample t-test
## 
## data:  x by healthgrades$Gender
## t = 0.46704, df = 245, p-value = 0.6409
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.06830712  0.11076776
## sample estimates:
## mean in group Female   mean in group Male 
##            0.6568273            0.6355970

Statistics based on Gender

## Plots with Gender 
health %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=healthgrades$Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating by Gender on HealthGrades\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health %>% #plot Positive_Proportion over Gender 
  ggplot(aes(x=Positive_Proportion,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Positive Proportion by Gender on HealthGrades\n(blue=Female, red=Male)",x="Positive_Proportion",y = "Density of Positive Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health  %>% #plot Negative_Proportion over Gender 
  ggplot(aes(x=Negative_Proportion,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Negative_Proportion by Gender on HealthGrades\n(blue=Female, red=Male)",x="Negative_Proportion",y = "Density of Negative_Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health  %>% #plot `Average neg_score` over Gender 
  ggplot(aes(x=`Average neg_score`,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average neg_score` by Gender on HealthGrades\n(blue=Female, red=Male)",x="`Average neg_score`",y = "Density of `Average neg_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health %>% #plot `Average pos_score` over Gender 
  ggplot(aes(x=`Average pos_score`,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average pos_score` by Gender on HealthGrades\n(blue=Female, red=Male)",x="`Average pos_score`",y = "Density of `Average pos_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health  %>% #plot Years of Experience over Gender 
  ggplot(aes(x=`Years of Experience`,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Years of Experience by Gender on HealthGrades\n(blue=Female, red=Male)",x="Years of Experience - in years",y = "Density of Years of Experience")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

health %>% #plot Overall_score over Gender 
  ggplot(aes(x=Overall_score,fill=healthgrades$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Overall_score by Gender on HealthGrades\n(blue=Female, red=Male)",x="Overall_score",y = "Density of Overall_score")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

Statistics based on Years of Experience in HealthGrades

summary(healthgrades$`Years of Experience`)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    2.00   18.00   26.00   25.86   34.00   53.00

healthgrades$`Years of Experience` <- as.factor(healthgrades$`Years of Experience`)
levels(healthgrades$`Years of Experience`) = list("Less than 10 years" = c(0:10),
                                       "11 to 20 years" =c(11:20),
                                       "21 to 30 years " = c(21:30), 
                                       "31 years and above" = c(31:56))
summary(healthgrades$`Years of Experience`)

## Less than 10 years     11 to 20 years    21 to 30 years  31 years and above 
##                 22                 58                 86                 81

boxplot(healthgrades$Rating~healthgrades$`Years of Experience`, # Specify the variables to graph, quantitative variable first
        col=c("orange","thistle","lightgreen","lightblue","gray","yellow"), # Specify the data set that contains the variables
        main = "Rating over Years of Experience on Healthgrades", #Create the chart title
        xlab = "Rating",  #Create the x-axis label
        ylab = "Years of Experience",
        las=1,  # Width of box as proportion of original
        whisklty = 1,  # Whisker line type; 1 = solid line
        staplelty = 0,  # Staple (line at end) type; 0 = none
        outpch = 16,  # Symbols for outliers; 16 = filled circle
        outcol = "slategray3", 
        notch=T,horizontal = T# Color for outliers
) #Create the y-axis label

# Checking the Significance Level of the Factors created
years.aov1 <- aov(healthgrades$Rating~healthgrades$`Years of Experience`, data = healthgrades)
summary(years.aov1)

##                                     Df Sum Sq Mean Sq F value Pr(>F)
## healthgrades$`Years of Experience`   3  0.404  0.1346   1.113  0.344
## Residuals                          243 29.385  0.1209

TukeyHSD(years.aov1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = healthgrades$Rating ~ healthgrades$`Years of Experience`, data = healthgrades)
## 
## $`healthgrades$`Years of Experience``
##                                              diff         lwr        upr
## 11 to 20 years-Less than 10 years     -0.04059561 -0.26584306 0.18465184
## 21 to 30 years -Less than 10 years    -0.12167019 -0.33659747 0.09325709
## 31 years and above-Less than 10 years -0.05583614 -0.27211047 0.16043820
## 21 to 30 years -11 to 20 years        -0.08107458 -0.23392205 0.07177289
## 31 years and above-11 to 20 years     -0.01524053 -0.16997644 0.13949538
## 31 years and above-21 to 30 years      0.06583405 -0.07345171 0.20511981
##                                           p adj
## 11 to 20 years-Less than 10 years     0.9663542
## 21 to 30 years -Less than 10 years    0.4606505
## 31 years and above-Less than 10 years 0.9090996
## 21 to 30 years -11 to 20 years        0.5180421
## 31 years and above-11 to 20 years     0.9941852
## 31 years and above-21 to 30 years     0.6130291

# Thus, there is not much significance

Plots for Years of Experience on HealthGrades

health %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=healthgrades$Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating over Gender spread across Years of Experience on HealthGrades\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")+#hide the side signal of fill parameter
 facet_wrap(~healthgrades$`Years of Experience`)

Combined

options(scipen=999)
#Combined(Both Ratemds and Healthgrades)

hcommon=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,7,14,17,16,19,33)])

rcommon=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,6,11,14,12,16,33)])

dcombine=data.frame(rbind(hcommon,rcommon))
str(dcombine)

## 'data.frame':    1040 obs. of  21 variables:
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
##  $ Years.of.Experience     : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
##  $ Communication_positive  : num  0.0676 0.0286 0.1 0.1538 0.3333 ...
##  $ Expertisepositive       : num  0.189 0.114 0.1 0.154 0.167 ...
##  $ Timepositive            : num  0.1216 0.0714 0 0.1154 0 ...
##  $ Bedside_positive        : num  0.108 0.186 0.35 0.115 0.333 ...
##  $ Officepositive          : num  0.027 0.0429 0.15 0.0769 0 ...
##  $ Costpositive            : num  0.0541 0.0286 0 0.0385 0 ...
##  $ Communication_negative  : num  0 0.0143 0 0 0 ...
##  $ Expertisenegative       : num  0.027 0.0143 0 0 0 ...
##  $ Time_negative           : num  0 0.0429 0 0 0 ...
##  $ Bedside_negative        : num  0 0.0429 0 0 0 ...
##  $ Office_negative         : num  0 0.0143 0 0 0 ...
##  $ Cost_negative           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Average.words.per.review: num  29 72 44 40 42 61 29 41 17 34 ...
##  $ Rating                  : num  4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...
##  $ Positive_Proportion     : num  1 1 1 1 1 ...
##  $ Negative_Proportion     : num  0 0 0 0 0 ...
##  $ Average.pos_score       : num  0.824 0.792 0.916 0.897 0.936 ...
##  $ Average.neg_score       : num  0 0 0 0 0 0 0 -0.612 0 0 ...
##  $ Overall_score           : num  0.824 0.792 0.916 0.897 0.936 ...

# Print Descriptive Statistics for Combined
kable(xtable(describe(dcombine)[c(3,4,5,8,9,11)]))

	mean	sd	median	min	max	skew
Gender*	1.4730769	0.4995148	1.0000000	1.000	2.0000	0.1076933
Years.of.Experience*	2.9336538	0.9247022	3.0000000	1.000	4.0000	-0.4741071
Communication_positive	0.0651197	0.1146517	0.0000000	0.000	1.0000	3.1973305
Expertisepositive	0.1262908	0.1620018	0.0909091	0.000	1.0000	2.4456190
Timepositive	0.0627967	0.1055687	0.0000000	0.000	1.0000	2.7014127
Bedside_positive	0.1433768	0.1727294	0.1111111	0.000	1.0000	2.2205717
Officepositive	0.0544948	0.1060410	0.0000000	0.000	1.0000	3.5336472
Costpositive	0.0361672	0.0736005	0.0000000	0.000	0.5000	2.7902871
Communication_negative	0.0399820	0.0908382	0.0000000	0.000	1.0000	4.4908606
Expertisenegative	0.0546442	0.1206358	0.0000000	0.000	1.0000	4.3331605
Time_negative	0.0467875	0.0988836	0.0000000	0.000	1.0000	3.8977334
Bedside_negative	0.0299485	0.0944457	0.0000000	0.000	1.0000	6.6026214
Office_negative	0.0293538	0.0750176	0.0000000	0.000	0.6000	3.8922822
Cost_negative	0.0119775	0.0541032	0.0000000	0.000	1.0000	9.5204150
Average.words.per.review	50.9807692	47.9577896	41.0000000	0.000	737.0000	6.3078409
Rating	4.1334423	1.0470945	4.5000000	1.000	5.0000	-1.3967250
Positive_Proportion	0.7744462	0.3391379	1.0000000	0.000	1.0000	-1.3426505
Negative_Proportion	0.1945310	0.3203035	0.0000000	0.000	1.0000	1.5719105
Average.pos_score	0.6917569	0.2982497	0.7988333	0.000	0.9923	-1.4246449
Average.neg_score	-0.2221033	0.3278596	0.0000000	-0.981	0.0000	-1.0272195
Overall_score	0.4696536	0.5107502	0.6907250	-0.981	0.9914	-1.1026585

## Multiple t-tests for RateMDs
a<- lapply(dcombine[,-c(1:2)], function(x) t.test(x ~ dcombine$Gender, var.equal = TRUE))
print(a)

## $Communication_positive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.21913, df = 1038, p-value = 0.8266
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01241801  0.01554012
## sample estimates:
## mean in group Female   mean in group Male 
##           0.06585818           0.06429713 
## 
## 
## $Expertisepositive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.0042608, df = 1038, p-value = 0.9966
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01970985  0.01979564
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1263111            0.1262682 
## 
## 
## $Timepositive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.27915, df = 1038, p-value = 0.7802
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01104033  0.01470251
## sample estimates:
## mean in group Female   mean in group Male 
##           0.06366296           0.06183186 
## 
## 
## $Bedside_positive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 1.2298, df = 1038, p-value = 0.219
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007855475  0.034235379
## sample estimates:
## mean in group Female   mean in group Male 
##            0.1496166            0.1364267 
## 
## 
## $Officepositive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -0.24602, df = 1038, p-value = 0.8057
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01455012  0.01130812
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05372793           0.05534893 
## 
## 
## $Costpositive
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 2.0143, df = 1038, p-value = 0.04423
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.0002377392 0.0181508551
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04051680           0.03132251 
## 
## 
## $Communication_negative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -0.031827, df = 1038, p-value = 0.9746
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.01125547  0.01089617
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03989703           0.04007668 
## 
## 
## $Expertisenegative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -1.2344, df = 1038, p-value = 0.2173
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.023944766  0.005451714
## sample estimates:
## mean in group Female   mean in group Male 
##           0.05026990           0.05951643 
## 
## 
## $Time_negative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -1.1361, df = 1038, p-value = 0.2562
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.019025744  0.005072864
## sample estimates:
## mean in group Female   mean in group Male 
##           0.04348706           0.05046350 
## 
## 
## $Bedside_negative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.99352, df = 1038, p-value = 0.3207
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.005682433  0.017338003
## sample estimates:
## mean in group Female   mean in group Male 
##           0.03270550           0.02687772 
## 
## 
## $Office_negative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.23284, df = 1038, p-value = 0.8159
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.008061275  0.010231912
## sample estimates:
## mean in group Female   mean in group Male 
##           0.02986720           0.02878188 
## 
## 
## $Cost_negative
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.47828, df = 1038, p-value = 0.6325
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.004988305  0.008203773
## sample estimates:
## mean in group Female   mean in group Male 
##           0.01273811           0.01113038 
## 
## 
## $Average.words.per.review
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -0.17405, df = 1038, p-value = 0.8619
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -6.366033  5.328706
## sample estimates:
## mean in group Female   mean in group Male 
##             50.73540             51.25407 
## 
## 
## $Rating
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -0.94877, df = 1038, p-value = 0.343
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.18931953  0.06591261
## sample estimates:
## mean in group Female   mean in group Male 
##             4.104252             4.165955 
## 
## 
## $Positive_Proportion
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = -0.27771, df = 1038, p-value = 0.7813
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.04720122  0.03549734
## sample estimates:
## mean in group Female   mean in group Male 
##            0.7716777            0.7775297 
## 
## 
## $Negative_Proportion
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.64885, df = 1038, p-value = 0.5166
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.02613506  0.05195780
## sample estimates:
## mean in group Female   mean in group Male 
##            0.2006391            0.1877277 
## 
## 
## $Average.pos_score
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.012898, df = 1038, p-value = 0.9897
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03612631  0.03660439
## sample estimates:
## mean in group Female   mean in group Male 
##             0.691870             0.691631 
## 
## 
## $Average.neg_score
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.32777, df = 1038, p-value = 0.7432
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.03329650  0.04665067
## sample estimates:
## mean in group Female   mean in group Male 
##           -0.2189445           -0.2256216 
## 
## 
## $Overall_score
## 
##  Two Sample t-test
## 
## data:  x by dcombine$Gender
## t = 0.21793, df = 1038, p-value = 0.8275
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.05535781  0.06919006
## sample estimates:
## mean in group Female   mean in group Male 
##            0.4729255            0.4660093

# Checking the Significance Level of the Gender 
years.aov1 <- aov(dcombine$Rating~dcombine$Gender, data = dcombine)
summary(years.aov1)

##                   Df Sum Sq Mean Sq F value Pr(>F)
## dcombine$Gender    1      1   0.987     0.9  0.343
## Residuals       1038   1138   1.097

TukeyHSD(years.aov1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = dcombine$Rating ~ dcombine$Gender, data = dcombine)
## 
## $`dcombine$Gender`
##                   diff         lwr       upr     p adj
## Male-Female 0.06170346 -0.06591261 0.1893195 0.3429607

# Thus, there is not much significance 

# Checking the Significance Level of the Years of Experience  
years.aov1 <- aov(dcombine$Rating~dcombine$Years.of.Experience, data = dcombine)
summary(years.aov1)

##                                Df Sum Sq Mean Sq F value Pr(>F)
## dcombine$Years.of.Experience    3    4.4   1.458   1.331  0.263
## Residuals                    1036 1134.8   1.095

TukeyHSD(years.aov1)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = dcombine$Rating ~ dcombine$Years.of.Experience, data = dcombine)
## 
## $`dcombine$Years.of.Experience`
##                                             diff         lwr       upr
## 11 to 20 years-Less than 10 years     0.15253168 -0.19191755 0.4969809
## 21 to 30 years -Less than 10 years    0.20791438 -0.11719844 0.5330272
## 31 years and above-Less than 10 years 0.24300948 -0.08778982 0.5738088
## 21 to 30 years -11 to 20 years        0.05538271 -0.16727679 0.2780422
## 31 years and above-11 to 20 years     0.09047781 -0.14040547 0.3213611
## 31 years and above-21 to 30 years     0.03509510 -0.16580328 0.2359935
##                                           p adj
## 11 to 20 years-Less than 10 years     0.6651255
## 21 to 30 years -Less than 10 years    0.3534725
## 31 years and above-Less than 10 years 0.2327867
## 21 to 30 years -11 to 20 years        0.9190063
## 31 years and above-11 to 20 years     0.7445574
## 31 years and above-21 to 30 years     0.9697224

# Thus, there is not much significance

Plotting for Combined results

dcombine %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

dcombine %>% #plot Positive_Proportion over Gender 
  ggplot(aes(x=Positive_Proportion,fill=dcombine$Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Positive Proportion by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="Positive_Proportion",y = "Density of Positive Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

## Warning: Use of `dcombine$Gender` is discouraged. Use `Gender` instead.

dcombine %>% #plot Negative_Proportion over Gender 
  ggplot(aes(x=Negative_Proportion,fill=Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Negative_Proportion by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="Negative_Proportion",y = "Density of Negative_Proportion")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

dcombine %>% #plot `Average neg_score` over Gender 
  ggplot(aes(x=Average.neg_score,fill=Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average neg_score` by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="`Average neg_score`",y = "Density of `Average neg_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

dcombine %>% #plot `Average pos_score` over Gender 
  ggplot(aes(x=Average.pos_score,fill=Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of `Average pos_score` by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="`Average pos_score`",y = "Density of `Average pos_score`")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

dcombine %>% #plot Overall_score over Gender 
  ggplot(aes(x=Overall_score,fill=Gender))+
  geom_density(alpha=.5,position="identity")+
  labs(title = "Density of Overall_score by Gender on Both - Healthgrades and RateMDs\n(blue=Female, red=Male)",x="Overall_score",y = "Density of Overall_score")+#add title and axis labels
  theme(legend.position="none")#hide the side signal of fill parameter

###### Checking  ######

boxplot(dcombine$Rating~dcombine$Years.of.Experience, # Specify the variables to graph, quantitative variable first
        col=c("orange","thistle","lightgreen","lightblue","gray","yellow"), # Specify the data set that contains the variables
        main = "Rating over Years of Experience on Healthgrades and RateMDs", #Create the chart title
        xlab = "Years of Experience",  #Create the x-axis label
        ylab = "Rating",
        las=1,  # Width of box as proportion of original
        whisklty = 1,  # Whisker line type; 1 = solid line
        staplelty = 0,  # Staple (line at end) type; 0 = none
        outpch = 16,  # Symbols for outliers; 16 = filled circle
        outcol = "slategray3", 
        notch=F,horizontal = F# Color for outliers
) #Create the y-axis label

#######Checking 

### No of years of Experience on both the Websites 

dcombine %>% #plot rating over Gender 
  ggplot(aes(x=Rating,fill=Gender))+
  geom_density(alpha=.4,position="identity")+
  labs(title = "Density of Rating over Gender spread across Years of Experience on HealthGrades and RateMDs\n(blue=Female, red=Male)",x="Rating",y = "Density of Rating")+#add title and axis labels
  theme(legend.position="none")+#hide the side signal of fill parameter
 facet_wrap(~dcombine$Years.of.Experience)

Linear Regression Modeling

library(corrplot)

## corrplot 0.84 loaded

## Healthgrades 
#str(ratemdsfinal)
#str(healthgrades)

hdff1=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,7)])
names(hdff1)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Rating"

hdff2=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,14)])
names(hdff2)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Positive_Proportion"

hdff3=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,17)])
names(hdff3)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Negative_Proportion"

hdff4=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,16)])
names(hdff4)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Average.pos_score"

hdff5=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,19)])
names(hdff5)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Average.neg_score"

hdff6=data.frame(healthgrades[,c(3,5,20,21,22,23,24,25,26,27,28,29,30,31,32,33)])
names(hdff6)

##  [1] "Gender"                   "Years.of.Experience"     
##  [3] "Communication_positive"   "Expertisepositive"       
##  [5] "Timepositive"             "Bedside_positive"        
##  [7] "Officepositive"           "Costpositive"            
##  [9] "Communication_negative"   "Expertisenegative"       
## [11] "Time_negative"            "Bedside_negative"        
## [13] "Office_negative"          "Cost_negative"           
## [15] "Average.words.per.review" "Overall_score"

#Regression

lh1=lm(Rating~.,data = hdff1)
summary(lh1)

## 
## Call:
## lm(formula = Rating ~ ., data = hdff1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99217 -0.22596  0.02488  0.26852  0.57756 
## 
## Coefficients:
##                                        Estimate Std. Error t value
## (Intercept)                            4.756741   0.106861  44.513
## GenderMale                            -0.031296   0.044540  -0.703
## Years.of.Experience11 to 20 years     -0.030114   0.084181  -0.358
## Years.of.Experience21 to 30 years     -0.063216   0.082504  -0.766
## Years.of.Experience31 years and above -0.014015   0.083938  -0.167
## Communication_positive                -0.016877   0.185336  -0.091
## Expertisepositive                     -0.059499   0.165435  -0.360
## Timepositive                           0.102207   0.203954   0.501
## Bedside_positive                       0.163074   0.144485   1.129
## Officepositive                        -0.699218   0.290933  -2.403
## Costpositive                           0.437641   0.386192   1.133
## Communication_negative                -1.016354   0.361890  -2.808
## Expertisenegative                      0.215638   0.339537   0.635
## Time_negative                         -0.447024   0.329563  -1.356
## Bedside_negative                       0.520510   0.492825   1.056
## Office_negative                       -1.143112   0.531891  -2.149
## Cost_negative                         -0.498560   0.843224  -0.591
## Average.words.per.review              -0.001362   0.001112  -1.226
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                         0.48298    
## Years.of.Experience11 to 20 years                  0.72088    
## Years.of.Experience21 to 30 years                  0.44434    
## Years.of.Experience31 years and above              0.86754    
## Communication_positive                             0.92752    
## Expertisepositive                                  0.71944    
## Timepositive                                       0.61676    
## Bedside_positive                                   0.26022    
## Officepositive                                     0.01704 *  
## Costpositive                                       0.25831    
## Communication_negative                             0.00541 ** 
## Expertisenegative                                  0.52600    
## Time_negative                                      0.17630    
## Bedside_negative                                   0.29200    
## Office_negative                                    0.03267 *  
## Cost_negative                                      0.55493    
## Average.words.per.review                           0.22156    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3301 on 229 degrees of freedom
## Multiple R-squared:  0.1624, Adjusted R-squared:  0.1002 
## F-statistic: 2.612 on 17 and 229 DF,  p-value: 0.0006955

lh2=lm(Positive_Proportion~.,data = hdff2)
summary(lh2)

## 
## Call:
## lm(formula = Positive_Proportion ~ ., data = hdff2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.62532 -0.02850  0.01704  0.05916  0.41045 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.9436522  0.0462024  20.424
## GenderMale                            -0.0108854  0.0192572  -0.565
## Years.of.Experience11 to 20 years     -0.0330841  0.0363966  -0.909
## Years.of.Experience21 to 30 years      0.0018360  0.0356717   0.051
## Years.of.Experience31 years and above  0.0071221  0.0362916   0.196
## Communication_positive                 0.0192247  0.0801322   0.240
## Expertisepositive                      0.0960011  0.0715279   1.342
## Timepositive                           0.0234726  0.0881817   0.266
## Bedside_positive                       0.1454767  0.0624697   2.329
## Officepositive                         0.0411917  0.1257880   0.327
## Costpositive                           0.2346136  0.1669745   1.405
## Communication_negative                -0.5411209  0.1564674  -3.458
## Expertisenegative                     -0.4561526  0.1468029  -3.107
## Time_negative                         -1.0377078  0.1424904  -7.283
## Bedside_negative                       0.4154977  0.2130787   1.950
## Office_negative                       -0.1205638  0.2299691  -0.524
## Cost_negative                         -1.6477666  0.3645774  -4.520
## Average.words.per.review              -0.0001089  0.0004806  -0.227
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.572447    
## Years.of.Experience11 to 20 years                 0.364312    
## Years.of.Experience21 to 30 years                 0.958996    
## Years.of.Experience31 years and above             0.844592    
## Communication_positive                            0.810613    
## Expertisepositive                                 0.180877    
## Timepositive                                      0.790337    
## Bedside_positive                                  0.020743 *  
## Officepositive                                    0.743612    
## Costpositive                                      0.161351    
## Communication_negative                            0.000648 ***
## Expertisenegative                                 0.002127 ** 
## Time_negative                             0.00000000000522 ***
## Bedside_negative                                  0.052400 .  
## Office_negative                                   0.600604    
## Cost_negative                             0.00000993081246 ***
## Average.words.per.review                          0.820935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1427 on 229 degrees of freedom
## Multiple R-squared:  0.5089, Adjusted R-squared:  0.4724 
## F-statistic: 13.96 on 17 and 229 DF,  p-value: < 0.00000000000000022

lh3=lm(Negative_Proportion~.,data = hdff3)
summary(lh3)

## 
## Call:
## lm(formula = Negative_Proportion ~ ., data = hdff3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41045 -0.05916 -0.01704  0.02850  0.62532 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.0563478  0.0462024   1.220
## GenderMale                             0.0108854  0.0192572   0.565
## Years.of.Experience11 to 20 years      0.0330841  0.0363966   0.909
## Years.of.Experience21 to 30 years     -0.0018360  0.0356717  -0.051
## Years.of.Experience31 years and above -0.0071221  0.0362916  -0.196
## Communication_positive                -0.0192247  0.0801322  -0.240
## Expertisepositive                     -0.0960011  0.0715279  -1.342
## Timepositive                          -0.0234726  0.0881817  -0.266
## Bedside_positive                      -0.1454767  0.0624697  -2.329
## Officepositive                        -0.0411917  0.1257880  -0.327
## Costpositive                          -0.2346136  0.1669745  -1.405
## Communication_negative                 0.5411209  0.1564674   3.458
## Expertisenegative                      0.4561526  0.1468029   3.107
## Time_negative                          1.0377078  0.1424904   7.283
## Bedside_negative                      -0.4154977  0.2130787  -1.950
## Office_negative                        0.1205638  0.2299691   0.524
## Cost_negative                          1.6477666  0.3645774   4.520
## Average.words.per.review               0.0001089  0.0004806   0.227
##                                               Pr(>|t|)    
## (Intercept)                                   0.223877    
## GenderMale                                    0.572447    
## Years.of.Experience11 to 20 years             0.364312    
## Years.of.Experience21 to 30 years             0.958996    
## Years.of.Experience31 years and above         0.844592    
## Communication_positive                        0.810613    
## Expertisepositive                             0.180877    
## Timepositive                                  0.790337    
## Bedside_positive                              0.020743 *  
## Officepositive                                0.743612    
## Costpositive                                  0.161351    
## Communication_negative                        0.000648 ***
## Expertisenegative                             0.002127 ** 
## Time_negative                         0.00000000000522 ***
## Bedside_negative                              0.052400 .  
## Office_negative                               0.600604    
## Cost_negative                         0.00000993081246 ***
## Average.words.per.review                      0.820935    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1427 on 229 degrees of freedom
## Multiple R-squared:  0.5089, Adjusted R-squared:  0.4724 
## F-statistic: 13.96 on 17 and 229 DF,  p-value: < 0.00000000000000022

lh4=lm(Average.pos_score~.,data = hdff4)
summary(lh4)

## 
## Call:
## lm(formula = Average.pos_score ~ ., data = hdff4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.76388 -0.04567  0.02221  0.08125  0.29915 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.7740361  0.0486756  15.902
## GenderMale                             0.0003497  0.0202880   0.017
## Years.of.Experience11 to 20 years     -0.0421178  0.0383449  -1.098
## Years.of.Experience21 to 30 years     -0.0010852  0.0375812  -0.029
## Years.of.Experience31 years and above -0.0235270  0.0382343  -0.615
## Communication_positive                -0.1782681  0.0844216  -2.112
## Expertisepositive                      0.1135756  0.0753566   1.507
## Timepositive                           0.0284462  0.0929019   0.306
## Bedside_positive                      -0.0243370  0.0658136  -0.370
## Officepositive                         0.1036617  0.1325212   0.782
## Costpositive                           0.1865724  0.1759124   1.061
## Communication_negative                -0.5388122  0.1648428  -3.269
## Expertisenegative                     -0.5458443  0.1546610  -3.529
## Time_negative                         -0.1629900  0.1501176  -1.086
## Bedside_negative                      -0.0548634  0.2244844  -0.244
## Office_negative                       -0.0048442  0.2422789  -0.020
## Cost_negative                         -1.3876293  0.3840926  -3.613
## Average.words.per.review               0.0017273  0.0005063   3.412
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.986263    
## Years.of.Experience11 to 20 years                 0.273186    
## Years.of.Experience21 to 30 years                 0.976989    
## Years.of.Experience31 years and above             0.538943    
## Communication_positive                            0.035801 *  
## Expertisepositive                                 0.133144    
## Timepositive                                      0.759733    
## Bedside_positive                                  0.711883    
## Officepositive                                    0.434889    
## Costpositive                                      0.289990    
## Communication_negative                            0.001247 ** 
## Expertisenegative                                 0.000504 ***
## Time_negative                                     0.278732    
## Bedside_negative                                  0.807142    
## Office_negative                                   0.984065    
## Cost_negative                                     0.000372 ***
## Average.words.per.review                          0.000763 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1504 on 229 degrees of freedom
## Multiple R-squared:  0.3675, Adjusted R-squared:  0.3205 
## F-statistic: 7.826 on 17 and 229 DF,  p-value: 0.000000000000002658

lh5=lm(Average.neg_score~.,data = hdff5)
summary(lh5)

## 
## Call:
## lm(formula = Average.neg_score ~ ., data = hdff5)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85154 -0.01423  0.05662  0.10557  0.53916 
## 
## Coefficients:
##                                         Estimate Std. Error t value    Pr(>|t|)
## (Intercept)                           -0.0559592  0.0778001  -0.719     0.47271
## GenderMale                            -0.0453889  0.0324271  -1.400     0.16295
## Years.of.Experience11 to 20 years     -0.0017304  0.0612881  -0.028     0.97750
## Years.of.Experience21 to 30 years      0.0108923  0.0600674   0.181     0.85627
## Years.of.Experience31 years and above  0.0415669  0.0611113   0.680     0.49708
## Communication_positive                 0.0702209  0.1349343   0.520     0.60328
## Expertisepositive                      0.0269866  0.1204455   0.224     0.82291
## Timepositive                          -0.0317881  0.1484887  -0.214     0.83068
## Bedside_positive                       0.0930283  0.1051924   0.884     0.37743
## Officepositive                         0.0314480  0.2118138   0.148     0.88210
## Costpositive                           0.2896671  0.2811677   1.030     0.30399
## Communication_negative                -0.7033954  0.2634748  -2.670     0.00814
## Expertisenegative                      0.0319602  0.2472007   0.129     0.89724
## Time_negative                         -1.2347404  0.2399389  -5.146 0.000000572
## Bedside_negative                       0.6061279  0.3588023   1.689     0.09252
## Office_negative                       -1.2752564  0.3872439  -3.293     0.00115
## Cost_negative                         -1.5581905  0.6139103  -2.538     0.01181
## Average.words.per.review              -0.0008341  0.0008093  -1.031     0.30379
##                                          
## (Intercept)                              
## GenderMale                               
## Years.of.Experience11 to 20 years        
## Years.of.Experience21 to 30 years        
## Years.of.Experience31 years and above    
## Communication_positive                   
## Expertisepositive                        
## Timepositive                             
## Bedside_positive                         
## Officepositive                           
## Costpositive                             
## Communication_negative                ** 
## Expertisenegative                        
## Time_negative                         ***
## Bedside_negative                      .  
## Office_negative                       ** 
## Cost_negative                         *  
## Average.words.per.review                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2403 on 229 degrees of freedom
## Multiple R-squared:  0.3183, Adjusted R-squared:  0.2677 
## F-statistic: 6.289 on 17 and 229 DF,  p-value: 0.000000000004951

lh6=lm(Overall_score~.,data = hdff6)
summary(lh6)

## 
## Call:
## lm(formula = Overall_score ~ ., data = hdff6)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1626 -0.1082  0.0530  0.1568  0.6340 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.7180769  0.0875710   8.200
## GenderMale                            -0.0450392  0.0364996  -1.234
## Years.of.Experience11 to 20 years     -0.0438482  0.0689853  -0.636
## Years.of.Experience21 to 30 years      0.0098071  0.0676113   0.145
## Years.of.Experience31 years and above  0.0180400  0.0687863   0.262
## Communication_positive                -0.1080472  0.1518808  -0.711
## Expertisepositive                      0.1405622  0.1355723   1.037
## Timepositive                          -0.0033418  0.1671375  -0.020
## Bedside_positive                       0.0686913  0.1184036   0.580
## Officepositive                         0.1351097  0.2384156   0.567
## Costpositive                           0.4762394  0.3164796   1.505
## Communication_negative                -1.2422075  0.2965646  -4.189
## Expertisenegative                     -0.5138841  0.2782467  -1.847
## Time_negative                         -1.3977304  0.2700730  -5.175
## Bedside_negative                       0.5512645  0.4038644   1.365
## Office_negative                       -1.2801005  0.4358780  -2.937
## Cost_negative                         -2.9458199  0.6910115  -4.263
## Average.words.per.review               0.0008933  0.0009109   0.981
##                                                 Pr(>|t|)    
## (Intercept)                           0.0000000000000172 ***
## GenderMale                                       0.21848    
## Years.of.Experience11 to 20 years                0.52566    
## Years.of.Experience21 to 30 years                0.88480    
## Years.of.Experience31 years and above            0.79336    
## Communication_positive                           0.47756    
## Expertisepositive                                0.30092    
## Timepositive                                     0.98407    
## Bedside_positive                                 0.56239    
## Officepositive                                   0.57147    
## Costpositive                                     0.13375    
## Communication_negative                0.0000400594001271 ***
## Expertisenegative                                0.06606 .  
## Time_negative                         0.0000004969127672 ***
## Bedside_negative                                 0.17360    
## Office_negative                                  0.00365 ** 
## Cost_negative                         0.0000294868836949 ***
## Average.words.per.review                         0.32780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2705 on 229 degrees of freedom
## Multiple R-squared:  0.4613, Adjusted R-squared:  0.4213 
## F-statistic: 11.54 on 17 and 229 DF,  p-value: < 0.00000000000000022

## Corelation Plot  

str(hdff1)

## 'data.frame':    247 obs. of  16 variables:
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
##  $ Years.of.Experience     : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
##  $ Communication_positive  : num  0.0676 0.0286 0.1 0.1538 0.3333 ...
##  $ Expertisepositive       : num  0.189 0.114 0.1 0.154 0.167 ...
##  $ Timepositive            : num  0.1216 0.0714 0 0.1154 0 ...
##  $ Bedside_positive        : num  0.108 0.186 0.35 0.115 0.333 ...
##  $ Officepositive          : num  0.027 0.0429 0.15 0.0769 0 ...
##  $ Costpositive            : num  0.0541 0.0286 0 0.0385 0 ...
##  $ Communication_negative  : num  0 0.0143 0 0 0 ...
##  $ Expertisenegative       : num  0.027 0.0143 0 0 0 ...
##  $ Time_negative           : num  0 0.0429 0 0 0 ...
##  $ Bedside_negative        : num  0 0.0429 0 0 0 ...
##  $ Office_negative         : num  0 0.0143 0 0 0 ...
##  $ Cost_negative           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Average.words.per.review: num  29 72 44 40 42 61 29 41 17 34 ...
##  $ Rating                  : num  4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...

corrplot(cor(hdff1[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(hdff2[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(hdff3[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(hdff4[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(hdff5[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(hdff6[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

# RateMDs

rdff1=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,6)])
rdff2=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,11)])
rdff3=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,14)])
rdff4=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,12)])
rdff5=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,16)])
rdff6=data.frame(ratemdsfinal[,c(3,4,17,18,19,20,21,22,23,24,25,26,27,28,32,33)])


#Correlation plots
corrplot(cor(rdff1[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(rdff2[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(rdff3[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(rdff4[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(rdff5[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(rdff6[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

#Regression
lr1=lm(Rating~.,data = rdff1)
summary(lr1)

## 
## Call:
## lm(formula = Rating ~ ., data = rdff1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4987 -0.4319  0.1945  0.5221  4.0045 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            4.2584145  0.1313193  32.428
## GenderMale                             0.1209880  0.0659969   1.833
## Years.of.Experience11 to 20 years      0.1474356  0.1381748   1.067
## Years.of.Experience21 to 30 years      0.1810661  0.1303980   1.389
## Years.of.Experience31 years and above  0.1751600  0.1337864   1.309
## Communication_positive                 0.0579462  0.3074916   0.188
## Expertisepositive                      0.3666430  0.1991385   1.841
## Timepositive                          -0.4788978  0.3337779  -1.435
## Bedside_positive                       0.3259224  0.1892757   1.722
## Officepositive                         0.4865397  0.3001795   1.621
## Costpositive                           1.4084574  0.4267645   3.300
## Communication_negative                -1.7267337  0.3863956  -4.469
## Expertisenegative                     -1.7076915  0.2783143  -6.136
## Time_negative                         -1.6155999  0.3399253  -4.753
## Bedside_negative                      -1.2639660  0.3176705  -3.979
## Office_negative                       -2.4548218  0.4412688  -5.563
## Cost_negative                         -2.6605126  0.5515298  -4.824
## Average.words.per.review              -0.0039921  0.0006375  -6.262
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                         0.06715 .  
## Years.of.Experience11 to 20 years                  0.28629    
## Years.of.Experience21 to 30 years                  0.16536    
## Years.of.Experience31 years and above              0.19084    
## Communication_positive                             0.85057    
## Expertisepositive                                  0.06598 .  
## Timepositive                                       0.15175    
## Bedside_positive                                   0.08548 .  
## Officepositive                                     0.10546    
## Costpositive                                       0.00101 ** 
## Communication_negative                      0.000009037762 ***
## Expertisenegative                           0.000000001350 ***
## Time_negative                               0.000002391209 ***
## Bedside_negative                            0.000075742470 ***
## Office_negative                             0.000000036508 ***
## Cost_negative                               0.000001695418 ***
## Average.words.per.review                    0.000000000629 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9128 on 775 degrees of freedom
## Multiple R-squared:  0.3787, Adjusted R-squared:  0.3651 
## F-statistic: 27.79 on 17 and 775 DF,  p-value: < 0.00000000000000022

lr2=lm(Positive_Proportion~.,data = rdff2)
summary(lr2)

## 
## Call:
## lm(formula = Positive_Proportion ~ ., data = rdff2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.96778 -0.09982  0.06330  0.16630  0.84169 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.7231908  0.0399668  18.095
## GenderMale                             0.0247285  0.0200860   1.231
## Years.of.Experience11 to 20 years      0.1146939  0.0420532   2.727
## Years.of.Experience21 to 30 years      0.1170368  0.0396864   2.949
## Years.of.Experience31 years and above  0.0817680  0.0407176   2.008
## Communication_positive                -0.1391590  0.0935845  -1.487
## Expertisepositive                      0.1948552  0.0606074   3.215
## Timepositive                          -0.0362635  0.1015847  -0.357
## Bedside_positive                       0.2661089  0.0576057   4.619
## Officepositive                         0.2353906  0.0913590   2.577
## Costpositive                           0.5013061  0.1298850   3.860
## Communication_negative                -0.3858624  0.1175988  -3.281
## Expertisenegative                     -0.7255680  0.0847044  -8.566
## Time_negative                         -0.6723595  0.1034556  -6.499
## Bedside_negative                      -0.4786142  0.0966824  -4.950
## Office_negative                       -0.4120777  0.1342993  -3.068
## Cost_negative                         -1.0191855  0.1678570  -6.072
## Average.words.per.review              -0.0008097  0.0001940  -4.173
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.218647    
## Years.of.Experience11 to 20 years                 0.006529 ** 
## Years.of.Experience21 to 30 years                 0.003283 ** 
## Years.of.Experience31 years and above             0.044972 *  
## Communication_positive                            0.137425    
## Expertisepositive                                 0.001358 ** 
## Timepositive                                      0.721205    
## Bedside_positive                            0.000004503490 ***
## Officepositive                                    0.010163 *  
## Costpositive                                      0.000123 ***
## Communication_negative                            0.001080 ** 
## Expertisenegative                     < 0.0000000000000002 ***
## Time_negative                               0.000000000145 ***
## Bedside_negative                            0.000000909286 ***
## Office_negative                                   0.002227 ** 
## Cost_negative                               0.000000001980 ***
## Average.words.per.review                    0.000033429711 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2778 on 775 degrees of freedom
## Multiple R-squared:  0.4263, Adjusted R-squared:  0.4137 
## F-statistic: 33.87 on 17 and 775 DF,  p-value: < 0.00000000000000022

lr3=lm(Negative_Proportion~.,data = rdff3)
summary(lr3)

## 
## Call:
## lm(formula = Negative_Proportion ~ ., data = rdff3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.00751 -0.11376 -0.04056  0.07096  0.99419 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.1898280  0.0350736   5.412
## GenderMale                            -0.0280136  0.0176269  -1.589
## Years.of.Experience11 to 20 years     -0.1256970  0.0369046  -3.406
## Years.of.Experience21 to 30 years     -0.1311036  0.0348276  -3.764
## Years.of.Experience31 years and above -0.1170448  0.0357326  -3.276
## Communication_positive                -0.0405123  0.0821269  -0.493
## Expertisepositive                     -0.0928168  0.0531872  -1.745
## Timepositive                           0.1089283  0.0891476   1.222
## Bedside_positive                      -0.1422816  0.0505530  -2.815
## Officepositive                        -0.0764590  0.0801739  -0.954
## Costpositive                          -0.3962814  0.1139831  -3.477
## Communication_negative                 0.5028461  0.1032011   4.872
## Expertisenegative                      0.7743013  0.0743340  10.417
## Time_negative                          0.6877894  0.0907895   7.576
## Bedside_negative                       0.4440841  0.0848455   5.234
## Office_negative                        0.4822172  0.1178570   4.092
## Cost_negative                          1.1186023  0.1473062   7.594
## Average.words.per.review               0.0011622  0.0001703   6.826
##                                                   Pr(>|t|)    
## (Intercept)                             0.0000000830252621 ***
## GenderMale                                        0.112411    
## Years.of.Experience11 to 20 years                 0.000693 ***
## Years.of.Experience21 to 30 years                 0.000180 ***
## Years.of.Experience31 years and above             0.001101 ** 
## Communication_positive                            0.621948    
## Expertisepositive                                 0.081365 .  
## Timepositive                                      0.222122    
## Bedside_positive                                  0.005009 ** 
## Officepositive                                    0.340551    
## Costpositive                                      0.000536 ***
## Communication_negative                  0.0000013366680680 ***
## Expertisenegative                     < 0.0000000000000002 ***
## Time_negative                           0.0000000000001019 ***
## Bedside_negative                        0.0000002137397275 ***
## Office_negative                         0.0000473381478164 ***
## Cost_negative                           0.0000000000000895 ***
## Average.words.per.review                0.0000000000176135 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2438 on 775 degrees of freedom
## Multiple R-squared:  0.5085, Adjusted R-squared:  0.4977 
## F-statistic: 47.16 on 17 and 775 DF,  p-value: < 0.00000000000000022

lr4=lm(Average.pos_score~.,data = rdff4)
summary(lr4)

## 
## Call:
## lm(formula = Average.pos_score ~ ., data = rdff4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.79655 -0.13758  0.06478  0.17862  0.51776 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.5794739  0.0381635  15.184
## GenderMale                             0.0078624  0.0191797   0.410
## Years.of.Experience11 to 20 years      0.1249562  0.0401558   3.112
## Years.of.Experience21 to 30 years      0.1444395  0.0378958   3.811
## Years.of.Experience31 years and above  0.1420179  0.0388805   3.653
## Communication_positive                -0.1937978  0.0893620  -2.169
## Expertisepositive                      0.0760436  0.0578729   1.314
## Timepositive                           0.0382756  0.0970012   0.395
## Bedside_positive                       0.1923630  0.0550066   3.497
## Officepositive                         0.2520744  0.0872370   2.890
## Costpositive                           0.5517505  0.1240246   4.449
## Communication_negative                -0.3427398  0.1122928  -3.052
## Expertisenegative                     -0.5175899  0.0808826  -6.399
## Time_negative                         -0.5548255  0.0987878  -5.616
## Bedside_negative                      -0.4153887  0.0923201  -4.499
## Office_negative                       -0.2496353  0.1282398  -1.947
## Cost_negative                         -0.8607826  0.1602834  -5.370
## Average.words.per.review              -0.0001177  0.0001853  -0.635
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.681967    
## Years.of.Experience11 to 20 years                 0.001928 ** 
## Years.of.Experience21 to 30 years                 0.000149 ***
## Years.of.Experience31 years and above             0.000277 ***
## Communication_positive                            0.030410 *  
## Expertisepositive                                 0.189243    
## Timepositive                                      0.693255    
## Bedside_positive                                  0.000497 ***
## Officepositive                                    0.003966 ** 
## Costpositive                                 0.00000990309 ***
## Communication_negative                            0.002349 ** 
## Expertisenegative                            0.00000000027 ***
## Time_negative                                0.00000002719 ***
## Bedside_negative                             0.00000785777 ***
## Office_negative                                   0.051940 .  
## Cost_negative                                0.00000010395 ***
## Average.words.per.review                          0.525436    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2653 on 775 degrees of freedom
## Multiple R-squared:  0.3251, Adjusted R-squared:  0.3103 
## F-statistic: 21.96 on 17 and 775 DF,  p-value: < 0.00000000000000022

lr5=lm(Average.neg_score~.,data = rdff5)
summary(lr5)

## 
## Call:
## lm(formula = Average.neg_score ~ ., data = rdff5)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78415 -0.17673  0.07616  0.14458  0.95304 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                           -0.1109611  0.0385817  -2.876
## GenderMale                             0.0145985  0.0193899   0.753
## Years.of.Experience11 to 20 years      0.0203760  0.0405959   0.502
## Years.of.Experience21 to 30 years     -0.0036386  0.0383111  -0.095
## Years.of.Experience31 years and above -0.0154383  0.0393066  -0.393
## Communication_positive                 0.1330288  0.0903414   1.473
## Expertisepositive                      0.1199794  0.0585071   2.051
## Timepositive                          -0.0742197  0.0980643  -0.757
## Bedside_positive                       0.1445197  0.0556094   2.599
## Officepositive                         0.0778611  0.0881930   0.883
## Costpositive                           0.2327128  0.1253839   1.856
## Communication_negative                -0.6848186  0.1135234  -6.032
## Expertisenegative                     -0.5429282  0.0817690  -6.640
## Time_negative                         -0.5489313  0.0998704  -5.496
## Bedside_negative                      -0.3096607  0.0933319  -3.318
## Office_negative                       -0.5648870  0.1296452  -4.357
## Cost_negative                         -0.5082322  0.1620400  -3.136
## Average.words.per.review              -0.0011992  0.0001873  -6.403
##                                              Pr(>|t|)    
## (Intercept)                                  0.004138 ** 
## GenderMale                                   0.451744    
## Years.of.Experience11 to 20 years            0.615864    
## Years.of.Experience21 to 30 years            0.924359    
## Years.of.Experience31 years and above        0.694599    
## Communication_positive                       0.141288    
## Expertisepositive                            0.040634 *  
## Timepositive                                 0.449371    
## Bedside_positive                             0.009532 ** 
## Officepositive                               0.377592    
## Costpositive                                 0.063833 .  
## Communication_negative                0.0000000025006 ***
## Expertisenegative                     0.0000000000591 ***
## Time_negative                         0.0000000526198 ***
## Bedside_negative                             0.000949 ***
## Office_negative                       0.0000149485286 ***
## Cost_negative                                0.001775 ** 
## Average.words.per.review              0.0000000002647 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2682 on 775 degrees of freedom
## Multiple R-squared:  0.3839, Adjusted R-squared:  0.3704 
## F-statistic: 28.41 on 17 and 775 DF,  p-value: < 0.00000000000000022

lr6=lm(Overall_score~.,data = hdff6)
summary(lh6)

## 
## Call:
## lm(formula = Overall_score ~ ., data = hdff6)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1626 -0.1082  0.0530  0.1568  0.6340 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.7180769  0.0875710   8.200
## GenderMale                            -0.0450392  0.0364996  -1.234
## Years.of.Experience11 to 20 years     -0.0438482  0.0689853  -0.636
## Years.of.Experience21 to 30 years      0.0098071  0.0676113   0.145
## Years.of.Experience31 years and above  0.0180400  0.0687863   0.262
## Communication_positive                -0.1080472  0.1518808  -0.711
## Expertisepositive                      0.1405622  0.1355723   1.037
## Timepositive                          -0.0033418  0.1671375  -0.020
## Bedside_positive                       0.0686913  0.1184036   0.580
## Officepositive                         0.1351097  0.2384156   0.567
## Costpositive                           0.4762394  0.3164796   1.505
## Communication_negative                -1.2422075  0.2965646  -4.189
## Expertisenegative                     -0.5138841  0.2782467  -1.847
## Time_negative                         -1.3977304  0.2700730  -5.175
## Bedside_negative                       0.5512645  0.4038644   1.365
## Office_negative                       -1.2801005  0.4358780  -2.937
## Cost_negative                         -2.9458199  0.6910115  -4.263
## Average.words.per.review               0.0008933  0.0009109   0.981
##                                                 Pr(>|t|)    
## (Intercept)                           0.0000000000000172 ***
## GenderMale                                       0.21848    
## Years.of.Experience11 to 20 years                0.52566    
## Years.of.Experience21 to 30 years                0.88480    
## Years.of.Experience31 years and above            0.79336    
## Communication_positive                           0.47756    
## Expertisepositive                                0.30092    
## Timepositive                                     0.98407    
## Bedside_positive                                 0.56239    
## Officepositive                                   0.57147    
## Costpositive                                     0.13375    
## Communication_negative                0.0000400594001271 ***
## Expertisenegative                                0.06606 .  
## Time_negative                         0.0000004969127672 ***
## Bedside_negative                                 0.17360    
## Office_negative                                  0.00365 ** 
## Cost_negative                         0.0000294868836949 ***
## Average.words.per.review                         0.32780    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2705 on 229 degrees of freedom
## Multiple R-squared:  0.4613, Adjusted R-squared:  0.4213 
## F-statistic: 11.54 on 17 and 229 DF,  p-value: < 0.00000000000000022

Combined Linear Modeling

comdff1=data.frame(dcombine[,-c(17,18,19,20,21)])
comdff2=data.frame(dcombine[,-c(16,18,19,20,21)])
comdff3=data.frame(dcombine[,-c(16,17,19,20,21)])
comdff4=data.frame(dcombine[,-c(16,17,18,20,21)])
comdff5=data.frame(dcombine[,-c(16,17,18,19,21)])
comdff6=data.frame(dcombine[,-c(16,17,18,19,20)])

str(comdff6)

## 'data.frame':    1040 obs. of  16 variables:
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
##  $ Years.of.Experience     : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
##  $ Communication_positive  : num  0.0676 0.0286 0.1 0.1538 0.3333 ...
##  $ Expertisepositive       : num  0.189 0.114 0.1 0.154 0.167 ...
##  $ Timepositive            : num  0.1216 0.0714 0 0.1154 0 ...
##  $ Bedside_positive        : num  0.108 0.186 0.35 0.115 0.333 ...
##  $ Officepositive          : num  0.027 0.0429 0.15 0.0769 0 ...
##  $ Costpositive            : num  0.0541 0.0286 0 0.0385 0 ...
##  $ Communication_negative  : num  0 0.0143 0 0 0 ...
##  $ Expertisenegative       : num  0.027 0.0143 0 0 0 ...
##  $ Time_negative           : num  0 0.0429 0 0 0 ...
##  $ Bedside_negative        : num  0 0.0429 0 0 0 ...
##  $ Office_negative         : num  0 0.0143 0 0 0 ...
##  $ Cost_negative           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Average.words.per.review: num  29 72 44 40 42 61 29 41 17 34 ...
##  $ Overall_score           : num  0.824 0.792 0.916 0.897 0.936 ...

#Regression
lc1=lm(Rating~.,data = comdff1)
summary(lc1)

## 
## Call:
## lm(formula = Rating ~ ., data = comdff1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5548 -0.3300  0.1813  0.4663  3.4430 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            4.4194784  0.1069837  41.310
## GenderMale                             0.0804925  0.0535586   1.503
## Years.of.Experience11 to 20 years      0.0910572  0.1096967   0.830
## Years.of.Experience21 to 30 years      0.1089694  0.1039991   1.048
## Years.of.Experience31 years and above  0.1143336  0.1064307   1.074
## Communication_positive                 0.3987009  0.2374853   1.679
## Expertisepositive                      0.3171896  0.1668446   1.901
## Timepositive                          -0.0787030  0.2589872  -0.304
## Bedside_positive                       0.2579257  0.1573460   1.639
## Officepositive                         0.1341886  0.2565245   0.523
## Costpositive                           1.0833569  0.3646001   2.971
## Communication_negative                -1.2309834  0.3211376  -3.833
## Expertisenegative                     -1.5766878  0.2424676  -6.503
## Time_negative                         -1.4641938  0.2926774  -5.003
## Bedside_negative                      -1.2131718  0.2855508  -4.249
## Office_negative                       -2.6646053  0.3892143  -6.846
## Cost_negative                         -2.6459299  0.4945366  -5.350
## Average.words.per.review              -0.0042471  0.0005748  -7.388
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.133177    
## Years.of.Experience11 to 20 years                 0.406687    
## Years.of.Experience21 to 30 years                 0.294982    
## Years.of.Experience31 years and above             0.282962    
## Communication_positive                            0.093488 .  
## Expertisepositive                                 0.057569 .  
## Timepositive                                      0.761275    
## Bedside_positive                                  0.101474    
## Officepositive                                    0.601016    
## Costpositive                                      0.003034 ** 
## Communication_negative                            0.000134 ***
## Expertisenegative                        0.000000000123221 ***
## Time_negative                            0.000000665002823 ***
## Bedside_negative                         0.000023481775074 ***
## Office_negative                          0.000000000013070 ***
## Cost_negative                            0.000000108358355 ***
## Average.words.per.review                 0.000000000000308 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8471 on 1022 degrees of freedom
## Multiple R-squared:  0.3562, Adjusted R-squared:  0.3455 
## F-statistic: 33.26 on 17 and 1022 DF,  p-value: < 0.00000000000000022

lc2=lm(Positive_Proportion~.,data = comdff2)
summary(lc2)

## 
## Call:
## lm(formula = Positive_Proportion ~ ., data = comdff2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.98048 -0.07733  0.05426  0.14137  0.82984 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.7810386  0.0325769  23.975
## GenderMale                             0.0175599  0.0163088   1.077
## Years.of.Experience11 to 20 years      0.0745600  0.0334030   2.232
## Years.of.Experience21 to 30 years      0.0846441  0.0316681   2.673
## Years.of.Experience31 years and above  0.0598625  0.0324085   1.847
## Communication_positive                -0.0057390  0.0723151  -0.079
## Expertisepositive                      0.1973434  0.0508048   3.884
## Timepositive                           0.0676514  0.0788625   0.858
## Bedside_positive                       0.2351209  0.0479124   4.907
## Officepositive                         0.1569701  0.0781126   2.010
## Costpositive                           0.4012060  0.1110220   3.614
## Communication_negative                -0.3681654  0.0977875  -3.765
## Expertisenegative                     -0.7218612  0.0738322  -9.777
## Time_negative                         -0.7071751  0.0891213  -7.935
## Bedside_negative                      -0.4477315  0.0869512  -5.149
## Office_negative                       -0.4311245  0.1185171  -3.638
## Cost_negative                         -1.0948320  0.1505881  -7.270
## Average.words.per.review              -0.0008207  0.0001750  -4.689
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                        0.281863    
## Years.of.Experience11 to 20 years                 0.025822 *  
## Years.of.Experience21 to 30 years                 0.007641 ** 
## Years.of.Experience31 years and above             0.065018 .  
## Communication_positive                            0.936761    
## Expertisepositive                                 0.000109 ***
## Timepositive                                      0.391182    
## Bedside_positive                       0.00000107400834509 ***
## Officepositive                                    0.044743 *  
## Costpositive                                      0.000316 ***
## Communication_negative                            0.000176 ***
## Expertisenegative                     < 0.0000000000000002 ***
## Time_negative                          0.00000000000000551 ***
## Bedside_negative                       0.00000031367070663 ***
## Office_negative                                   0.000289 ***
## Cost_negative                          0.00000000000071172 ***
## Average.words.per.review               0.00000311940283111 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.258 on 1022 degrees of freedom
## Multiple R-squared:  0.4309, Adjusted R-squared:  0.4215 
## F-statistic: 45.53 on 17 and 1022 DF,  p-value: < 0.00000000000000022

lc3=lm(Negative_Proportion~.,data = comdff3)
summary(lc3)

## 
## Call:
## lm(formula = Negative_Proportion ~ ., data = comdff3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.02412 -0.10052 -0.03151  0.05972  0.99927 
## 
## Coefficients:
##                                        Estimate Std. Error t value
## (Intercept)                            0.146617   0.028666   5.115
## GenderMale                            -0.020651   0.014351  -1.439
## Years.of.Experience11 to 20 years     -0.083203   0.029392  -2.831
## Years.of.Experience21 to 30 years     -0.097930   0.027866  -3.514
## Years.of.Experience31 years and above -0.087611   0.028517  -3.072
## Communication_positive                -0.089456   0.063633  -1.406
## Expertisepositive                     -0.100977   0.044705  -2.259
## Timepositive                           0.029154   0.069394   0.420
## Bedside_positive                      -0.137290   0.042160  -3.256
## Officepositive                        -0.050707   0.068734  -0.738
## Costpositive                          -0.336372   0.097692  -3.443
## Communication_negative                 0.460933   0.086047   5.357
## Expertisenegative                      0.749351   0.064968  11.534
## Time_negative                          0.722739   0.078421   9.216
## Bedside_negative                       0.407485   0.076511   5.326
## Office_negative                        0.482666   0.104287   4.628
## Cost_negative                          1.168428   0.132508   8.818
## Average.words.per.review               0.001158   0.000154   7.520
##                                                   Pr(>|t|)    
## (Intercept)                               0.00000037503111 ***
## GenderMale                                        0.150444    
## Years.of.Experience11 to 20 years                 0.004735 ** 
## Years.of.Experience21 to 30 years                 0.000460 ***
## Years.of.Experience31 years and above             0.002181 ** 
## Communication_positive                            0.160081    
## Expertisepositive                                 0.024110 *  
## Timepositive                                      0.674480    
## Bedside_positive                                  0.001165 ** 
## Officepositive                                    0.460853    
## Costpositive                                      0.000598 ***
## Communication_negative                    0.00000010466500 ***
## Expertisenegative                     < 0.0000000000000002 ***
## Time_negative                         < 0.0000000000000002 ***
## Bedside_negative                          0.00000012359151 ***
## Office_negative                           0.00000416197762 ***
## Cost_negative                         < 0.0000000000000002 ***
## Average.words.per.review                  0.00000000000012 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.227 on 1022 degrees of freedom
## Multiple R-squared:  0.506,  Adjusted R-squared:  0.4978 
## F-statistic: 61.59 on 17 and 1022 DF,  p-value: < 0.00000000000000022

lc4=lm(Average.pos_score~.,data = comdff4)
summary(lc4)

## 
## Call:
## lm(formula = Average.pos_score ~ ., data = comdff4)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.81656 -0.09927  0.05738  0.15959  0.51416 
## 
## Coefficients:
##                                          Estimate  Std. Error t value
## (Intercept)                            0.64692434  0.03132822  20.650
## GenderMale                             0.00400675  0.01568365   0.255
## Years.of.Experience11 to 20 years      0.07488439  0.03212267   2.331
## Years.of.Experience21 to 30 years      0.10002820  0.03045422   3.285
## Years.of.Experience31 years and above  0.09487822  0.03116626   3.044
## Communication_positive                -0.09722930  0.06954321  -1.398
## Expertisepositive                      0.09417639  0.04885739   1.928
## Timepositive                           0.10823997  0.07583964   1.427
## Bedside_positive                       0.15062062  0.04607589   3.269
## Officepositive                         0.18792158  0.07511848   2.502
## Costpositive                           0.46411366  0.10676642   4.347
## Communication_negative                -0.37281616  0.09403924  -3.964
## Expertisenegative                     -0.56256603  0.07100217  -7.923
## Time_negative                         -0.48335656  0.08570519  -5.640
## Bedside_negative                      -0.40464353  0.08361829  -4.839
## Office_negative                       -0.27630470  0.11397424  -2.424
## Cost_negative                         -0.93228006  0.14481594  -6.438
## Average.words.per.review              -0.00006264  0.00016833  -0.372
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                         0.79841    
## Years.of.Experience11 to 20 years                  0.01994 *  
## Years.of.Experience21 to 30 years                  0.00106 ** 
## Years.of.Experience31 years and above              0.00239 ** 
## Communication_positive                             0.16238    
## Expertisepositive                                  0.05418 .  
## Timepositive                                       0.15382    
## Bedside_positive                                   0.00112 ** 
## Officepositive                                     0.01252 *  
## Costpositive                           0.00001518079388590 ***
## Communication_negative                 0.00007867743822599 ***
## Expertisenegative                      0.00000000000000602 ***
## Time_negative                          0.00000002203201016 ***
## Bedside_negative                       0.00000150464080230 ***
## Office_negative                                    0.01551 *  
## Cost_negative                          0.00000000018625477 ***
## Average.words.per.review                           0.70986    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2481 on 1022 degrees of freedom
## Multiple R-squared:  0.3195, Adjusted R-squared:  0.3082 
## F-statistic: 28.23 on 17 and 1022 DF,  p-value: < 0.00000000000000022

lc5=lm(Average.neg_score~.,data = comdff5)
summary(lc5)

## 
## Call:
## lm(formula = Average.neg_score ~ ., data = comdff5)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78930 -0.14500  0.07364  0.13895  0.98492 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                           -0.1036872  0.0332653  -3.117
## GenderMale                             0.0039772  0.0166534   0.239
## Years.of.Experience11 to 20 years      0.0163092  0.0341089   0.478
## Years.of.Experience21 to 30 years      0.0021139  0.0323373   0.065
## Years.of.Experience31 years and above  0.0009758  0.0330934   0.029
## Communication_positive                 0.1444302  0.0738433   1.956
## Expertisepositive                      0.1084058  0.0518784   2.090
## Timepositive                          -0.0273260  0.0805290  -0.339
## Bedside_positive                       0.1305719  0.0489249   2.669
## Officepositive                         0.0685017  0.0797633   0.859
## Costpositive                           0.2057099  0.1133681   1.815
## Communication_negative                -0.6225700  0.0998540  -6.235
## Expertisenegative                     -0.4726412  0.0753924  -6.269
## Time_negative                         -0.6435948  0.0910046  -7.072
## Bedside_negative                      -0.2828426  0.0887887  -3.186
## Office_negative                       -0.6325603  0.1210216  -5.227
## Cost_negative                         -0.5624687  0.1537703  -3.658
## Average.words.per.review              -0.0012106  0.0001787  -6.773
##                                               Pr(>|t|)    
## (Intercept)                                   0.001878 ** 
## GenderMale                                    0.811292    
## Years.of.Experience11 to 20 years             0.632646    
## Years.of.Experience21 to 30 years             0.947891    
## Years.of.Experience31 years and above         0.976481    
## Communication_positive                        0.050749 .  
## Expertisepositive                             0.036899 *  
## Timepositive                                  0.734430    
## Bedside_positive                              0.007732 ** 
## Officepositive                                0.390645    
## Costpositive                                  0.069889 .  
## Communication_negative                0.00000000066078 ***
## Expertisenegative                     0.00000000053486 ***
## Time_negative                         0.00000000000283 ***
## Bedside_negative                              0.001488 ** 
## Office_negative                       0.00000020901385 ***
## Cost_negative                                 0.000267 ***
## Average.words.per.review              0.00000000002127 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2634 on 1022 degrees of freedom
## Multiple R-squared:  0.3651, Adjusted R-squared:  0.3545 
## F-statistic: 34.57 on 17 and 1022 DF,  p-value: < 0.00000000000000022

lc6=lm(Overall_score~.,data = comdff6)
summary(lc6)

## 
## Call:
## lm(formula = Overall_score ~ ., data = comdff6)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.45537 -0.21441  0.07354  0.23620  1.30553 
## 
## Coefficients:
##                                         Estimate Std. Error t value
## (Intercept)                            0.5432371  0.0465767  11.663
## GenderMale                             0.0079839  0.0233174   0.342
## Years.of.Experience11 to 20 years      0.0911935  0.0477578   1.910
## Years.of.Experience21 to 30 years      0.1021421  0.0452773   2.256
## Years.of.Experience31 years and above  0.0958541  0.0463359   2.069
## Communication_positive                 0.0472009  0.1033921   0.457
## Expertisepositive                      0.2025822  0.0726379   2.789
## Timepositive                           0.0809140  0.1127532   0.718
## Bedside_positive                       0.2811925  0.0685025   4.105
## Officepositive                         0.2564233  0.1116811   2.296
## Costpositive                           0.6698235  0.1587331   4.220
## Communication_negative                -0.9953862  0.1398112  -7.120
## Expertisenegative                     -1.0352072  0.1055612  -9.807
## Time_negative                         -1.1269513  0.1274207  -8.844
## Bedside_negative                      -0.6874861  0.1243180  -5.530
## Office_negative                       -0.9088650  0.1694492  -5.364
## Cost_negative                         -1.4947488  0.2153025  -6.943
## Average.words.per.review              -0.0012732  0.0002503  -5.088
##                                                   Pr(>|t|)    
## (Intercept)                           < 0.0000000000000002 ***
## GenderMale                                         0.73212    
## Years.of.Experience11 to 20 years                  0.05648 .  
## Years.of.Experience21 to 30 years                  0.02429 *  
## Years.of.Experience31 years and above              0.03883 *  
## Communication_positive                             0.64811    
## Expertisepositive                                  0.00539 ** 
## Timepositive                                       0.47316    
## Bedside_positive                          0.00004368341531 ***
## Officepositive                                     0.02188 *  
## Costpositive                              0.00002662383106 ***
## Communication_negative                    0.00000000000204 ***
## Expertisenegative                     < 0.0000000000000002 ***
## Time_negative                         < 0.0000000000000002 ***
## Bedside_negative                          0.00000004064894 ***
## Office_negative                           0.00000010086286 ***
## Cost_negative                             0.00000000000684 ***
## Average.words.per.review                  0.00000043142775 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3688 on 1022 degrees of freedom
## Multiple R-squared:  0.4871, Adjusted R-squared:  0.4786 
## F-statistic:  57.1 on 17 and 1022 DF,  p-value: < 0.00000000000000022

#Correlation plots

str(comdff1)

## 'data.frame':    1040 obs. of  16 variables:
##  $ Gender                  : Factor w/ 2 levels "Female","Male": 1 1 1 2 2 1 2 2 2 1 ...
##  $ Years.of.Experience     : Factor w/ 4 levels "Less than 10 years",..: 2 3 3 2 4 3 4 4 2 4 ...
##  $ Communication_positive  : num  0.0676 0.0286 0.1 0.1538 0.3333 ...
##  $ Expertisepositive       : num  0.189 0.114 0.1 0.154 0.167 ...
##  $ Timepositive            : num  0.1216 0.0714 0 0.1154 0 ...
##  $ Bedside_positive        : num  0.108 0.186 0.35 0.115 0.333 ...
##  $ Officepositive          : num  0.027 0.0429 0.15 0.0769 0 ...
##  $ Costpositive            : num  0.0541 0.0286 0 0.0385 0 ...
##  $ Communication_negative  : num  0 0.0143 0 0 0 ...
##  $ Expertisenegative       : num  0.027 0.0143 0 0 0 ...
##  $ Time_negative           : num  0 0.0429 0 0 0 ...
##  $ Bedside_negative        : num  0 0.0429 0 0 0 ...
##  $ Office_negative         : num  0 0.0143 0 0 0 ...
##  $ Cost_negative           : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ Average.words.per.review: num  29 72 44 40 42 61 29 41 17 34 ...
##  $ Rating                  : num  4.9 4.7 5 4.8 5 5 4.8 4.6 5 5 ...

corrplot(cor(comdff1[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(comdff2[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(comdff3[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(comdff4[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(comdff5[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

corrplot(cor(comdff6[,-c(1:2)]),method="circle", addCoef.col="black", diag=F, type="upper")

#Stargazer

library(stargazer)
stargazer(lh1,lh2,lh3,lh4,lh5,lh6, type="text",out = "Stargazer_healthgrades.txt")

## 
## =========================================================================================================================================
##                                                                               Dependent variable:                                        
##                                       ---------------------------------------------------------------------------------------------------
##                                        Rating   Positive_Proportion Negative_Proportion Average.pos_score Average.neg_score Overall_score
##                                          (1)            (2)                 (3)                (4)               (5)             (6)     
## -----------------------------------------------------------------------------------------------------------------------------------------
## GenderMale                             -0.031         -0.011               0.011             0.0003            -0.045          -0.045    
##                                        (0.045)        (0.019)             (0.019)            (0.020)           (0.032)         (0.036)   
##                                                                                                                                          
## Years.of.Experience11 to 20 years      -0.030         -0.033               0.033             -0.042            -0.002          -0.044    
##                                        (0.084)        (0.036)             (0.036)            (0.038)           (0.061)         (0.069)   
##                                                                                                                                          
## Years.of.Experience21 to 30 years      -0.063          0.002              -0.002             -0.001             0.011           0.010    
##                                        (0.083)        (0.036)             (0.036)            (0.038)           (0.060)         (0.068)   
##                                                                                                                                          
## Years.of.Experience31 years and above  -0.014          0.007              -0.007             -0.024             0.042           0.018    
##                                        (0.084)        (0.036)             (0.036)            (0.038)           (0.061)         (0.069)   
##                                                                                                                                          
## Communication_positive                 -0.017          0.019              -0.019            -0.178**            0.070          -0.108    
##                                        (0.185)        (0.080)             (0.080)            (0.084)           (0.135)         (0.152)   
##                                                                                                                                          
## Expertisepositive                      -0.059          0.096              -0.096              0.114             0.027           0.141    
##                                        (0.165)        (0.072)             (0.072)            (0.075)           (0.120)         (0.136)   
##                                                                                                                                          
## Timepositive                            0.102          0.023              -0.023              0.028            -0.032          -0.003    
##                                        (0.204)        (0.088)             (0.088)            (0.093)           (0.148)         (0.167)   
##                                                                                                                                          
## Bedside_positive                        0.163         0.145**            -0.145**            -0.024             0.093           0.069    
##                                        (0.144)        (0.062)             (0.062)            (0.066)           (0.105)         (0.118)   
##                                                                                                                                          
## Officepositive                        -0.699**         0.041              -0.041              0.104             0.031           0.135    
##                                        (0.291)        (0.126)             (0.126)            (0.133)           (0.212)         (0.238)   
##                                                                                                                                          
## Costpositive                            0.438          0.235              -0.235              0.187             0.290           0.476    
##                                        (0.386)        (0.167)             (0.167)            (0.176)           (0.281)         (0.316)   
##                                                                                                                                          
## Communication_negative                -1.016***      -0.541***           0.541***           -0.539***         -0.703***       -1.242***  
##                                        (0.362)        (0.156)             (0.156)            (0.165)           (0.263)         (0.297)   
##                                                                                                                                          
## Expertisenegative                       0.216        -0.456***           0.456***           -0.546***           0.032          -0.514*   
##                                        (0.340)        (0.147)             (0.147)            (0.155)           (0.247)         (0.278)   
##                                                                                                                                          
## Time_negative                          -0.447        -1.038***           1.038***            -0.163           -1.235***       -1.398***  
##                                        (0.330)        (0.142)             (0.142)            (0.150)           (0.240)         (0.270)   
##                                                                                                                                          
## Bedside_negative                        0.521         0.415*              -0.415*            -0.055            0.606*           0.551    
##                                        (0.493)        (0.213)             (0.213)            (0.224)           (0.359)         (0.404)   
##                                                                                                                                          
## Office_negative                       -1.143**        -0.121               0.121             -0.005           -1.275***       -1.280***  
##                                        (0.532)        (0.230)             (0.230)            (0.242)           (0.387)         (0.436)   
##                                                                                                                                          
## Cost_negative                          -0.499        -1.648***           1.648***           -1.388***         -1.558**        -2.946***  
##                                        (0.843)        (0.365)             (0.365)            (0.384)           (0.614)         (0.691)   
##                                                                                                                                          
## Average.words.per.review               -0.001         -0.0001             0.0001            0.002***           -0.001           0.001    
##                                        (0.001)       (0.0005)            (0.0005)            (0.001)           (0.001)         (0.001)   
##                                                                                                                                          
## Constant                              4.757***       0.944***              0.056            0.774***           -0.056         0.718***   
##                                        (0.107)        (0.046)             (0.046)            (0.049)           (0.078)         (0.088)   
##                                                                                                                                          
## -----------------------------------------------------------------------------------------------------------------------------------------
## Observations                             247            247                 247                247               247             247     
## R2                                      0.162          0.509               0.509              0.367             0.318           0.461    
## Adjusted R2                             0.100          0.472               0.472              0.321             0.268           0.421    
## Residual Std. Error (df = 229)          0.330          0.143               0.143              0.150             0.240           0.270    
## F Statistic (df = 17; 229)            2.612***       13.958***           13.958***          7.826***          6.289***        11.536***  
## =========================================================================================================================================
## Note:                                                                                                         *p<0.1; **p<0.05; ***p<0.01

stargazer(lr1,lr2,lr3,lr4,lr5,lr6, type="text",out="stargazer_ratemds.txt")

## 
## ===========================================================================================================================================================================================
##                                                                                                        Dependent variable:                                                                 
##                                       -----------------------------------------------------------------------------------------------------------------------------------------------------
##                                                Rating            Positive_Proportion      Negative_Proportion       Average.pos_score        Average.neg_score          Overall_score      
##                                                 (1)                      (2)                      (3)                      (4)                      (5)                      (6)           
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## GenderMale                                     0.121*                   0.025                    -0.028                   0.008                    0.015                    -0.045         
##                                               (0.066)                  (0.020)                  (0.018)                  (0.019)                  (0.019)                  (0.036)         
##                                                                                                                                                                                            
## Years.of.Experience11 to 20 years              0.147                   0.115***                -0.126***                 0.125***                  0.020                    -0.044         
##                                               (0.138)                  (0.042)                  (0.037)                  (0.040)                  (0.041)                  (0.069)         
##                                                                                                                                                                                            
## Years.of.Experience21 to 30 years              0.181                   0.117***                -0.131***                 0.144***                  -0.004                   0.010          
##                                               (0.130)                  (0.040)                  (0.035)                  (0.038)                  (0.038)                  (0.068)         
##                                                                                                                                                                                            
## Years.of.Experience31 years and above          0.175                   0.082**                 -0.117***                 0.142***                  -0.015                   0.018          
##                                               (0.134)                  (0.041)                  (0.036)                  (0.039)                  (0.039)                  (0.069)         
##                                                                                                                                                                                            
## Communication_positive                         0.058                    -0.139                   -0.041                  -0.194**                  0.133                    -0.108         
##                                               (0.307)                  (0.094)                  (0.082)                  (0.089)                  (0.090)                  (0.152)         
##                                                                                                                                                                                            
## Expertisepositive                              0.367*                  0.195***                 -0.093*                   0.076                   0.120**                   0.141          
##                                               (0.199)                  (0.061)                  (0.053)                  (0.058)                  (0.059)                  (0.136)         
##                                                                                                                                                                                            
## Timepositive                                   -0.479                   -0.036                   0.109                    0.038                    -0.074                   -0.003         
##                                               (0.334)                  (0.102)                  (0.089)                  (0.097)                  (0.098)                  (0.167)         
##                                                                                                                                                                                            
## Bedside_positive                               0.326*                  0.266***                -0.142***                 0.192***                 0.145***                  0.069          
##                                               (0.189)                  (0.058)                  (0.051)                  (0.055)                  (0.056)                  (0.118)         
##                                                                                                                                                                                            
## Officepositive                                 0.487                   0.235**                   -0.076                  0.252***                  0.078                    0.135          
##                                               (0.300)                  (0.091)                  (0.080)                  (0.087)                  (0.088)                  (0.238)         
##                                                                                                                                                                                            
## Costpositive                                  1.408***                 0.501***                -0.396***                 0.552***                  0.233*                   0.476          
##                                               (0.427)                  (0.130)                  (0.114)                  (0.124)                  (0.125)                  (0.316)         
##                                                                                                                                                                                            
## Communication_negative                       -1.727***                -0.386***                 0.503***                -0.343***                -0.685***                -1.242***        
##                                               (0.386)                  (0.118)                  (0.103)                  (0.112)                  (0.114)                  (0.297)         
##                                                                                                                                                                                            
## Expertisenegative                            -1.708***                -0.726***                 0.774***                -0.518***                -0.543***                 -0.514*         
##                                               (0.278)                  (0.085)                  (0.074)                  (0.081)                  (0.082)                  (0.278)         
##                                                                                                                                                                                            
## Time_negative                                -1.616***                -0.672***                 0.688***                -0.555***                -0.549***                -1.398***        
##                                               (0.340)                  (0.103)                  (0.091)                  (0.099)                  (0.100)                  (0.270)         
##                                                                                                                                                                                            
## Bedside_negative                             -1.264***                -0.479***                 0.444***                -0.415***                -0.310***                  0.551          
##                                               (0.318)                  (0.097)                  (0.085)                  (0.092)                  (0.093)                  (0.404)         
##                                                                                                                                                                                            
## Office_negative                              -2.455***                -0.412***                 0.482***                 -0.250*                 -0.565***                -1.280***        
##                                               (0.441)                  (0.134)                  (0.118)                  (0.128)                  (0.130)                  (0.436)         
##                                                                                                                                                                                            
## Cost_negative                                -2.661***                -1.019***                 1.119***                -0.861***                -0.508***                -2.946***        
##                                               (0.552)                  (0.168)                  (0.147)                  (0.160)                  (0.162)                  (0.691)         
##                                                                                                                                                                                            
## Average.words.per.review                     -0.004***                -0.001***                 0.001***                 -0.0001                 -0.001***                  0.001          
##                                               (0.001)                  (0.0002)                 (0.0002)                 (0.0002)                 (0.0002)                 (0.001)         
##                                                                                                                                                                                            
## Constant                                      4.258***                 0.723***                 0.190***                 0.579***                -0.111***                 0.718***        
##                                               (0.131)                  (0.040)                  (0.035)                  (0.038)                  (0.039)                  (0.088)         
##                                                                                                                                                                                            
## -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
## Observations                                    793                      793                      793                      793                      793                      247           
## R2                                             0.379                    0.426                    0.508                    0.325                    0.384                    0.461          
## Adjusted R2                                    0.365                    0.414                    0.498                    0.310                    0.370                    0.421          
## Residual Std. Error                       0.913 (df = 775)         0.278 (df = 775)         0.244 (df = 775)         0.265 (df = 775)         0.268 (df = 775)         0.270 (df = 229)    
## F Statistic                           27.792*** (df = 17; 775) 33.871*** (df = 17; 775) 47.158*** (df = 17; 775) 21.961*** (df = 17; 775) 28.410*** (df = 17; 775) 11.536*** (df = 17; 229)
## ===========================================================================================================================================================================================
## Note:                                                                                                                                                           *p<0.1; **p<0.05; ***p<0.01

stargazer(lc1,lc2,lc3,lc4,lc5,lc6, type="text",out="stargazer_combined.txt")

## 
## =========================================================================================================================================
##                                                                               Dependent variable:                                        
##                                       ---------------------------------------------------------------------------------------------------
##                                        Rating   Positive_Proportion Negative_Proportion Average.pos_score Average.neg_score Overall_score
##                                          (1)            (2)                 (3)                (4)               (5)             (6)     
## -----------------------------------------------------------------------------------------------------------------------------------------
## GenderMale                              0.080          0.018              -0.021              0.004             0.004           0.008    
##                                        (0.054)        (0.016)             (0.014)            (0.016)           (0.017)         (0.023)   
##                                                                                                                                          
## Years.of.Experience11 to 20 years       0.091         0.075**            -0.083***           0.075**            0.016          0.091*    
##                                        (0.110)        (0.033)             (0.029)            (0.032)           (0.034)         (0.048)   
##                                                                                                                                          
## Years.of.Experience21 to 30 years       0.109        0.085***            -0.098***          0.100***            0.002          0.102**   
##                                        (0.104)        (0.032)             (0.028)            (0.030)           (0.032)         (0.045)   
##                                                                                                                                          
## Years.of.Experience31 years and above   0.114         0.060*             -0.088***          0.095***            0.001          0.096**   
##                                        (0.106)        (0.032)             (0.029)            (0.031)           (0.033)         (0.046)   
##                                                                                                                                          
## Communication_positive                 0.399*         -0.006              -0.089             -0.097            0.144*           0.047    
##                                        (0.237)        (0.072)             (0.064)            (0.070)           (0.074)         (0.103)   
##                                                                                                                                          
## Expertisepositive                      0.317*        0.197***            -0.101**            0.094*            0.108**        0.203***   
##                                        (0.167)        (0.051)             (0.045)            (0.049)           (0.052)         (0.073)   
##                                                                                                                                          
## Timepositive                           -0.079          0.068               0.029              0.108            -0.027           0.081    
##                                        (0.259)        (0.079)             (0.069)            (0.076)           (0.081)         (0.113)   
##                                                                                                                                          
## Bedside_positive                        0.258        0.235***            -0.137***          0.151***          0.131***        0.281***   
##                                        (0.157)        (0.048)             (0.042)            (0.046)           (0.049)         (0.069)   
##                                                                                                                                          
## Officepositive                          0.134         0.157**             -0.051             0.188**            0.069          0.256**   
##                                        (0.257)        (0.078)             (0.069)            (0.075)           (0.080)         (0.112)   
##                                                                                                                                          
## Costpositive                          1.083***       0.401***            -0.336***          0.464***           0.206*         0.670***   
##                                        (0.365)        (0.111)             (0.098)            (0.107)           (0.113)         (0.159)   
##                                                                                                                                          
## Communication_negative                -1.231***      -0.368***           0.461***           -0.373***         -0.623***       -0.995***  
##                                        (0.321)        (0.098)             (0.086)            (0.094)           (0.100)         (0.140)   
##                                                                                                                                          
## Expertisenegative                     -1.577***      -0.722***           0.749***           -0.563***         -0.473***       -1.035***  
##                                        (0.242)        (0.074)             (0.065)            (0.071)           (0.075)         (0.106)   
##                                                                                                                                          
## Time_negative                         -1.464***      -0.707***           0.723***           -0.483***         -0.644***       -1.127***  
##                                        (0.293)        (0.089)             (0.078)            (0.086)           (0.091)         (0.127)   
##                                                                                                                                          
## Bedside_negative                      -1.213***      -0.448***           0.407***           -0.405***         -0.283***       -0.687***  
##                                        (0.286)        (0.087)             (0.077)            (0.084)           (0.089)         (0.124)   
##                                                                                                                                          
## Office_negative                       -2.665***      -0.431***           0.483***           -0.276**          -0.633***       -0.909***  
##                                        (0.389)        (0.119)             (0.104)            (0.114)           (0.121)         (0.169)   
##                                                                                                                                          
## Cost_negative                         -2.646***      -1.095***           1.168***           -0.932***         -0.562***       -1.495***  
##                                        (0.495)        (0.151)             (0.133)            (0.145)           (0.154)         (0.215)   
##                                                                                                                                          
## Average.words.per.review              -0.004***      -0.001***           0.001***            -0.0001          -0.001***       -0.001***  
##                                        (0.001)       (0.0002)            (0.0002)           (0.0002)          (0.0002)        (0.0003)   
##                                                                                                                                          
## Constant                              4.419***       0.781***            0.147***           0.647***          -0.104***       0.543***   
##                                        (0.107)        (0.033)             (0.029)            (0.031)           (0.033)         (0.047)   
##                                                                                                                                          
## -----------------------------------------------------------------------------------------------------------------------------------------
## Observations                            1,040          1,040               1,040              1,040             1,040           1,040    
## R2                                      0.356          0.431               0.506              0.320             0.365           0.487    
## Adjusted R2                             0.345          0.421               0.498              0.308             0.355           0.479    
## Residual Std. Error (df = 1022)         0.847          0.258               0.227              0.248             0.263           0.369    
## F Statistic (df = 17; 1022)           33.261***      45.526***           61.589***          28.231***         34.572***       57.100***  
## =========================================================================================================================================
## Note:                                                                                                         *p<0.1; **p<0.05; ***p<0.01

EDA on Text

Chirag Ahluwalia

4/24/2020

Descriptive Statistics

Statistics based on Gender

Plots based on Years of Experience over Dependent Variables

Healthgrades

Statistics based on Gender

Statistics based on Years of Experience in HealthGrades

Plots for Years of Experience on HealthGrades

Combined

Plotting for Combined results

Linear Regression Modeling

Combined Linear Modeling