library(readxl)
# Load the professorial salaries workbook and show the structure of the result.
# NOTE(review): the path is absolute and machine-specific; consider a
# project-relative path so the script runs on other machines.
df <- read_excel(
  path = "/Users/gantsetsegganbaatar/Desktop/R class 2023/Professorial-Salaries.xlsx"
)
str(df)
## tibble [397 × 7] (S3: tbl_df/tbl/data.frame)
## $ ID : num [1:397] 1 2 3 4 5 6 7 8 9 10 ...
## $ Rank : chr [1:397] "Prof" "Prof" "AsstProf" "Prof" ...
## $ Discipline : chr [1:397] "B" "B" "B" "B" ...
## $ Yrs.since.phd: num [1:397] 19 20 4 45 40 6 30 45 21 18 ...
## $ Yrs.service : num [1:397] 18 16 3 39 41 6 23 45 20 18 ...
## $ Sex : chr [1:397] "Male" "Male" "Male" "Male" ...
## $ Salary : num [1:397] 139750 173200 79750 115000 141500 ...
# Tukey five-number summary (min, lower hinge, median, upper hinge, max)
# of years since PhD.
fivenum(df[["Yrs.since.phd"]])
## [1] 1 12 21 32 56
# Simple linear regression ----
# Model salary as a linear function of years since PhD and print the fit summary.
simple_linear <- lm(formula = Salary ~ Yrs.since.phd, data = df)
summary(simple_linear)
##
## Call:
## lm(formula = Salary ~ Yrs.since.phd, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -84171 -19432 -2858 16086 102383
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 91718.7 2765.8 33.162 <2e-16 ***
## Yrs.since.phd 985.3 107.4 9.177 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 27530 on 395 degrees of freedom
## Multiple R-squared: 0.1758, Adjusted R-squared: 0.1737
## F-statistic: 84.23 on 1 and 395 DF, p-value: < 2.2e-16
# Components stored on the fitted lm object.
names(simple_linear)
## [1] "coefficients" "residuals" "effects" "rank"
## [5] "fitted.values" "assign" "qr" "df.residual"
## [9] "xlevels" "call" "terms" "model"
# Point estimates for the intercept and the slope.
coef(simple_linear)
## (Intercept) Yrs.since.phd
## 91718.6854 985.3421
# 95% confidence intervals for both coefficients.
confint(simple_linear, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 86281.1714 97156.199
## Yrs.since.phd 774.2636 1196.421
# Predicted salary, with confidence bounds, at 5, 10 and 15 years since PhD.
predict(
  simple_linear,
  newdata = data.frame(Yrs.since.phd = c(5, 10, 15)),
  interval = "confidence"
)
## fit lwr upr
## 1 96645.4 92091.47 101199.3
## 2 101572.1 97812.11 105332.1
## 3 106498.8 103373.97 109623.7
# Scatter plot of salary against years since PhD, with the fitted line on top.
plot(df$Yrs.since.phd, df$Salary)
# The same regression line is drawn three times with different settings;
# the later calls paint over the earlier ones (thick black, then red).
abline(reg = simple_linear)
abline(reg = simple_linear, lwd = 3)
abline(reg = simple_linear, lwd = 2, col = "red")
# Multiple linear regression ----
# Model salary on years since PhD, years of service, sex and rank,
# then print the fit summary.
multi_linear <- lm(
  formula = Salary ~ Yrs.since.phd + Yrs.service + Sex + Rank,
  data = df
)
summary(multi_linear)
##
## Call:
## lm(formula = Salary ~ Yrs.since.phd + Yrs.service + Sex + Rank,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64844 -14939 -1401 12137 107498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 89596.4 4891.1 18.318 < 2e-16 ***
## Yrs.since.phd 265.8 247.9 1.072 0.28423
## Yrs.service -373.8 220.8 -1.693 0.09132 .
## SexMale 5499.1 4034.7 1.363 0.17368
## RankAsstProf -13886.6 4333.1 -3.205 0.00146 **
## RankProf 33053.4 3700.7 8.932 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 23580 on 391 degrees of freedom
## Multiple R-squared: 0.4017, Adjusted R-squared: 0.3941
## F-statistic: 52.51 on 5 and 391 DF, p-value: < 2.2e-16
# Treat Rank and Sex as factors and set their reference (baseline) levels:
# "AsstProf" for Rank and "Female" for Sex.
df$Rank <- relevel(as.factor(df$Rank), ref = "AsstProf")
df$Sex <- relevel(as.factor(df$Sex), ref = "Female")

# multi_linear was fitted before these conversions, so it still uses the
# default (alphabetical) reference levels. Refit it against the updated data
# so the chosen baselines actually take effect in the model object.
multi_linear <- update(multi_linear)
library(car)
## Loading required package: carData
# (Generalised) variance inflation factors for the predictors of the
# multiple regression model.
car::vif(multi_linear)
## GVIF Df GVIF^(1/(2*Df))
## Yrs.since.phd 7.271171 1 2.696511
## Yrs.service 5.876405 1 2.424130
## Sex 1.029868 1 1.014824
## Rank 2.002583 2 1.189591
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: summarise the built-in cars data set.
summary(datasets::cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.