library(readxl)
# Data import ----
# Read the professorial salaries workbook into a tibble and inspect it.
df <- read_excel(
  path = "/Users/gantsetsegganbaatar/Desktop/R class 2023/Professorial-Salaries.xlsx"
)

# Column types and a preview of the first values of each column.
str(df)
## tibble [397 × 7] (S3: tbl_df/tbl/data.frame)
##  $ ID           : num [1:397] 1 2 3 4 5 6 7 8 9 10 ...
##  $ Rank         : chr [1:397] "Prof" "Prof" "AsstProf" "Prof" ...
##  $ Discipline   : chr [1:397] "B" "B" "B" "B" ...
##  $ Yrs.since.phd: num [1:397] 19 20 4 45 40 6 30 45 21 18 ...
##  $ Yrs.service  : num [1:397] 18 16 3 39 41 6 23 45 20 18 ...
##  $ Sex          : chr [1:397] "Male" "Male" "Male" "Male" ...
##  $ Salary       : num [1:397] 139750 173200 79750 115000 141500 ...

# Tukey five-number summary (min, lower hinge, median, upper hinge, max) of
# the predictor used in the simple regression.
fivenum(df[["Yrs.since.phd"]])
## [1]  1 12 21 32 56
# Simple linear regression ----
# Salary regressed on years since the PhD alone.
simple_linear <- lm(formula = Salary ~ Yrs.since.phd, data = df)

# Coefficient table, residual quantiles and overall fit statistics.
summary(simple_linear)
## 
## Call:
## lm(formula = Salary ~ Yrs.since.phd, data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -84171 -19432  -2858  16086 102383 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    91718.7     2765.8  33.162   <2e-16 ***
## Yrs.since.phd    985.3      107.4   9.177   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27530 on 395 degrees of freedom
## Multiple R-squared:  0.1758, Adjusted R-squared:  0.1737 
## F-statistic: 84.23 on 1 and 395 DF,  p-value: < 2.2e-16

# Components stored on the fitted model object.
names(simple_linear)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"

# Point estimates of the intercept and slope.
coef(simple_linear)
##   (Intercept) Yrs.since.phd 
##    91718.6854      985.3421

# Confidence intervals for both coefficients (95% by default).
confint(simple_linear)
##                    2.5 %    97.5 %
## (Intercept)   86281.1714 97156.199
## Yrs.since.phd   774.2636  1196.421

# Expected salary, with confidence bounds, at 5, 10 and 15 years since the PhD.
predict(
  simple_linear,
  newdata = data.frame(Yrs.since.phd = c(5, 10, 15)),
  interval = "confidence"
)
##        fit       lwr      upr
## 1  96645.4  92091.47 101199.3
## 2 101572.1  97812.11 105332.1
## 3 106498.8 103373.97 109623.7
# Scatter plot of salary against years since the PhD. The `df$...` expressions
# are passed as-is because plot() derives its default axis labels from them.
plot(x = df$Yrs.since.phd, y = df$Salary)

# Overlay the fitted regression line. Each call redraws the same line on top
# of the previous one: default width, then width 3, then a red line of width 2.
abline(reg = simple_linear)
abline(reg = simple_linear, lwd = 3)
abline(reg = simple_linear, col = "red", lwd = 2)

# Multiple linear regression ----
# Salary regressed on years since the PhD, years of service, sex and rank.

# Encode the categorical predictors as factors and set their baseline levels
# BEFORE fitting. lm() builds its dummy coding from the factor levels present
# at fit time, so releveling after the fit has no effect on the model.
df$Rank <- relevel(as.factor(df$Rank), ref = "AsstProf")
df$Sex <- relevel(as.factor(df$Sex), ref = "Female")

multi_linear <- lm(
  Salary ~ Yrs.since.phd + Yrs.service + Sex + Rank,
  data = df
)
summary(multi_linear)
# NOTE(review): the output below was recorded when the model was fitted before
# the relevel step, so its Rank rows are relative to the alphabetical default
# baseline (AssocProf). Re-run to refresh it. With AsstProf as the baseline the
# Rank rows become RankAssocProf and RankProf; by reparametrisation their
# estimates should be 13886.6 and 46940.0, with an intercept of 75709.8. The
# other coefficient rows, the residuals and the overall fit statistics do not
# depend on the choice of baseline.
## 
## Call:
## lm(formula = Salary ~ Yrs.since.phd + Yrs.service + Sex + Rank, 
##     data = df)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -64844 -14939  -1401  12137 107498 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    89596.4     4891.1  18.318  < 2e-16 ***
## Yrs.since.phd    265.8      247.9   1.072  0.28423    
## Yrs.service     -373.8      220.8  -1.693  0.09132 .  
## SexMale         5499.1     4034.7   1.363  0.17368    
## RankAsstProf  -13886.6     4333.1  -3.205  0.00146 ** 
## RankProf       33053.4     3700.7   8.932  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 23580 on 391 degrees of freedom
## Multiple R-squared:  0.4017, Adjusted R-squared:  0.3941 
## F-statistic: 52.51 on 5 and 391 DF,  p-value: < 2.2e-16


# Collinearity check ----
# car supplies vif(); attaching it also loads carData.
library(car)
## Loading required package: carData

# (Generalised) variance inflation factors for each term of the multiple
# regression. In the recorded output the two experience variables,
# Yrs.since.phd and Yrs.service, carry the largest values.
vif(mod = multi_linear)
##                   GVIF Df GVIF^(1/(2*Df))
## Yrs.since.phd 7.271171  1        2.696511
## Yrs.service   5.876405  1        2.424130
## Sex           1.029868  1        1.014824
## Rank          2.002583  2        1.189591

# R Markdown ----

# This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

# When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Template example chunk: per-column summary statistics of the `cars` data set.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

# Including Plots ----

# You can also embed plots, for example:

# Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.