options(repos = c(CRAN = "https://cloud.r-project.org"))

install.packages("tidyverse")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'tidyverse' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("broom.helpers")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'broom.helpers' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("readxl")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'readxl' successfully unpacked and MD5 sums checked

## Warning: cannot remove prior installation of package 'readxl'

## Warning in file.copy(savedcopy, lib, recursive = TRUE): problem copying
## C:\Users\H P\AppData\Local\R\win-library\4.4\00LOCK\readxl\libs\x64\readxl.dll
## to C:\Users\H P\AppData\Local\R\win-library\4.4\readxl\libs\x64\readxl.dll:
## Permission denied

## Warning: restored 'readxl'

## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("skimr")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'skimr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("gtsummary")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'gtsummary' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("janitor")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'janitor' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("broom")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'broom' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

install.packages("rsq")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'rsq' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

library(tidyverse)

## Warning: package 'tidyverse' was built under R version 4.4.2

## Warning: package 'dplyr' was built under R version 4.4.2

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

library(broom.helpers)

## Warning: package 'broom.helpers' was built under R version 4.4.2

library(skimr)

## Warning: package 'skimr' was built under R version 4.4.2

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.4.2

## 
## Attaching package: 'gtsummary'
## 
## The following objects are masked from 'package:broom.helpers':
## 
##     all_categorical, all_continuous, all_contrasts, all_dichotomous,
##     all_interaction, all_intercepts

library(janitor)

## Warning: package 'janitor' was built under R version 4.4.2

## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(broom)

## Warning: package 'broom' was built under R version 4.4.2

library(rsq)

## Warning: package 'rsq' was built under R version 4.4.2

DATA <- read_excel("TRYDATANIMLR.xlsx")
Data1 <- DATA %>% clean_names()
glimpse(Data1)

## Rows: 299
## Columns: 49
## $ id                                   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11…
## $ current_hospital_posting             <chr> "HSIP", "HSIP", "HSIP", "HSIP", "…
## $ age_years                            <dbl> 27, 26, 25, 26, 27, 26, 27, 29, 3…
## $ gender                               <chr> "Female", "Female", "Female", "Fe…
## $ ethnicity                            <chr> "Malay", "Malay", "Malay", "Malay…
## $ religion                             <chr> "Islam", "Islam", "Islam", "Islam…
## $ marital_status                       <chr> "Single", "Single", "Single", "Si…
## $ chronic_medical_illness              <chr> "No known medical illness", "No k…
## $ duration_of_employment               <dbl> 10, 9, 9, 9, 10, 8, 10, 23, 9, 9,…
## $ exposure_to_psychiatry_posting       <chr> "No", "No", "No", "No", "No", "No…
## $ previous_experience_of_being_bullied <chr> "No", "No", "Yes", "No", "Yes", "…
## $ family_members_spouse                <chr> "No", "No", "Yes", "No", "No", "N…
## $ peers_friends                        <chr> "No", "No", "Yes", "No", "Yes", "…
## $ qd1                                  <dbl> 4, 3, 2, 3, 3, 3, 3, 3, 3, 2, 3, …
## $ qd2                                  <dbl> 4, 4, 2, 3, 3, 3, 4, 3, 3, 3, 3, …
## $ qd3                                  <dbl> 4, 4, 3, 4, 3, 4, 4, 4, 4, 3, 3, …
## $ qd4                                  <dbl> 4, 4, 4, 3, 3, 4, 3, 4, 4, 4, 3, …
## $ qd5                                  <dbl> 4, 3, 4, 2, 3, 3, 3, 3, 4, 4, 3, …
## $ qd6                                  <dbl> 4, 4, 3, 3, 3, 4, 3, 2, 3, 3, 3, …
## $ qd7                                  <dbl> 4, 4, 3, 4, 3, 4, 4, 4, 4, 4, 4, …
## $ qd8                                  <dbl> 4, 4, 3, 4, 3, 3, 3, 3, 4, 2, 4, …
## $ si9                                  <dbl> 2, 3, 4, 2, 3, 3, 3, 4, 3, 3, 2, …
## $ si10                                 <dbl> 2, 1, 3, 2, 3, 2, 2, 3, 3, 1, 1, …
## $ si11                                 <dbl> 3, 4, 4, 3, 2, 3, 3, 1, 4, 2, 2, …
## $ si12                                 <dbl> 1, 2, 3, 3, 2, 3, 2, 2, 3, 2, 3, …
## $ rc13                                 <dbl> 2, 4, 3, 4, 3, 3, 3, 2, 3, 3, 3, …
## $ rc14                                 <dbl> 4, 4, 4, 3, 3, 4, 3, 3, 3, 3, 3, …
## $ t15                                  <dbl> 2, 4, 4, 3, 3, 3, 3, 2, 3, 1, 3, …
## $ t16                                  <dbl> 3, 5, 3, 4, 4, 4, 4, 3, 4, 2, 4, …
## $ ph17                                 <dbl> 3, 2, 3, 3, 4, 4, 2, 4, 4, 2, 4, …
## $ ph18                                 <dbl> 2, 4, 3, 4, 4, 2, 2, 2, 4, 1, 4, …
## $ ph19                                 <dbl> 4, 5, 3, 4, 4, 4, 4, 3, 4, 2, 4, …
## $ a20                                  <dbl> 4, 1, 3, 3, 4, 1, 3, 3, 2, 2, 4, …
## $ a21                                  <dbl> 3, 2, 1, 2, 1, 1, 1, 3, 1, 1, 2, …
## $ a22                                  <dbl> 2, 1, 1, 2, 1, 1, 1, 2, 1, 1, 2, …
## $ a23                                  <dbl> 3, 3, 1, 3, 3, 1, 3, 3, 1, 3, 2, …
## $ a24                                  <dbl> 2, 2, 1, 4, 3, 1, 3, 1, 1, 3, 2, …
## $ a25                                  <dbl> 2, 2, 1, 4, 3, 3, 3, 3, 1, 3, 3, …
## $ a26                                  <dbl> 2, 2, 3, 2, 3, 2, 2, 2, 1, 1, 2, …
## $ a27                                  <dbl> 2, 2, 1, 2, 3, 2, 3, 3, 1, 1, 2, …
## $ a28                                  <dbl> 2, 2, 1, 2, 3, 2, 2, 2, 1, 2, 2, …
## $ a29                                  <dbl> 3, 3, 4, 4, 4, 3, 3, 3, 5, 2, 3, …
## $ a30                                  <dbl> 3, 3, 5, 3, 4, 3, 3, 3, 5, 4, 3, …
## $ a31                                  <dbl> 3, 4, 5, 3, 4, 3, 3, 3, 5, 4, 3, …
## $ a32                                  <dbl> 3, 3, 4, 3, 4, 3, 2, 3, 5, 3, 3, …
## $ a33                                  <dbl> 3, 2, 4, 3, 4, 3, 3, 3, 4, 2, 3, …
## $ a34                                  <dbl> 2, 1, 3, 3, 4, 3, 1, 3, 3, 2, 3, …
## $ a35                                  <dbl> 3, 4, 3, 3, 4, 3, 3, 3, 4, 3, 3, …
## $ literacy_score                       <dbl> 102, 105, 102, 107, 111, 98, 97, …

Data1 %>% 
  mutate(across(where(is.character), as.factor))

## # A tibble: 299 × 49
##       id current_hospital_posting age_years gender ethnicity religion
##    <dbl> <fct>                        <dbl> <fct>  <fct>     <fct>   
##  1     1 HSIP                            27 Female Malay     Islam   
##  2     2 HSIP                            26 Female Malay     Islam   
##  3     3 HSIP                            25 Female Malay     Islam   
##  4     4 HSIP                            26 Female Malay     Islam   
##  5     5 HSIP                            27 Female Malay     Islam   
##  6     6 HSIP                            26 Female Malay     Islam   
##  7     7 HSIP                            27 Female Malay     Islam   
##  8     8 HSIP                            29 Female Malay     Islam   
##  9     9 HSIP                            31 Female Malay     Islam   
## 10    10 HSIP                            28 Female Malay     Islam   
## # ℹ 289 more rows
## # ℹ 43 more variables: marital_status <fct>, chronic_medical_illness <fct>,
## #   duration_of_employment <dbl>, exposure_to_psychiatry_posting <fct>,
## #   previous_experience_of_being_bullied <fct>, family_members_spouse <fct>,
## #   peers_friends <fct>, qd1 <dbl>, qd2 <dbl>, qd3 <dbl>, qd4 <dbl>, qd5 <dbl>,
## #   qd6 <dbl>, qd7 <dbl>, qd8 <dbl>, si9 <dbl>, si10 <dbl>, si11 <dbl>,
## #   si12 <dbl>, rc13 <dbl>, rc14 <dbl>, t15 <dbl>, t16 <dbl>, ph17 <dbl>, …

Data1 %>% 
  select(duration_of_employment, current_hospital_posting, previous_experience_of_being_bullied, family_members_spouse, peers_friends, literacy_score)

## # A tibble: 299 × 6
##    duration_of_employment current_hospital_posting previous_experience_of_bein…¹
##                     <dbl> <chr>                    <chr>                        
##  1                     10 HSIP                     No                           
##  2                      9 HSIP                     No                           
##  3                      9 HSIP                     Yes                          
##  4                      9 HSIP                     No                           
##  5                     10 HSIP                     Yes                          
##  6                      8 HSIP                     Yes                          
##  7                     10 HSIP                     No                           
##  8                     23 HSIP                     No                           
##  9                      9 HSIP                     No                           
## 10                      9 HSIP                     No                           
## # ℹ 289 more rows
## # ℹ abbreviated name: ¹previous_experience_of_being_bullied
## # ℹ 3 more variables: family_members_spouse <chr>, peers_friends <chr>,
## #   literacy_score <dbl>

str(Data1)

## tibble [299 × 49] (S3: tbl_df/tbl/data.frame)
##  $ id                                  : num [1:299] 1 2 3 4 5 6 7 8 9 10 ...
##  $ current_hospital_posting            : chr [1:299] "HSIP" "HSIP" "HSIP" "HSIP" ...
##  $ age_years                           : num [1:299] 27 26 25 26 27 26 27 29 31 28 ...
##  $ gender                              : chr [1:299] "Female" "Female" "Female" "Female" ...
##  $ ethnicity                           : chr [1:299] "Malay" "Malay" "Malay" "Malay" ...
##  $ religion                            : chr [1:299] "Islam" "Islam" "Islam" "Islam" ...
##  $ marital_status                      : chr [1:299] "Single" "Single" "Single" "Single" ...
##  $ chronic_medical_illness             : chr [1:299] "No known medical illness" "No known medical illness" "No known medical illness" "No known medical illness" ...
##  $ duration_of_employment              : num [1:299] 10 9 9 9 10 8 10 23 9 9 ...
##  $ exposure_to_psychiatry_posting      : chr [1:299] "No" "No" "No" "No" ...
##  $ previous_experience_of_being_bullied: chr [1:299] "No" "No" "Yes" "No" ...
##  $ family_members_spouse               : chr [1:299] "No" "No" "Yes" "No" ...
##  $ peers_friends                       : chr [1:299] "No" "No" "Yes" "No" ...
##  $ qd1                                 : num [1:299] 4 3 2 3 3 3 3 3 3 2 ...
##  $ qd2                                 : num [1:299] 4 4 2 3 3 3 4 3 3 3 ...
##  $ qd3                                 : num [1:299] 4 4 3 4 3 4 4 4 4 3 ...
##  $ qd4                                 : num [1:299] 4 4 4 3 3 4 3 4 4 4 ...
##  $ qd5                                 : num [1:299] 4 3 4 2 3 3 3 3 4 4 ...
##  $ qd6                                 : num [1:299] 4 4 3 3 3 4 3 2 3 3 ...
##  $ qd7                                 : num [1:299] 4 4 3 4 3 4 4 4 4 4 ...
##  $ qd8                                 : num [1:299] 4 4 3 4 3 3 3 3 4 2 ...
##  $ si9                                 : num [1:299] 2 3 4 2 3 3 3 4 3 3 ...
##  $ si10                                : num [1:299] 2 1 3 2 3 2 2 3 3 1 ...
##  $ si11                                : num [1:299] 3 4 4 3 2 3 3 1 4 2 ...
##  $ si12                                : num [1:299] 1 2 3 3 2 3 2 2 3 2 ...
##  $ rc13                                : num [1:299] 2 4 3 4 3 3 3 2 3 3 ...
##  $ rc14                                : num [1:299] 4 4 4 3 3 4 3 3 3 3 ...
##  $ t15                                 : num [1:299] 2 4 4 3 3 3 3 2 3 1 ...
##  $ t16                                 : num [1:299] 3 5 3 4 4 4 4 3 4 2 ...
##  $ ph17                                : num [1:299] 3 2 3 3 4 4 2 4 4 2 ...
##  $ ph18                                : num [1:299] 2 4 3 4 4 2 2 2 4 1 ...
##  $ ph19                                : num [1:299] 4 5 3 4 4 4 4 3 4 2 ...
##  $ a20                                 : num [1:299] 4 1 3 3 4 1 3 3 2 2 ...
##  $ a21                                 : num [1:299] 3 2 1 2 1 1 1 3 1 1 ...
##  $ a22                                 : num [1:299] 2 1 1 2 1 1 1 2 1 1 ...
##  $ a23                                 : num [1:299] 3 3 1 3 3 1 3 3 1 3 ...
##  $ a24                                 : num [1:299] 2 2 1 4 3 1 3 1 1 3 ...
##  $ a25                                 : num [1:299] 2 2 1 4 3 3 3 3 1 3 ...
##  $ a26                                 : num [1:299] 2 2 3 2 3 2 2 2 1 1 ...
##  $ a27                                 : num [1:299] 2 2 1 2 3 2 3 3 1 1 ...
##  $ a28                                 : num [1:299] 2 2 1 2 3 2 2 2 1 2 ...
##  $ a29                                 : num [1:299] 3 3 4 4 4 3 3 3 5 2 ...
##  $ a30                                 : num [1:299] 3 3 5 3 4 3 3 3 5 4 ...
##  $ a31                                 : num [1:299] 3 4 5 3 4 3 3 3 5 4 ...
##  $ a32                                 : num [1:299] 3 3 4 3 4 3 2 3 5 3 ...
##  $ a33                                 : num [1:299] 3 2 4 3 4 3 3 3 4 2 ...
##  $ a34                                 : num [1:299] 2 1 3 3 4 3 1 3 3 2 ...
##  $ a35                                 : num [1:299] 3 4 3 3 4 3 3 3 4 3 ...
##  $ literacy_score                      : num [1:299] 102 105 102 107 111 98 97 98 108 84 ...

install.packages("GGally")

## Installing package into 'C:/Users/H P/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)

## package 'GGally' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\H P\AppData\Local\Temp\RtmpEtPOJ0\downloaded_packages

library(GGally)

## Warning: package 'GGally' was built under R version 4.4.2

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

Data1 %>% 
  select(duration_of_employment, current_hospital_posting, previous_experience_of_being_bullied, family_members_spouse, peers_friends, literacy_score) %>% 
  ggpairs()

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

slrdataA <- lm(literacy_score ~ family_members_spouse, data = Data1)
summary(slrdataA)

## 
## Call:
## lm(formula = literacy_score ~ family_members_spouse, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.724  -4.724  -0.724   4.276  51.097 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               98.7239     0.4921 200.601  < 2e-16 ***
## family_members_spouseYes   5.1793     1.5284   3.389 0.000797 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.057 on 297 degrees of freedom
## Multiple R-squared:  0.03722,    Adjusted R-squared:  0.03398 
## F-statistic: 11.48 on 1 and 297 DF,  p-value: 0.0007971

tidy(slrdataA, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             98.7      0.492    201.   4.02e-319    97.8      99.7 
## 2 family_members_spou…     5.18     1.53       3.39 7.97e-  4     2.17      8.19

tbl_regression(slrdataA)

Characteristic	Beta	95% CI¹	p-value
family_members_spouse
No	—	—
Yes	5.2	2.2, 8.2	<0.001
¹ CI = Confidence Interval

slrdataB <- lm(literacy_score ~ peers_friends, data = Data1)
summary(slrdataB)

## 
## Call:
## lm(formula = literacy_score ~ peers_friends, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.632  -4.793  -0.632   4.207  54.207 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       98.6321     0.5599 176.173   <2e-16 ***
## peers_friendsYes   2.1610     1.0379   2.082   0.0382 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.152 on 297 degrees of freedom
## Multiple R-squared:  0.01439,    Adjusted R-squared:  0.01107 
## F-statistic: 4.335 on 1 and 297 DF,  p-value: 0.03819

tidy(slrdataB, conf.int= TRUE)

## # A tibble: 2 × 7
##   term             estimate std.error statistic   p.value conf.low conf.high
##   <chr>               <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)         98.6      0.560    176.   1.63e-302   97.5       99.7 
## 2 peers_friendsYes     2.16     1.04       2.08 3.82e-  2    0.118      4.20

tbl_regression(slrdataB)

Characteristic	Beta	95% CI¹	p-value
peers_friends
No	—	—
Yes	2.2	0.12, 4.2	0.038
¹ CI = Confidence Interval

slrdataC <- lm(literacy_score ~ current_hospital_posting, data = Data1)
summary(slrdataC)

## 
## Call:
## lm(formula = literacy_score ~ current_hospital_posting, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.469  -4.972  -0.469   4.531  56.028 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   98.4687     0.8342 118.043   <2e-16 ***
## current_hospital_postingHSIP   2.8646     1.3667   2.096   0.0369 *  
## current_hospital_postingHTM    0.5049     1.5665   0.322   0.7474    
## current_hospital_postingHUSM   0.5035     1.1465   0.439   0.6609    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.173 on 295 degrees of freedom
## Multiple R-squared:  0.01584,    Adjusted R-squared:  0.005832 
## F-statistic: 1.583 on 3 and 295 DF,  p-value: 0.1936

tidy(slrdataC, conf.int= TRUE)

## # A tibble: 4 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)            98.5       0.834   118.    2.38e-250   96.8      100.  
## 2 current_hospital_po…    2.86      1.37      2.10  3.69e-  2    0.175      5.55
## 3 current_hospital_po…    0.505     1.57      0.322 7.47e-  1   -2.58       3.59
## 4 current_hospital_po…    0.503     1.15      0.439 6.61e-  1   -1.75       2.76

tbl_regression(slrdataC)

Characteristic	Beta	95% CI¹	p-value
current_hospital_posting
HRPZ	—	—
HSIP	2.9	0.17, 5.6	0.037
HTM	0.50	-2.6, 3.6	0.7
HUSM	0.50	-1.8, 2.8	0.7
¹ CI = Confidence Interval

slrdata1 <- lm(literacy_score ~ duration_of_employment, data = Data1)
summary(slrdata1)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.156  -4.983  -0.654   4.761  54.191 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            101.47225    1.16035  87.450   <2e-16 ***
## duration_of_employment  -0.16580    0.07949  -2.086   0.0379 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.151 on 297 degrees of freedom
## Multiple R-squared:  0.01444,    Adjusted R-squared:  0.01112 
## F-statistic:  4.35 on 1 and 297 DF,  p-value: 0.03786

tidy(slrdata1, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)           101.       1.16       87.4  5.23e-214   99.2   104.     
## 2 duration_of_employm…   -0.166    0.0795     -2.09 3.79e-  2   -0.322  -0.00936

tbl_regression(slrdata1)

Characteristic	Beta	95% CI¹	p-value
duration_of_employment	-0.17	-0.32, -0.01	0.038
¹ CI = Confidence Interval

slrdata2 <- lm(literacy_score ~ exposure_to_psychiatry_posting, data = Data1)
summary(slrdata2)

## 
## Call:
## lm(formula = literacy_score ~ exposure_to_psychiatry_posting, 
##     data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.277  -5.277  -1.277   4.723  55.723 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                        99.2770     0.4772 208.058   <2e-16 ***
## exposure_to_psychiatry_postingYes  -1.6104     4.7636  -0.338    0.736    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.209 on 297 degrees of freedom
## Multiple R-squared:  0.0003846,  Adjusted R-squared:  -0.002981 
## F-statistic: 0.1143 on 1 and 297 DF,  p-value: 0.7356

tidy(slrdata2, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             99.3      0.477   208.    9.88e-324     98.3    100.  
## 2 exposure_to_psychia…    -1.61     4.76     -0.338 7.36e-  1    -11.0      7.76

tbl_regression(slrdata2)

Characteristic	Beta	95% CI¹	p-value
exposure_to_psychiatry_posting
No	—	—
Yes	-1.6	-11, 7.8	0.7
¹ CI = Confidence Interval

slrdata3 <- lm(literacy_score ~ previous_experience_of_being_bullied, data = Data1)
summary(slrdata3)

## 
## Call:
## lm(formula = literacy_score ~ previous_experience_of_being_bullied, 
##     data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.879  -4.879  -0.553   4.447  54.121 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              98.5529     0.5644 174.604   <2e-16
## previous_experience_of_being_bulliedYes   2.3262     1.0231   2.274   0.0237
##                                            
## (Intercept)                             ***
## previous_experience_of_being_bulliedYes *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.14 on 297 degrees of freedom
## Multiple R-squared:  0.01711,    Adjusted R-squared:  0.0138 
## F-statistic: 5.169 on 1 and 297 DF,  p-value: 0.0237

slrdata3 <- lm(literacy_score ~ previous_experience_of_being_bullied, data = Data1)
summary(slrdata3)

## 
## Call:
## lm(formula = literacy_score ~ previous_experience_of_being_bullied, 
##     data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.879  -4.879  -0.553   4.447  54.121 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                              98.5529     0.5644 174.604   <2e-16
## previous_experience_of_being_bulliedYes   2.3262     1.0231   2.274   0.0237
##                                            
## (Intercept)                             ***
## previous_experience_of_being_bulliedYes *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.14 on 297 degrees of freedom
## Multiple R-squared:  0.01711,    Adjusted R-squared:  0.0138 
## F-statistic: 5.169 on 1 and 297 DF,  p-value: 0.0237

tidy(slrdata3, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             98.6      0.564    175.   2.27e-301   97.4       99.7 
## 2 previous_experience…     2.33     1.02       2.27 2.37e-  2    0.313      4.34

tbl_regression(slrdata3)

Characteristic	Beta	95% CI¹	p-value
previous_experience_of_being_bullied
No	—	—
Yes	2.3	0.31, 4.3	0.024
¹ CI = Confidence Interval

install.packages("broom")

## Warning: package 'broom' is in use and will not be installed

library(broom)
tidy(slrdata1, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)           101.       1.16       87.4  5.23e-214   99.2   104.     
## 2 duration_of_employm…   -0.166    0.0795     -2.09 3.79e-  2   -0.322  -0.00936

tbl_regression(slrdata1)

Characteristic	Beta	95% CI¹	p-value
duration_of_employment	-0.17	-0.32, -0.01	0.038
¹ CI = Confidence Interval

tidy(slrdata2, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             99.3      0.477   208.    9.88e-324     98.3    100.  
## 2 exposure_to_psychia…    -1.61     4.76     -0.338 7.36e-  1    -11.0      7.76

tidy(slrdata3, conf.int= TRUE)

## # A tibble: 2 × 7
##   term                 estimate std.error statistic   p.value conf.low conf.high
##   <chr>                   <dbl>     <dbl>     <dbl>     <dbl>    <dbl>     <dbl>
## 1 (Intercept)             98.6      0.564    175.   2.27e-301   97.4       99.7 
## 2 previous_experience…     2.33     1.02       2.27 2.37e-  2    0.313      4.34

install.packages("rsq")

## Warning: package 'rsq' is in use and will not be installed

library(rsq)
rsq(slrdata1)

## [1] 0.01443512

rsq(slrdata2)

## [1] 0.0003846313

rsq(slrdata3)

## [1] 0.01710795

rsq(slrdataA)

## [1] 0.03722474

rsq(slrdataB)

## [1] 0.01438671

rsq(slrdataC)

## [1] 0.01584071

mlrdata1 <- lm(literacy_score ~ duration_of_employment + current_hospital_posting + previous_experience_of_being_bullied + family_members_spouse + peers_friends, data = Data1)
summary(mlrdata1)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + current_hospital_posting + 
##     previous_experience_of_being_bullied + family_members_spouse + 
##     peers_friends, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.136  -4.873  -0.020   4.363  48.467 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             98.72898    1.47587  66.895  < 2e-16
## duration_of_employment                  -0.13017    0.07935  -1.641  0.10197
## current_hospital_postingHSIP             2.57927    1.34851   1.913  0.05677
## current_hospital_postingHTM              0.87073    1.54378   0.564  0.57317
## current_hospital_postingHUSM             0.74146    1.12609   0.658  0.51078
## previous_experience_of_being_bulliedYes  2.01095    1.01586   1.980  0.04870
## family_members_spouseYes                 4.46020    1.54353   2.890  0.00415
## peers_friendsYes                         1.11174    1.05039   1.058  0.29075
##                                            
## (Intercept)                             ***
## duration_of_employment                     
## current_hospital_postingHSIP            .  
## current_hospital_postingHTM                
## current_hospital_postingHUSM               
## previous_experience_of_being_bulliedYes *  
## family_members_spouseYes                ** 
## peers_friendsYes                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.96 on 291 degrees of freedom
## Multiple R-squared:  0.07917,    Adjusted R-squared:  0.05702 
## F-statistic: 3.574 on 7 and 291 DF,  p-value: 0.001048

rsq(mlrdata1)

## [1] 0.07916771

mlrdata2 <- lm(literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + family_members_spouse + peers_friends, data = Data1)
summary(mlrdata2)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + 
##     family_members_spouse + peers_friends, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.736  -4.890  -0.318   4.679  48.115 
## 
## Coefficients:
##                                         Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             99.78907    1.25818  79.312   <2e-16
## duration_of_employment                  -0.14540    0.07843  -1.854   0.0648
## previous_experience_of_being_bulliedYes  1.85483    1.00896   1.838   0.0670
## family_members_spouseYes                 4.52538    1.54195   2.935   0.0036
## peers_friendsYes                         1.29702    1.03887   1.248   0.2128
##                                            
## (Intercept)                             ***
## duration_of_employment                  .  
## previous_experience_of_being_bulliedYes .  
## family_members_spouseYes                ** 
## peers_friendsYes                           
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.97 on 294 degrees of freedom
## Multiple R-squared:  0.06746,    Adjusted R-squared:  0.05477 
## F-statistic: 5.317 on 4 and 294 DF,  p-value: 0.0003795

rsq(mlrdata2)

## [1] 0.06745943

mlrdata3 <- lm(literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + family_members_spouse, data = Data1)
summary(mlrdata3)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + 
##     family_members_spouse, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.026  -4.945  -0.497   4.605  48.603 
## 
## Coefficients:
##                                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                             100.28876    1.19395  83.997  < 2e-16
## duration_of_employment                   -0.15801    0.07785  -2.030  0.04329
## previous_experience_of_being_bulliedYes   1.89782    1.00933   1.880  0.06105
## family_members_spouseYes                  4.84211    1.52238   3.181  0.00163
##                                            
## (Intercept)                             ***
## duration_of_employment                  *  
## previous_experience_of_being_bulliedYes .  
## family_members_spouseYes                ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.977 on 295 degrees of freedom
## Multiple R-squared:  0.06252,    Adjusted R-squared:  0.05298 
## F-statistic: 6.557 on 3 and 295 DF,  p-value: 0.0002637

rsq(mlrdata3)

## [1] 0.06251529

mlrdata4 <- lm(literacy_score ~ duration_of_employment + family_members_spouse, data = Data1)
summary(mlrdata4)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + family_members_spouse, 
##     data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.637  -4.773  -0.637   4.691  49.593 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              100.90876    1.15242  87.562  < 2e-16 ***
## duration_of_employment    -0.16361    0.07813  -2.094 0.037103 *  
## family_members_spouseYes   5.15302    1.51984   3.391 0.000792 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.011 on 296 degrees of freedom
## Multiple R-squared:  0.05128,    Adjusted R-squared:  0.04487 
## F-statistic:     8 on 2 and 296 DF,  p-value: 0.0004135

rsq(mlrdata4)

## [1] 0.05127984

# forward
mlr_data4_stepforward <- step(mlrdata4, scope = ~ duration_of_employment + family_members_spouse, direction = "forward")

## Start:  AIC=1247.32
## literacy_score ~ duration_of_employment + family_members_spouse

mlr_data3_stepforward <- step(mlrdata3, scope = ~ duration_of_employment + previous_experience_of_being_bullied + family_members_spouse, direction = "forward")

## Start:  AIC=1245.76
## literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + 
##     family_members_spouse

# forward
mlr_data2_stepforward <- step(mlrdata2, scope = ~ duration_of_employment + previous_experience_of_being_bullied + family_members_spouse + peers_friends, direction = "forward")

## Start:  AIC=1246.18
## literacy_score ~ duration_of_employment + previous_experience_of_being_bullied + 
##     family_members_spouse + peers_friends

# Backward
mlr_data4_stepbackward <- step(mlrdata4, direction = "backward")

## Start:  AIC=1247.32
## literacy_score ~ duration_of_employment + family_members_spouse
## 
##                          Df Sum of Sq   RSS    AIC
## <none>                                18997 1247.3
## - duration_of_employment  1    281.43 19278 1249.7
## - family_members_spouse   1    737.77 19735 1256.7

# Both
mlr_data4_stepwise <- step(mlrdata4, direction = "both")

## Start:  AIC=1247.32
## literacy_score ~ duration_of_employment + family_members_spouse
## 
##                          Df Sum of Sq   RSS    AIC
## <none>                                18997 1247.3
## - duration_of_employment  1    281.43 19278 1249.7
## - family_members_spouse   1    737.77 19735 1256.7

prelim_mlr_data1 <- lm(literacy_score ~ duration_of_employment + family_members_spouse, data = Data1)
summary(prelim_mlr_data1, conf.int = TRUE)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + family_members_spouse, 
##     data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.637  -4.773  -0.637   4.691  49.593 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              100.90876    1.15242  87.562  < 2e-16 ***
## duration_of_employment    -0.16361    0.07813  -2.094 0.037103 *  
## family_members_spouseYes   5.15302    1.51984   3.391 0.000792 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.011 on 296 degrees of freedom
## Multiple R-squared:  0.05128,    Adjusted R-squared:  0.04487 
## F-statistic:     8 on 2 and 296 DF,  p-value: 0.0004135

tbl_regression(prelim_mlr_data1)

Characteristic	Beta	95% CI¹	p-value
duration_of_employment	-0.16	-0.32, -0.01	0.037
family_members_spouse
No	—	—
Yes	5.2	2.2, 8.1	<0.001
¹ CI = Confidence Interval

rsq(prelim_mlr_data1, adj = TRUE)

## [1] 0.04486957

plot(prelim_mlr_data1)

summary(glm(literacy_score ~ duration_of_employment + family_members_spouse + duration_of_employment*family_members_spouse,   data = Data1))

## 
## Call:
## glm(formula = literacy_score ~ duration_of_employment + family_members_spouse + 
##     duration_of_employment * family_members_spouse, data = Data1)
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                     99.98327    1.21132  82.541
## duration_of_employment                          -0.09430    0.08309  -1.135
## family_members_spouseYes                        12.25985    3.41060   3.595
## duration_of_employment:family_members_spouseYes -0.53781    0.23147  -2.323
##                                                 Pr(>|t|)    
## (Intercept)                                      < 2e-16 ***
## duration_of_employment                          0.257321    
## family_members_spouseYes                        0.000381 ***
## duration_of_employment:family_members_spouseYes 0.020836 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 63.23885)
## 
##     Null deviance: 20024  on 298  degrees of freedom
## Residual deviance: 18655  on 295  degrees of freedom
## AIC: 2094.4
## 
## Number of Fisher Scoring iterations: 2

prelim_mlr_Interaction <- lm(literacy_score ~ duration_of_employment + family_members_spouse + duration_of_employment*family_members_spouse,   data = Data1)
summary(prelim_mlr_Interaction)

## 
## Call:
## lm(formula = literacy_score ~ duration_of_employment + family_members_spouse + 
##     duration_of_employment * family_members_spouse, data = Data1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.097  -4.833  -0.852   4.186  45.285 
## 
## Coefficients:
##                                                 Estimate Std. Error t value
## (Intercept)                                     99.98327    1.21132  82.541
## duration_of_employment                          -0.09430    0.08309  -1.135
## family_members_spouseYes                        12.25985    3.41060   3.595
## duration_of_employment:family_members_spouseYes -0.53781    0.23147  -2.323
##                                                 Pr(>|t|)    
## (Intercept)                                      < 2e-16 ***
## duration_of_employment                          0.257321    
## family_members_spouseYes                        0.000381 ***
## duration_of_employment:family_members_spouseYes 0.020836 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.952 on 295 degrees of freedom
## Multiple R-squared:  0.06833,    Adjusted R-squared:  0.05885 
## F-statistic: 7.212 on 3 and 295 DF,  p-value: 0.0001097

tbl_regression(prelim_mlr_Interaction)

Characteristic	Beta	95% CI¹	p-value
duration_of_employment	-0.09	-0.26, 0.07	0.3
family_members_spouse
No	—	—
Yes	12	5.5, 19	<0.001
duration_of_employment * family_members_spouse
duration_of_employment * Yes	-0.54	-0.99, -0.08	0.021
¹ CI = Confidence Interval

rsq(prelim_mlr_Interaction, adj = TRUE)

## [1] 0.05885417

plot(prelim_mlr_Interaction)

augment(prelim_mlr_data1)

## # A tibble: 299 × 9
##    literacy_score duration_of_employment family_members_spouse .fitted  .resid
##             <dbl>                  <dbl> <chr>                   <dbl>   <dbl>
##  1            102                     10 No                       99.3   2.73 
##  2            105                      9 No                       99.4   5.56 
##  3            102                      9 Yes                     105.   -2.59 
##  4            107                      9 No                       99.4   7.56 
##  5            111                     10 No                       99.3  11.7  
##  6             98                      8 No                       99.6  -1.60 
##  7             97                     10 No                       99.3  -2.27 
##  8             98                     23 No                       97.1   0.854
##  9            108                      9 No                       99.4   8.56 
## 10             84                      9 No                       99.4 -15.4  
## # ℹ 289 more rows
## # ℹ 4 more variables: .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>

augment(prelim_mlr_Interaction)

## # A tibble: 299 × 9
##    literacy_score duration_of_employment family_members_spouse .fitted  .resid
##             <dbl>                  <dbl> <chr>                   <dbl>   <dbl>
##  1            102                     10 No                       99.0   2.96 
##  2            105                      9 No                       99.1   5.87 
##  3            102                      9 Yes                     107.   -4.55 
##  4            107                      9 No                       99.1   7.87 
##  5            111                     10 No                       99.0  12.0  
##  6             98                      8 No                       99.2  -1.23 
##  7             97                     10 No                       99.0  -2.04 
##  8             98                     23 No                       97.8   0.186
##  9            108                      9 No                       99.1   8.87 
## 10             84                      9 No                       99.1 -15.1  
## # ℹ 289 more rows
## # ℹ 4 more variables: .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>

summary(Data1$duration_of_employment)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.00    8.00   12.00   13.34   18.00   24.00

newdata <- expand_grid(
  duration_of_employment = c(8, 12, 18))

newprediction <- augment(slrdata1, newdata = newdata)

newprediction %>% 
  ggplot(aes(duration_of_employment, .fitted)) + geom_line()

# Install and load the broom package if not already installed
install.packages("broom")

## Warning: package 'broom' is in use and will not be installed

library(broom)

# Add predictions to the dataset
Data1_augmented <- augment(prelim_mlr_Interaction, data = Data1)

ggplot(Data1_augmented, aes(x = duration_of_employment, y = .fitted, color = family_members_spouse)) +
  geom_line(size = 1) +  # Add interaction lines
  geom_point(aes(y = literacy_score), alpha = 0.6) +  # Overlay observed points
  labs(
    title = "Interaction Effect: Duration of Employment and Family Members (Spouse)",
    x = "Duration of Employment",
    y = "Literacy Score",
    color = "Family Members (Spouse)"
  ) +
  theme_minimal()

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggplot(Data1_augmented, aes(x = duration_of_employment, y = .fitted)) +
  geom_line(color = "blue", size = 1) +
  geom_point(aes(y = literacy_score), alpha = 0.6) +  # Observed points
  facet_wrap(~family_members_spouse) +
  labs(
    title = "Interaction Effect by Family Members (Spouse)",
    x = "Duration of Employment",
    y = "Literacy Score"
  ) +
  theme_minimal()

# Add confidence intervals
Data1_augmented <- Data1_augmented %>%
  mutate(conf_low = predict(prelim_mlr_Interaction, newdata = Data1, interval = "confidence")[, "lwr"],
         conf_high = predict(prelim_mlr_Interaction, newdata = Data1, interval = "confidence")[, "upr"])

# Plot with confidence intervals
ggplot(Data1_augmented, aes(x = duration_of_employment, y = .fitted, color = family_members_spouse)) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = conf_low, ymax = conf_high, fill = family_members_spouse), alpha = 0.2) +
  geom_point(aes(y = literacy_score), alpha = 0.6) +
  labs(
    title = "Interaction Effect with Confidence Intervals",
    x = "Duration of Employment",
    y = "Literacy Score",
    color = "Family Members (Spouse)",
    fill = "Family Members (Spouse)"
  ) +
  theme_minimal()

# Create a new dataset
new_data <- expand.grid(
  duration_of_employment = c(8, 12, 18),  # Replace with your values
  family_members_spouse = c("Yes", "No")      # Two levels: "Yes" and "No"
)

print(new_data)

##   duration_of_employment family_members_spouse
## 1                      8                   Yes
## 2                     12                   Yes
## 3                     18                   Yes
## 4                      8                    No
## 5                     12                    No
## 6                     18                    No

# Make predictions with confidence intervals
predictions <- predict(
  prelim_mlr_Interaction,          # Your linear model
  newdata = new_data,              # New dataset
  interval = "confidence"          # Options: "confidence" or "prediction"
)

# Combine predictions with the new data
predicted_data <- cbind(new_data, predictions)

print(predicted_data)

##   duration_of_employment family_members_spouse       fit       lwr       upr
## 1                      8                   Yes 107.18617 103.61161 110.76072
## 2                     12                   Yes 104.65769 101.80135 107.51403
## 3                     18                   Yes 100.86498  97.38969 104.34026
## 4                      8                    No  99.22883  97.93244 100.52522
## 5                     12                    No  98.85161  97.87029  99.83294
## 6                     18                    No  98.28579  97.06470  99.50687

# Visualize the predictions
ggplot(predicted_data, aes(x = duration_of_employment, y = fit, color = family_members_spouse)) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  geom_ribbon(aes(ymin = lwr, ymax = upr, fill = family_members_spouse), alpha = 0.2) +
  labs(
    title = "Predicted Literacy Score",
    x = "Duration of Employment",
    y = "Predicted Literacy Score",
    color = "Family Members (Spouse)",
    fill = "Family Members (Spouse)"
  ) +
  theme_minimal()

^

Literacy score among HO = 99.98 + -0.09(duration of employment) + 12.25(family member spouse) + -0.54(duration of employment*family member spouse)

#with R2 = 6.83, Adj : 5.85

MLR- assignment

Dr Omar Bin Nazmi, Dr Syuaib, Dr Hafiz and Dr Zaim

2024-12-01

^

Literacy score among HO = 99.98 + -0.09(duration of employment) + 12.25(family member spouse) + -0.54(duration of employment*family member spouse)