library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.1
## Warning: package 'tibble' was built under R version 4.3.1
## Warning: package 'tidyr' was built under R version 4.3.1
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.1
## Warning: package 'dplyr' was built under R version 4.3.1
## Warning: package 'stringr' was built under R version 4.3.1
## Warning: package 'forcats' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ltm)
## Warning: package 'ltm' was built under R version 4.3.3
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: msm
## Warning: package 'msm' was built under R version 4.3.3
## Loading required package: polycor
## Warning: package 'polycor' was built under R version 4.3.3
library(dplyr)
library(stats)
library(fastDummies)
## Warning: package 'fastDummies' was built under R version 4.3.3
## Thank you for using fastDummies!
## To acknowledge our work, please cite the package:
## Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/.
# Load the survey data. read.csv2() reads ';'-separated fields with ',' as the
# decimal mark.
df <- read.csv2(file = "basak_sayisal_veriler.csv")
# glimpse(df)  # uncomment to inspect the column types
# Exploratory factor analysis for one family of survey items.
#
# Arguments:
#   df       data frame containing the item columns.
#   what     column-name prefix; all columns matched by starts_with(what)
#            are analysed.
#   howmany  number of factors handed to factanal().
#   reduce   position(s), within the selected item columns, to drop before
#            the factor analysis; 0 (the default) drops nothing.
#   rotat    rotation passed to factanal() (default "promax").
#
# Side effects: prints Cronbach's alpha, the loadings, the per-item explained
# proportion (1 - uniqueness) and the likelihood-ratio p-value, and draws a
# bar plot of the explained proportions.
#
# Returns an unnamed list: [[1]] the factanal() fit (with regression scores),
# [[2]] the mean-centred item data that was passed to factanal().
extract_factors <- function(df, what, howmany, reduce = 0, rotat = "promax") {
  cat("________________ START --> ", what, "_____________________")
  cat("\n")

  center <- function(x) x - mean(x)
  df_sub <- df %>%
    dplyr::select(starts_with(what)) %>%
    mutate(across(everything(), center))

  # NOTE(review): alpha is computed on all selected items, i.e. before the
  # `reduce` columns are dropped -- confirm this is intended.
  CA <- round(cronbach.alpha(df_sub)$alpha, 2)
  cat("\n")
  cat("cronbach_alpa =", CA)
  cat("\n")

  # `reduce` may hold several positions, so it must not be used directly as an
  # if() condition (a condition of length > 1 is an error in current R).
  drop_cols <- reduce[reduce != 0]
  if (length(drop_cols) > 0) {
    # drop = FALSE keeps a data frame even if a single column remains.
    df_sub <- df_sub[, -drop_cols, drop = FALSE]
  }

  FA <- factanal(df_sub, factors = howmany, scores = "regression",
                 rotation = rotat)
  print(FA$loadings)

  # Share of each item's variance reproduced by the factors.
  explained <- 1 - FA$uniquenesses
  barplot(explained, cex.names = 0.7, col = seq_along(explained),
          main = "faktor analizining acikladigi oranlar", cex.main = 0.8)
  cat("\n")
  cat("faktor analizining acikladigi oranlar:")
  cat("\n")
  explained_props <- data.frame(explained_variances = explained)
  print(explained_props)
  cat("\n")

  # factanal() only returns PVAL when the test can be computed; without this
  # guard `if (NULL < 0.05)` stops with "argument is of length zero".
  p_value <- FA$PVAL
  if (is.null(p_value)) {
    cat("likelihood ratio test | p-value: not available")
    cat("\n")
  } else {
    cat("likelihood ratio test | p-value:", p_value)
    cat("\n")
    if (p_value < 0.05) {
      print("factors are not sufficient")
    } else {
      cat("\n", "factors are sufficient")
    }
  }
  cat("\n")
  cat("________________ END _____________________")

  list(FA, df_sub)
}
# Build dummy (indicator) columns for selected firm-level variables.
#
# Arguments:
#   df    data frame containing columns whose names start with "isletmenin".
#   this  position(s) or name(s) of the wanted column(s) *within the
#         "isletmenin*" columns*, not within the whole of df.
#
# Returns the numeric columns of the dummy_cols() result for the selected
# column(s).
dummy_func <- function(df, this) {
  # all_of() marks `this` as an external vector: it silences the tidyselect
  # deprecation warning and cannot be confused with a column named "this".
  df %>%
    dplyr::select(starts_with("isletmenin")) %>%
    dplyr::select(all_of(this)) %>%
    dummy_cols() %>%
    dplyr::select(where(is.numeric))
}
# Report a fitted model: draw its Cook's distances and print its summary.
#
# Arguments:
#   model_now  fitted model object that has a `call` element and works with
#              cooks.distance() and summary() (the lm fits in this file).
#
# Side effects: draws a spike plot of Cook's distances with a dashed red
# reference line at 1, then prints a "Y = ..." header and the model summary.
# The value of the final print() call is returned.
show_model_details <- function(model_now) {
  # Second element of the stored call; paste() turns it into the text used in
  # the plot title and in the printed header.
  model_label <- model_now$call[2]

  cat("\n")
  # The `%>% plot(., ...)` form is kept deliberately: the default y-axis label
  # is taken from the deparsed first argument.
  # Title suffix (Turkish): "influence of the observations on the model".
  cooks.distance(model_now) %>%
    plot(.,
         type = "h",
         col = "black",
         main = paste(model_label, "cooks distances (verilerin modele etkileri)"),
         cex.main = 0.6)
  abline(h = 1, lty = 2, col = "red")

  cat("######")
  print(paste("Y =", model_label))
  cat("######")
  cat("\n")
  print(summary(model_now))
}
# Names of the firm-level ("isletmenin*") columns. The positions printed here
# are the indices that the frequency tables below select by.
df |>
  dplyr::select(starts_with("isletmenin")) |>
  names()
## [1] "isletmenin.sektor.grubu."                                  
## [2] "isletmenin.unvani..adi..."                                 
## [3] "isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir."
## [4] "isletmenin.faaliyette.bulundugu.sure."                     
## [5] "isletmenin.olcegi."                                        
## [6] "isletmenin.calisan.sayisi."
# Frequency table of column 3.
df |>
  dplyr::select(starts_with("isletmenin")) |>
  dplyr::select(3) |>
  table()
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir.
##  Evet Hayir 
##    51    71
# Cross-tabulation of columns 5 and 6.
df |>
  dplyr::select(starts_with("isletmenin")) |>
  dplyr::select(c(5, 6)) |>
  table()
##                   isletmenin.calisan.sayisi.
## isletmenin.olcegi. 10 - 49 kisi 250 kisi ve uzeri 50 - 249 kisi
##              Buyuk            1                36            11
##              Kucuk            9                 0             7
##              Orta             9                 9            40
# Frequency table of column 4.
df |>
  dplyr::select(starts_with("isletmenin")) |>
  dplyr::select(4) |>
  table()
## isletmenin.faaliyette.bulundugu.sure.
##      1 - 10 yil     11 - 30 yil 30 yildan fazla 
##              15              51              56
# Regress each of the two factor scores on one dummy-coded firm variable and
# report both fits with show_model_details().
#
# Arguments:
#   df       survey data frame; handed to dummy_func().
#   which_X  position(s) of the predictor among the "isletmenin*" columns
#            (see dummy_func()).
#   scores   two-column matrix or data frame of factor scores whose columns
#            are named sosyal_cevresel_donusum and verimlilik_boyutu. Defaults
#            to the global `see_sur_scores`, which the function previously
#            read implicitly.
#
# Returns the value of the last show_model_details() call.
make_model <- function(df, which_X, scores = see_sur_scores) {
  dummies <- dummy_func(df, which_X)

  # The positional column drops below rely on this exact layout.
  stopifnot(
    identical(colnames(scores),
              c("sosyal_cevresel_donusum", "verimlilik_boyutu")),
    nrow(scores) == nrow(dummies)
  )

  # Columns 1-2 are the two scores; column 3 is the first column returned by
  # dummy_func(). Each model drops the other score and column 3, so the first
  # dummy column is left out of the predictors.
  df_pilot <- cbind(scores, dummies)

  model_sosyal_cevresel_boyut <- lm(df_pilot$sosyal_cevresel_donusum ~ .,
                                    data = df_pilot[, -c(2, 3)])
  show_model_details(model_sosyal_cevresel_boyut)

  model_verimlilik_boyutu <- lm(df_pilot$verimlilik_boyutu ~ .,
                                data = df_pilot[, -c(1, 3)])
  show_model_details(model_verimlilik_boyutu)
}
# Two-factor solution for the items whose names start with "far_sur".
see_sur <- extract_factors(df, what = "far_sur", howmany = 2)
## ________________ START -->  far_sur _____________________
## 
## cronbach_alpa = 0.92
## 
## Loadings:
##                        Factor1 Factor2
## far_sur_kaynak                  1.044 
## far_sur_gelecek         0.542   0.365 
## far_sur_adil_is         0.791         
## far_sur_toplum          0.906  -0.110 
## far_sur_cevre_koruma    0.888         
## far_sur_paydas          0.608   0.216 
## far_sur_eko_performans  0.160   0.506 
## far_sur_calisan_hak     0.808         
## far_sur_tarim           0.678  -0.119 
## 
##                Factor1 Factor2
## SS loadings      4.040   1.562
## Proportion Var   0.449   0.174
## Cumulative Var   0.449   0.622

## 
## faktor analizining acikladigi oranlar:
##                        explained_variances
## far_sur_kaynak                   0.9950000
## far_sur_gelecek                  0.7264461
## far_sur_adil_is                  0.7079855
## far_sur_toplum                   0.6819977
## far_sur_cevre_koruma             0.7664654
## far_sur_paydas                   0.6143465
## far_sur_eko_performans           0.4042705
## far_sur_calisan_hak              0.7504223
## far_sur_tarim                    0.3518575
## 
## likelihood ratio test | p-value: 0.1037971
## 
##  factors are sufficient
## ________________ END _____________________
# Factor scores from the fit returned by extract_factors(); the two columns
# get the dependent-variable names that make_model() refers to.
see_sur_scores <- see_sur[[1]][["scores"]]
colnames(see_sur_scores) <- c("sosyal_cevresel_donusum", "verimlilik_boyutu")
boxplot(
  see_sur_scores,
  horizontal = TRUE,
  cex.axis = 0.7,
  col = seq_len(ncol(see_sur_scores)),
  main = "Faktor analizinden gelen bagimli degiskenler",
  cex.main = 0.7
)

# Predictor: "isletmenin*" column 3.
make_model(df, which_X = 3)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(this)
## 
##   # Now:
##   data %>% select(all_of(this))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

## ######[1] "Y = df_pilot$sosyal_cevresel_donusum ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$sosyal_cevresel_donusum ~ ., data = df_pilot[, 
##     -c(2, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.6723 -0.8867  0.4300  0.7770  3.6786 
## 
## Coefficients:
##                                                                  Estimate
## (Intercept)                                                       -0.2661
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir   0.4572
##                                                                  Std. Error
## (Intercept)                                                          0.1965
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir     0.2576
##                                                                  t value
## (Intercept)                                                       -1.354
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir   1.775
##                                                                  Pr(>|t|)  
## (Intercept)                                                        0.1783  
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir   0.0784 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.403 on 120 degrees of freedom
## Multiple R-squared:  0.02558,    Adjusted R-squared:  0.01746 
## F-statistic: 3.151 on 1 and 120 DF,  p-value: 0.07844

## ######[1] "Y = df_pilot$verimlilik_boyutu ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$verimlilik_boyutu ~ ., data = df_pilot[, 
##     -c(1, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9027 -0.3443  0.1096  0.8542  5.4104 
## 
## Coefficients:
##                                                                  Estimate
## (Intercept)                                                        0.2063
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir  -0.3545
##                                                                  Std. Error
## (Intercept)                                                          0.2043
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir     0.2678
##                                                                  t value
## (Intercept)                                                        1.010
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir  -1.324
##                                                                  Pr(>|t|)
## (Intercept)                                                         0.315
## isletmenin.yabanci.firmalarla.3kligi..isbirligi.var.midir._Hayir    0.188
## 
## Residual standard error: 1.459 on 120 degrees of freedom
## Multiple R-squared:  0.01439,    Adjusted R-squared:  0.00618 
## F-statistic: 1.752 on 1 and 120 DF,  p-value: 0.1881
# Predictor: "isletmenin*" column 4.
make_model(df, which_X = 4)

## ######[1] "Y = df_pilot$sosyal_cevresel_donusum ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$sosyal_cevresel_donusum ~ ., data = df_pilot[, 
##     -c(2, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.5543 -0.8952  0.4718  0.8949  4.0530 
## 
## Coefficients:
##                                                         Estimate Std. Error
## (Intercept)                                               0.3496     0.3657
## `isletmenin.faaliyette.bulundugu.sure._11 - 30 yil`      -0.5328     0.4160
## `isletmenin.faaliyette.bulundugu.sure._30 yildan fazla`  -0.2764     0.4118
##                                                         t value Pr(>|t|)
## (Intercept)                                               0.956    0.341
## `isletmenin.faaliyette.bulundugu.sure._11 - 30 yil`      -1.281    0.203
## `isletmenin.faaliyette.bulundugu.sure._30 yildan fazla`  -0.671    0.503
## 
## Residual standard error: 1.416 on 119 degrees of freedom
## Multiple R-squared:  0.01585,    Adjusted R-squared:  -0.0006874 
## F-statistic: 0.9584 on 2 and 119 DF,  p-value: 0.3864

## ######[1] "Y = df_pilot$verimlilik_boyutu ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$verimlilik_boyutu ~ ., data = df_pilot[, 
##     -c(1, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.1693 -0.2564 -0.0556  0.9551  5.1520 
## 
## Coefficients:
##                                                         Estimate Std. Error
## (Intercept)                                              -0.8140     0.3726
## `isletmenin.faaliyette.bulundugu.sure._11 - 30 yil`       0.9324     0.4238
## `isletmenin.faaliyette.bulundugu.sure._30 yildan fazla`   0.9242     0.4195
##                                                         t value Pr(>|t|)  
## (Intercept)                                              -2.185   0.0309 *
## `isletmenin.faaliyette.bulundugu.sure._11 - 30 yil`       2.200   0.0298 *
## `isletmenin.faaliyette.bulundugu.sure._30 yildan fazla`   2.203   0.0295 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.443 on 119 degrees of freedom
## Multiple R-squared:  0.04374,    Adjusted R-squared:  0.02767 
## F-statistic: 2.721 on 2 and 119 DF,  p-value: 0.06988
# Predictor: "isletmenin*" column 5.
make_model(df, which_X = 5)

## ######[1] "Y = df_pilot$sosyal_cevresel_donusum ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$sosyal_cevresel_donusum ~ ., data = df_pilot[, 
##     -c(2, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3985 -0.8574  0.4428  0.9172  3.8892 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)               0.05096    0.20596   0.247    0.805
## isletmenin.olcegi._Kucuk -0.13362    0.41192  -0.324    0.746
## isletmenin.olcegi._Orta  -0.07033    0.27844  -0.253    0.801
## 
## Residual standard error: 1.427 on 119 degrees of freedom
## Multiple R-squared:  0.001054,   Adjusted R-squared:  -0.01573 
## F-statistic: 0.0628 on 2 and 119 DF,  p-value: 0.9392

## ######[1] "Y = df_pilot$verimlilik_boyutu ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$verimlilik_boyutu ~ ., data = df_pilot[, 
##     -c(1, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.8541 -0.3437  0.0588  0.8463  5.1661 
## 
## Coefficients:
##                          Estimate Std. Error t value Pr(>|t|)
## (Intercept)                0.2057     0.2112   0.974    0.332
## isletmenin.olcegi._Kucuk  -0.1096     0.4223  -0.259    0.796
## isletmenin.olcegi._Orta   -0.4024     0.2855  -1.410    0.161
## 
## Residual standard error: 1.463 on 119 degrees of freedom
## Multiple R-squared:  0.01707,    Adjusted R-squared:  0.0005541 
## F-statistic: 1.034 on 2 and 119 DF,  p-value: 0.3589
# Predictor: "isletmenin*" column 6.
make_model(df, which_X = 6)

## ######[1] "Y = df_pilot$sosyal_cevresel_donusum ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$sosyal_cevresel_donusum ~ ., data = df_pilot[, 
##     -c(2, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3963 -0.8559  0.4490  0.9585  3.9546 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                      0.2363     0.3265   0.724
## `isletmenin.calisan.sayisi._50 - 249 kisi`      -0.3212     0.3762  -0.854
## `isletmenin.calisan.sayisi._250 kisi ve uzeri`  -0.2267     0.3894  -0.582
##                                                Pr(>|t|)
## (Intercept)                                       0.471
## `isletmenin.calisan.sayisi._50 - 249 kisi`        0.395
## `isletmenin.calisan.sayisi._250 kisi ve uzeri`    0.562
## 
## Residual standard error: 1.423 on 119 degrees of freedom
## Multiple R-squared:  0.006114,   Adjusted R-squared:  -0.01059 
## F-statistic: 0.366 on 2 and 119 DF,  p-value: 0.6943

## ######[1] "Y = df_pilot$verimlilik_boyutu ~ ."
## ######
## 
## Call:
## lm(formula = df_pilot$verimlilik_boyutu ~ ., data = df_pilot[, 
##     -c(1, 3)])
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.9980 -0.2589 -0.0041  0.9329  5.3152 
## 
## Coefficients:
##                                                Estimate Std. Error t value
## (Intercept)                                    -0.12475    0.33780  -0.369
## `isletmenin.calisan.sayisi._50 - 249 kisi`      0.07181    0.38922   0.184
## `isletmenin.calisan.sayisi._250 kisi ve uzeri`  0.24565    0.40285   0.610
##                                                Pr(>|t|)
## (Intercept)                                       0.713
## `isletmenin.calisan.sayisi._50 - 249 kisi`        0.854
## `isletmenin.calisan.sayisi._250 kisi ve uzeri`    0.543
## 
## Residual standard error: 1.472 on 119 degrees of freedom
## Multiple R-squared:  0.004307,   Adjusted R-squared:  -0.01243 
## F-statistic: 0.2574 on 2 and 119 DF,  p-value: 0.7735