Kullanılan Paketler
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
##
## Attaching package: 'stevemisc'
##
## The following object is masked from 'package:lubridate':
##
## dst
##
## The following object is masked from 'package:dplyr':
##
## tbl_df
library(knitr)
library(summarytools)
##
## Attaching package: 'summarytools'
##
## The following object is masked from 'package:tibble':
##
## view
library(outliers)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
## Zorunlu paket yükleniyor: ggpp
## Registered S3 methods overwritten by 'ggpp':
## method from
## heightDetails.titleGrob ggplot2
## widthDetails.titleGrob ggplot2
##
## Attaching package: 'ggpp'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
##
## Attaching package: 'psych'
##
## The following object is masked from 'package:outliers':
##
## outlier
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
##
## Attaching package: 'sur'
##
## The following object is masked from 'package:psych':
##
## skew
library(moments)
library(corrplot)
## corrplot 0.95 loaded
##
## Attaching package: 'olsrr'
##
## The following object is masked from 'package:datasets':
##
## rivers
library(dplyr)
library(naniar)
Çok Değişkenli Normallik Sayıltısı
# Load sur before the skewness calls that follow.
library(sur)
# attach() puts the columns of veri on the search path, which is why the
# calls below can refer to bare names such as age and mass.
# NOTE(review): the trailing note presumably described a skew() call that is
# not visible at this point - confirm against the original script.
attach(veri) # code that gives the skewness value
## The following object is masked from package:datasets:
##
## pressure
## [1] 1.269711
se.skew(age) # standard error of the skewness
## [1] 0.1059011
skew.ratio(age) # skewness divided by the standard error of the skewness
## [1] 11.9896
skew(age) /se.skew(age) # same value as the line above
## [1] 11.9896
library(moments) # hypothesis-test check of the skewness value
library(labelled)
# Jarque-Bera normality test on age, with remove_labels() applied first.
jarque.test(remove_labels(age))
##
## Jarque-Bera Normality Test
##
## data: remove_labels(age)
## JB = 172.89, p-value < 2.2e-16
## alternative hypothesis: greater
# Jarque-Bera normality test on mass, with remove_labels() applied first.
jarque.test(remove_labels(mass))
##
## Jarque-Bera Normality Test
##
## data: remove_labels(mass)
## JB = 70.222, p-value = 5.551e-16
## alternative hypothesis: greater
## [1] 5.947561
Doğrusallık
# Linearity check: scatter age against each candidate predictor in turn.
# invisible() keeps the list returned by lapply() out of the output.
invisible(lapply(c("mass", "insulin", "triceps"), function(predictor) {
  plot(
    veri[[predictor]], veri$age,
    main = paste("age ~", predictor),
    xlab = predictor, ylab = "age",
    pch = 19, col = "blue"
  )
}))

# Data transformation: log of (age + 1), stored as age1.
age1 <- log(age+1)
# Descriptive statistics for the untransformed age.
describe(age)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 532 31.61 10.76 28 29.95 8.9 21 81 60 1.27 1.15 0.47
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 532 3.44 0.3 3.37 3.41 0.32 3.09 4.41 1.32 0.75 -0.44 0.01
Çoklu Bağlantı ve Tekillik
# Pairwise correlations of the first eight columns of veri, rendered as a
# table rounded to two decimals. The argument is spelled out as `digits`
# instead of relying on partial matching of `digit`.
cor(veri[, 1:8]) %>% kable(digits = 2)
pregnant |
1.00 |
0.13 |
0.20 |
0.10 |
0.07 |
0.01 |
0.01 |
0.64 |
glucose |
0.13 |
1.00 |
0.22 |
0.23 |
0.50 |
0.25 |
0.17 |
0.28 |
pressure |
0.20 |
0.22 |
1.00 |
0.23 |
0.09 |
0.31 |
0.01 |
0.35 |
triceps |
0.10 |
0.23 |
0.23 |
1.00 |
0.16 |
0.65 |
0.12 |
0.16 |
insulin |
0.07 |
0.50 |
0.09 |
0.16 |
1.00 |
0.20 |
0.12 |
0.18 |
mass |
0.01 |
0.25 |
0.31 |
0.65 |
0.20 |
1.00 |
0.15 |
0.07 |
pedigree |
0.01 |
0.17 |
0.01 |
0.12 |
0.12 |
0.15 |
1.00 |
0.07 |
age |
0.64 |
0.28 |
0.35 |
0.16 |
0.18 |
0.07 |
0.07 |
1.00 |
library(ggcorrplot)
# Correlation matrix of the first eight columns, computed once and reused by
# both plots below.
cor_mat <- cor(veri[, 1:8])
# Correlation plot with the coefficient printed in each cell.
ggcorrplot(cor_mat, lab = TRUE, lab_size = 4)

# Matrix from cor_pmat() for the same columns, passed on as p.mat.
cor_pmat <- ggcorrplot::cor_pmat(x = veri[, 1:8])
# TRUE is written out in full: T is an ordinary variable that can be
# reassigned, TRUE is not.
ggcorrplot::ggcorrplot(cor_mat, lab = TRUE, lab_size = 4, p.mat = cor_pmat)

library(PerformanceAnalytics)
## Zorunlu paket yükleniyor: xts
## Zorunlu paket yükleniyor: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## Attaching package: 'PerformanceAnalytics'
## The following objects are masked from 'package:moments':
##
## kurtosis, skewness
## The following object is masked from 'package:graphics':
##
## legend
# Correlation chart (PerformanceAnalytics) for the first eight columns of veri.
chart.Correlation(veri[, 1:8])

# Linear model of age on the other seven variables; the fitted object is
# reused by the collinearity diagnostics further down.
model <- lm(
  age ~ mass + glucose + insulin + pressure + triceps + pedigree + pregnant,
  data = veri
)
library(olsrr)
# Tolerance and VIF per predictor as a table rounded to two decimals
# (`digits` spelled out instead of the partially matched `digit`).
ols_vif_tol(model) %>% kable(digits = 2)
mass |
0.53 |
1.88 |
glucose |
0.69 |
1.44 |
insulin |
0.74 |
1.34 |
pressure |
0.84 |
1.19 |
triceps |
0.57 |
1.76 |
pedigree |
0.95 |
1.05 |
pregnant |
0.94 |
1.07 |
## Zorunlu paket yükleniyor: carData
##
## Attaching package: 'carData'
## The following objects are masked from 'package:sur':
##
## Anscombe, States
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## mass glucose insulin pressure triceps pedigree pregnant
## 1.881770 1.439573 1.343740 1.185145 1.756606 1.047364 1.069395
library(mctest)
# type = "i" requests the individual (per-predictor) multicollinearity
# diagnostics for the fitted model.
mctest(model,type= "i")
##
## Call:
## imcdiag(mod = mod, method = method, corr = FALSE, vif = vif,
## tol = tol, conf = conf, cvif = cvif, ind1 = ind1, ind2 = ind2,
## leamer = leamer, all = all)
##
##
## All Individual Multicollinearity Diagnostics Result
##
## VIF TOL Wi Fi Leamer CVIF Klein IND1 IND2
## mass 1.8818 0.5314 77.1548 92.7622 0.7290 2.9446 0 0.0061 1.8995
## glucose 1.4396 0.6947 38.4626 46.2431 0.8335 2.2527 0 0.0079 1.2378
## insulin 1.3437 0.7442 30.0773 36.1615 0.8627 2.1027 0 0.0085 1.0370
## pressure 1.1851 0.8438 16.2002 19.4773 0.9186 1.8545 0 0.0096 0.6333
## triceps 1.7566 0.5693 66.2030 79.5949 0.7545 2.7488 0 0.0065 1.7460
## pedigree 1.0474 0.9548 4.1444 4.9827 0.9771 1.6389 0 0.0109 0.1833
## pregnant 1.0694 0.9351 6.0720 7.3003 0.9670 1.6734 0 0.0107 0.2631
##
## 1 --> COLLINEARITY is detected by the test
## 0 --> COLLINEARITY is not detected by the test
##
## insulin , triceps , pedigree , coefficient(s) are non-significant may be due to multicollinearity
##
## R-square of y on all x: 0.4936
##
## * use method argument to check which regressors may be the reason of collinearity
## ===================================
##
## Call:
## eigprop(mod = model)
##
## Eigenvalues CI (Intercept) mass glucose insulin pressure triceps
## 1 6.9094 1.0000 0.0004 0.0004 0.0009 0.0037 0.0005 0.0013
## 2 0.4369 3.9767 0.0002 0.0005 0.0006 0.0130 0.0000 0.0009
## 3 0.2780 4.9855 0.0001 0.0003 0.0025 0.2274 0.0003 0.0009
## 4 0.2315 5.4637 0.0052 0.0067 0.0002 0.5165 0.0073 0.0276
## 5 0.0794 9.3277 0.0304 0.0025 0.0569 0.0247 0.0357 0.5227
## 6 0.0342 14.2157 0.0289 0.0370 0.8977 0.1962 0.0862 0.0419
## 7 0.0175 19.8656 0.0020 0.7576 0.0046 0.0101 0.4315 0.3193
## 8 0.0131 22.9682 0.9328 0.1950 0.0367 0.0084 0.4384 0.0854
## pedigree pregnant
## 1 0.0048 0.0056
## 2 0.0543 0.8728
## 3 0.7358 0.0257
## 4 0.1845 0.0450
## 5 0.0038 0.0084
## 6 0.0039 0.0006
## 7 0.0097 0.0402
## 8 0.0031 0.0018
##
## ===============================
## Row 7==> mass, proportion 0.757614 >= 0.50
## Row 6==> glucose, proportion 0.897676 >= 0.50
## Row 4==> insulin, proportion 0.516464 >= 0.50
## Row 5==> triceps, proportion 0.522724 >= 0.50
## Row 3==> pedigree, proportion 0.735846 >= 0.50
## Row 2==> pregnant, proportion 0.872805 >= 0.50
Regresyon analizleri
# From here on the analyses continue with the age, mass and insulin variables.
# (This note used to wrap onto an uncommented second line, which does not
# parse as R.)
library(broom)
# Pearson correlation test between age and mass, tidied into a one-row table
# rounded to three decimals (`digits` written out in full).
cor_1 <- cor.test(~ age + mass, data = veri)
tidy(cor_1) %>% kable(digits = 3)
0.073 |
1.695 |
0.091 |
530 |
-0.012 |
0.157 |
Pearson’s product-moment correlation |
two.sided |
# Pearson correlation test between age and insulin, tidied into a one-row
# table rounded to three decimals (`digits` written out in full).
cor_1 <- cor.test(~ age + insulin, data = veri)
tidy(cor_1) %>% kable(digits = 3)
0.176 |
4.124 |
0 |
530 |
0.093 |
0.257 |
Pearson’s product-moment correlation |
two.sided |
# Pearson correlation test between mass and insulin, tidied into a one-row
# table rounded to three decimals (`digits` written out in full).
cor_1 <- cor.test(~ mass + insulin, data = veri)
tidy(cor_1) %>% kable(digits = 3)
0.199 |
4.663 |
0 |
530 |
0.115 |
0.279 |
Pearson’s product-moment correlation |
two.sided |
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Pairwise plot matrix (GGally) of the age, mass and insulin columns.
ggpairs(veri[c("age", "mass", "insulin")])

library(scatterplot3d)
# 3-D scatter of age, mass and insulin with angle = 75.
scatterplot3d(
  veri[c("age", "mass", "insulin")],
  angle = 75,
  color = "steelblue",
  pch = 16
)

# Same three columns and angle, drawn with type = "h" and the box switched off.
scatterplot3d(
  veri[c("age", "mass", "insulin")],
  type = "h",
  box = FALSE,
  angle = 75,
  color = "steelblue",
  pch = 16
)

library(rgl)
# rgl 3-D plot of age (x), mass (y) and insulin (z), drawn in red with
# type = "s".
plot3d(
  veri$age, veri$mass, veri$insulin,
  type = "s", size = 1.5, col = "red",
  xlab = "age", ylab = "mass", zlab = "insulin"
)
# Emit the current rgl scene as a widget.
rglwidget()