# 1. Build the data: 100 draws from a normal distribution with mean 315.4 and
# standard deviation 43.2.
# The RNG seed is fixed so the sample, and every statistic computed from it,
# is identical on each run. NOTE: output recorded from an earlier, unseeded
# run will not match these values exactly.
set.seed(123)
a <- rnorm(100, mean = 315.4, sd = 43.2)
# 2. Summary statistics of the sample.
library(funModeling)
profiling_num(a)
NA
# 1. Visual check of the data: histogram of the sample `a`.
hist(a)
# 2. Q-Q plot: attach ggpubr, which is loaded here for the plotting call that
# follows.
library(ggpubr)
package ‘ggpubr’ was built under R version 4.0.3
# Draw the Q-Q plot of the sample `a`.
ggqqplot(a)
# 3. Shapiro-Wilk normality test on `a`.
shapiro.test(a)
Shapiro-Wilk normality test
data: a
W = 0.98788, p-value = 0.4993
# One-sample, one-sided t-test of H0: mean = 300 against Ha: mean > 300,
# reported with a 99% one-sided confidence bound.
t.test(a, mu = 300, alternative = "greater", conf.level = 0.99)
One Sample t-test
data: a
t = 3.0497, df = 99, p-value = 0.001469
alternative hypothesis: true mean is greater than 300
99 percent confidence interval:
303.0057 Inf
sample estimates:
mean of x
313.3792
library(inferr)
package ‘inferr’ was built under R version 4.0.3
# Wrap the sample in a data frame: infer_os_t_test() is called with a data
# frame plus a bare column name.
df <- data.frame(a)
# The hypothesis is selected with `alternative`. The former `type = all` was
# not a recognised argument: it was absorbed by `...` and ignored, so only the
# default two-tailed table was printed. "all" reports the two-tailed,
# lower-tailed and upper-tailed tests together.
infer_os_t_test(df, a, mu = 300, alternative = "all")
One-Sample Statistics
-----------------------------------------------------------------------------------
Variable Obs Mean Std. Err. Std. Dev. [95% Conf. Interval]
-----------------------------------------------------------------------------------
a 100 313.3792 4.387 43.8701 304.6756 322.0851
-----------------------------------------------------------------------------------
Two Tail Test
---------------
Ho: mean(a) ~=300
Ha: mean(a) !=300
--------------------------------------------------------------------------------
Variable t DF Sig Mean Diff. [95% Conf. Interval]
--------------------------------------------------------------------------------
a 3.05 99 0.99853 13.3792 4.6756 22.0851
--------------------------------------------------------------------------------
# One-sample sign test of the median of `df$a` against 300, as a
# nonparametric companion to the t-test. DescTools is attached for SignTest().
library(DescTools)
SignTest(df$a, mu = 300)
One-sample Sign-Test
data: df$a
S = 61, number of differences = 100, p-value = 0.0352
alternative hypothesis: true median is not equal to 300
96.5 percent confidence interval:
301.2929 320.6618
sample estimates:
median of the differences
310.0481
# One-sample proportion test: 92 successes out of 100 against H0: p = 0.88,
# one-sided (Ha: p > 0.88).
# conf.level is the confidence level, not the significance level: 0.99 gives
# the 99% one-sided bound. The previous value 0.01 produced a "1 percent"
# interval whose lower bound (0.96) was above the observed proportion (0.92).
# NOTE: output recorded with conf.level = 0.01 no longer matches this call.
prop.test(x = 92, n = 100, p = 0.88,
          alternative = "greater", conf.level = 0.99)
1-sample proportions test with continuity correction
data: 92 out of 100, null probability 0.88
X-squared = 1.16, df = 1, p-value = 0.1407
alternative hypothesis: true p is greater than 0.88
1 percent confidence interval:
0.9603793 1.0000000
sample estimates:
p
0.92
# 1. Build the data: twenty measurements for each of two samples, A and B.
two_samples_data <- data.frame(
  A = c(1.48, 1.75, 0.78, 2.85, 0.52, 1.60, 4.15, 3.97, 1.48, 3.10,
        1.02, 0.53, 0.93, 1.60, 0.80, 1.05, 6.32, 3.93, 5.45, 0.97),
  B = c(7.55, 3.75, 0.10, 1.10, 0.60, 0.52, 3.30, 2.10, 0.58, 4.02,
        3.75, 0.65, 1.92, 0.60, 1.53, 4.23, 0.08, 1.48, 1.65, 0.72)
)
# View() opens a data viewer and only makes sense in an interactive session;
# guarding it lets the script also run under Rscript or when knitted.
if (interactive()) {
  View(two_samples_data)
}
# 2. Summary statistics of both samples.
library(funModeling)
profiling_num(two_samples_data)
# 3. To plot the two independent samples together, build one data frame per
# sample (value + sample label) and stack them into a long-format data frame.
A <- data.frame(numbers = two_samples_data$A, category = "A")
B <- data.frame(numbers = two_samples_data$B, category = "B")
AB <- rbind(A, B)
# 4. Visualise the stacked data (ggplot2 is attached through tidyverse).
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
-- Attaching packages --------------------------------- tidyverse 1.3.0 --
v tibble 3.0.3 v dplyr 1.0.2
v tidyr 1.1.2 v stringr 1.4.0
v readr 1.4.0 v forcats 0.5.0
v purrr 0.3.4
-- Conflicts ------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
x dplyr::src() masks Hmisc::src()
x dplyr::summarize() masks Hmisc::summarize()
# Side-by-side box plots of the two samples.
ggplot(AB, aes(category, numbers, fill = category)) +
  geom_boxplot()
# 1. Visual 1: per-sample histograms of counts, one facet row per sample.
# binwidth = 1 because the values only span roughly 0 to 8; the previous
# binwidth of 5 collapsed each sample into two or three bars, which says
# nothing about the shape of the distribution.
ggplot(AB, aes(numbers, fill = category)) +
  geom_histogram(color = "black", binwidth = 1, alpha = 0.5) +
  facet_grid(category ~ .)
# 2. Visual 2: density-scaled histograms with a kernel density overlay.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(AB, aes(numbers, fill = category)) +
  geom_histogram(aes(y = after_stat(density)),
                 color = "black", binwidth = 1, alpha = 0.5) +
  geom_density(alpha = 0.3) +
  facet_grid(category ~ .)
# 3. Numeric test: Shapiro-Wilk normality test for each sample column.
# lapply() iterates over the data-frame columns directly, whereas apply()
# first coerces the whole data frame to a matrix. The result is a list named
# by column (A, B).
lapply(two_samples_data, shapiro.test)
$A
Shapiro-Wilk normality test
data: newX[, i]
W = 0.84482, p-value = 0.00437
$B
Shapiro-Wilk normality test
data: newX[, i]
W = 0.84249, p-value = 0.004
# Values of sample A, picked out of the long-format data by its label.
AB$numbers[AB$category == "A"]
[1] 1.48 1.75 0.78 2.85 0.52 1.60 4.15 3.97 1.48 3.10 1.02 0.53 0.93 1.60
[15] 0.80 1.05 6.32 3.93 5.45 0.97
# Shapiro-Wilk normality test on sample A, selected from the long-format data.
shapiro.test(AB[AB$category == "A",]$numbers)
Shapiro-Wilk normality test
data: AB[AB$category == "A", ]$numbers
W = 0.84482, p-value = 0.00437
# Values of sample B, picked out of the long-format data by its label.
AB$numbers[AB$category == "B"]
[1] 7.55 3.75 0.10 1.10 0.60 0.52 3.30 2.10 0.58 4.02 3.75 0.65 1.92 0.60
[15] 1.53 4.23 0.08 1.48 1.65 0.72
# Shapiro-Wilk normality test on sample B, selected from the long-format data.
shapiro.test(AB[AB$category == "B",]$numbers)
Shapiro-Wilk normality test
data: AB[AB$category == "B", ]$numbers
W = 0.84249, p-value = 0.004
# Levene's test for equality of variances between samples A and B, centred on
# the group means. The function is called as car::leveneTest() because no
# library(car) call is visible in this script; wrapping the label in factor()
# avoids the "group coerced to factor" warning for the character column.
car::leveneTest(numbers ~ factor(category), data = AB, center = mean)
group coerced to factor.
Levene's Test for Homogeneity of Variance (center = mean)
Df F value Pr(>F)
group 1 0.0113 0.9159
38
# Two-sample t-test comparing the means of A and B with a pooled variance
# estimate. TRUE is spelled out because `T` is an ordinary variable that can
# be reassigned.
t.test(AB$numbers ~ AB$category, var.equal = TRUE)
Two Sample t-test
data: AB$numbers by AB$category
t = 0.35439, df = 38, p-value = 0.725
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.9542608 1.3592608
sample estimates:
mean in group A mean in group B
2.2140 2.0115
library(inferr)
# Independent two-sample t-test through inferr, grouping `numbers` by
# `category`.
# NOTE(review): `category` is a character column here; confirm that
# infer_ts_ind_ttest() accepts it or whether it must be a factor.
infer_ts_ind_ttest(data = AB, x = category, y = numbers)
# Wilcoxon rank-sum test as the nonparametric alternative. The samples contain
# tied values, so an exact p-value cannot be computed; exact = FALSE requests
# the normal approximation explicitly (the same result R falls back to) and
# avoids the "cannot compute exact p-value with ties" warning.
wilcox.test(two_samples_data$A, two_samples_data$B, exact = FALSE)
cannot compute exact p-value with ties
Wilcoxon rank sum test with continuity correction
data: two_samples_data$A and two_samples_data$B
W = 224.5, p-value = 0.5161
alternative hypothesis: true location shift is not equal to 0
# Two-sample test for equality of proportions: 470 out of 1000 versus
# 134 out of 871.
prop.test(x = c(470, 134), n = c(1000, 871))
2-sample test for equality of proportions with continuity
correction
data: c(470, 134) out of c(1000, 871)
X-squared = 211.41, df = 1, p-value < 2.2e-16
alternative hypothesis: two.sided
95 percent confidence interval:
0.2759513 0.3563564
sample estimates:
prop 1 prop 2
0.4700000 0.1538462
# 1. Build the data: seven measurements taken before ("evvel") and seven taken
# after ("sonra"), then stacked into one long-format data frame.
evvel <- c(158, 189, 202, 353, 416, 426, 441)
sonra <- c(284, 214, 101, 227, 290, 176, 290)
A <- data.frame(ortalama = evvel, evvel_sonra = rep("evvel", length(evvel)))
B <- data.frame(ortalama = sonra, evvel_sonra = rep("sonra", length(sonra)))
df <- rbind(A, B)
# 2. Summary statistics: mean, standard deviation and variance per phase.
dplyr::summarise(
  dplyr::group_by(df, evvel_sonra),
  mean(ortalama), sd(ortalama), var(ortalama)
)
`summarise()` ungrouping output (override with `.groups` argument)
# 3. Visualise the data: box plots of `ortalama` for the "evvel" and "sonra"
# measurements.
library(tidyverse)
ggplot(df, aes(evvel_sonra, ortalama)) +
geom_boxplot()
# Paired t-test of the before ("evvel") and after ("sonra") measurements.
# The two vectors are passed directly so the pairing is explicit: element i of
# `evvel` is matched with element i of `sonra`. The formula interface gives no
# such guarantee, and R >= 4.4.0 rejects `paired` there. Both forms estimate
# mean(evvel - sonra).
t.test(evvel, sonra, paired = TRUE)
Paired t-test
data: df$ortalama by df$evvel_sonra
t = 1.8425, df = 6, p-value = 0.115
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-28.26101 200.54672
sample estimates:
mean of the differences
86.14286
# Shapiro-Wilk test on each measurement column. lapply() walks the data-frame
# columns directly instead of coercing to a matrix as apply() does; the result
# is a list named evvel / sonra.
lapply(data.frame(evvel, sonra), shapiro.test)
# The paired t-test assumes normality of the within-pair differences rather
# than of each column separately, so test the differences as well.
shapiro.test(evvel - sonra)
$evvel
Shapiro-Wilk normality test
data: newX[, i]
W = 0.83244, p-value = 0.08438
$sonra
Shapiro-Wilk normality test
data: newX[, i]
W = 0.88197, p-value = 0.2354
# Shapiro-Wilk test on the "evvel" measurements only, selected by subsetting
# the data frame `A`.
shapiro.test(A[A$evvel_sonra == "evvel",]$ortalama)
Shapiro-Wilk normality test
data: A[A$evvel_sonra == "evvel", ]$ortalama
W = 0.83244, p-value = 0.08438
# Wide layout (one column per measurement) for infer_ts_paired_ttest(), which
# is given the two paired columns as `x` and `y`.
df1 <- data.frame(evvel, sonra)
infer_ts_paired_ttest(df1, x = evvel, y = sonra)
Paired Samples Statistics
------------------------------------------------------------------------------
Variables Obs Mean Std. Err. Std. Dev. [95% Conf. Interval]
------------------------------------------------------------------------------
evvel 7 312.1429 47.07614 124.5518 196.95 427.34
sonra 7 226.00 26.64851 70.50532 160.79 291.21
------------------------------------------------------------------------------
diff 7 86.14286 46.75439 123.7005 -28.27 200.55
------------------------------------------------------------------------------
Paired Samples Correlations
--------------------------------------------
Variables Obs Correlation Sig.
evvel & sonra 7 0.30 0.52
--------------------------------------------
Paired Samples Test
-------------------
Ho: mean(evvel - sonra) = 0
Ha: mean(evvel - sonra) ~= 0
---------------------------------------
Variables t df Sig.
---------------------------------------
evvel - sonra 1.8425 6 0.115
---------------------------------------
# Wilcoxon signed-rank test as the nonparametric alternative to the paired
# t-test. The paired differences contain ties, so an exact p-value cannot be
# computed; exact = FALSE requests the normal approximation explicitly (the
# same result R falls back to) and avoids the ties warning. TRUE is spelled
# out because `T` can be reassigned.
wilcox.test(df1$evvel, df1$sonra, paired = TRUE, exact = FALSE)
cannot compute exact p-value with ties
Wilcoxon signed rank test with continuity correction
data: df1$evvel and df1$sonra
V = 23, p-value = 0.1479
alternative hypothesis: true location shift is not equal to 0
# 1. Build the data: thirty measurements for each of the three groups.
A <- c(
  28, 33, 30, 29, 28, 29, 27, 31, 30, 32,
  28, 33, 25, 29, 27, 31, 31, 30, 31, 34,
  30, 32, 31, 34, 28, 32, 31, 28, 33, 29
)
B <- c(
  31, 32, 30, 30, 33, 32, 34, 27, 36, 30,
  31, 30, 38, 29, 30, 34, 34, 31, 35, 35,
  33, 30, 28, 29, 26, 37, 31, 28, 34, 33
)
C <- c(
  40, 33, 38, 41, 42, 43, 38, 35, 39, 39,
  36, 34, 35, 40, 38, 36, 39, 36, 33, 35,
  38, 35, 40, 40, 39, 38, 38, 43, 40, 42
)
# 2. Label each vector with its group and stack everything into one
# long-format data frame.
A <- data.frame(vaxt = A, grup = rep("A", length(A)))
B <- data.frame(vaxt = B, grup = rep("B", length(B)))
C <- data.frame(vaxt = C, grup = rep("C", length(C)))
df <- rbind(A, B, C)
# 3. Summary statistics: mean, median and standard deviation per group.
summarise(
  group_by(df, grup),
  mean(vaxt), median(vaxt), sd(vaxt)
)
`summarise()` ungrouping output (override with `.groups` argument)
# 4. Visualise the data: one box plot of `vaxt` per group.
library(tidyverse)
ggplot(df, aes(grup, vaxt, fill = grup)) +
geom_boxplot()
# Shapiro-Wilk normality test for group A.
shapiro.test(df[df$grup == "A",]$vaxt)
Shapiro-Wilk normality test
data: df[df$grup == "A", ]$vaxt
W = 0.96974, p-value = 0.5322
# Shapiro-Wilk normality test for group B.
shapiro.test(df[df$grup == "B",]$vaxt)
Shapiro-Wilk normality test
data: df[df$grup == "B", ]$vaxt
W = 0.97899, p-value = 0.798
# Shapiro-Wilk normality test for group C.
shapiro.test(df[df$grup == "C",]$vaxt)
Shapiro-Wilk normality test
data: df[df$grup == "C", ]$vaxt
W = 0.95792, p-value = 0.2738
# Bartlett test of homogeneity of variances of `vaxt` across the groups.
bartlett.test(vaxt ~ grup, data = df)
Bartlett test of homogeneity of variances
data: vaxt by grup
Bartlett's K-squared = 2.4009, df = 2, p-value = 0.3011
# Levene's test for homogeneity of variances across the groups (default
# centre: median). The function is called as car::leveneTest() because no
# library(car) call is visible in this script; wrapping the label in factor()
# avoids the "group coerced to factor" warning for the character column.
car::leveneTest(vaxt ~ factor(grup), data = df)
group coerced to factor.
Levene's Test for Homogeneity of Variance (center = median)
Df F value Pr(>F)
group 2 1.0267 0.3625
87
# Fit the one-way ANOVA of `vaxt` on `grup` and print the fitted terms.
aov(vaxt ~ grup, data = df)
Call:
aov(formula = vaxt ~ grup, data = df)
Terms:
grup Residuals
Sum of Squares 1068.8222 622.4667
Deg. of Freedom 2 87
Residual standard error: 2.674844
Estimated effects may be unbalanced
# ANOVA table (F test) for the same one-way model.
summary(aov(vaxt ~ grup, data = df))
Df Sum Sq Mean Sq F value Pr(>F)
grup 2 1068.8 534.4 74.69 <2e-16 ***
Residuals 87 622.5 7.2
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# Tukey HSD pairwise comparisons between the groups for the same one-way model.
TukeyHSD(aov(vaxt ~ grup, data = df))
Tukey multiple comparisons of means
95% family-wise confidence level
Fit: aov(formula = vaxt ~ grup, data = df)
$grup
diff lwr upr p adj
B-A 1.566667 -0.08015436 3.213488 0.0657253
C-A 7.966667 6.31984564 9.613488 0.0000000
C-B 6.400000 4.75317897 8.046821 0.0000000
# The same one-way ANOVA of `vaxt` by `grup`, run through infer_oneway_anova().
infer_oneway_anova(df, vaxt, grup)
ANOVA
--------------------------------------------------------------------
Sum of
Squares DF Mean Square F Sig.
--------------------------------------------------------------------
Between Groups 1068.822 2 534.411 74.691 0.0000
Within Groups 622.467 87 7.155
Total 1691.289 89
--------------------------------------------------------------------
Report
------------------------------------------------
Category N Mean Std. Dev.
------------------------------------------------
A 30 30.133 2.224
B 30 31.700 2.938
C 30 38.100 2.808
------------------------------------------------
Number of obs = 90 R-squared = 0.632
Root MSE = 2.6748 Adj R-squared = 0.6235
# Kruskal-Wallis rank sum test of `vaxt` by `grup`: the rank-based counterpart
# of the one-way ANOVA.
kruskal.test(vaxt ~ grup, data = df)
Kruskal-Wallis rank sum test
data: vaxt by grup
Kruskal-Wallis chi-squared = 54.198, df = 2, p-value = 1.702e-12
# 1. Select the data: the built-in mtcars data set.
df <- mtcars
head(df)
# 2. Correlation between two variables: scatter plot of mpg against wt with a
# regression line, its confidence band and the Pearson coefficient.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
library(ggpubr)
ggscatter(df, x = "mpg", y = "wt",
          add = "reg.line",
          conf.int = TRUE,
          cor.coef = TRUE,
          cor.method = "pearson")
# Shapiro-Wilk normality test for mpg.
shapiro.test(df$mpg)
Shapiro-Wilk normality test
data: df$mpg
W = 0.94756, p-value = 0.1229
# Shapiro-Wilk normality test for wt.
shapiro.test(df$wt)
Shapiro-Wilk normality test
data: df$wt
W = 0.94326, p-value = 0.09265
# Pearson product-moment correlation test between mpg and wt.
cor.test(df$mpg, df$wt, method = "pearson")
Pearson's product-moment correlation
data: df$mpg and df$wt
t = -9.559, df = 30, p-value = 1.294e-10
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.9338264 -0.7440872
sample estimates:
cor
-0.8676594
# Spearman rank correlation test between mpg and wt. The data contain ties, so
# an exact p-value cannot be computed; exact = FALSE requests the asymptotic
# p-value explicitly (the same result R falls back to) and avoids the
# "Cannot compute exact p-value with ties" warning.
cor.test(df$mpg, df$wt, method = "spearman", exact = FALSE)
Cannot compute exact p-value with ties
Spearman's rank correlation rho
data: df$mpg and df$wt
S = 10292, p-value = 1.488e-11
alternative hypothesis: true rho is not equal to 0
sample estimates:
rho
-0.886422
# Kendall rank correlation test between mpg and wt. The data contain ties, so
# an exact p-value cannot be computed; exact = FALSE requests the normal
# approximation explicitly (the same result R falls back to) and avoids the
# "Cannot compute exact p-value with ties" warning.
cor.test(df$mpg, df$wt, method = "kendall", exact = FALSE)
Cannot compute exact p-value with ties
Kendall's rank correlation tau
data: df$mpg and df$wt
z = -5.7981, p-value = 6.706e-09
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
-0.7278321
# 1. Correlation matrix of all variables (cor() defaults to Pearson).
cor(df)
mpg cyl disp hp drat wt
mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
qsec vs am gear carb
mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# 2. Correlation matrix using only rows with no missing values
# (use = "complete.obs").
cor(df, use = "complete.obs")
mpg cyl disp hp drat wt
mpg 1.0000000 -0.8521620 -0.8475514 -0.7761684 0.68117191 -0.8676594
cyl -0.8521620 1.0000000 0.9020329 0.8324475 -0.69993811 0.7824958
disp -0.8475514 0.9020329 1.0000000 0.7909486 -0.71021393 0.8879799
hp -0.7761684 0.8324475 0.7909486 1.0000000 -0.44875912 0.6587479
drat 0.6811719 -0.6999381 -0.7102139 -0.4487591 1.00000000 -0.7124406
wt -0.8676594 0.7824958 0.8879799 0.6587479 -0.71244065 1.0000000
qsec 0.4186840 -0.5912421 -0.4336979 -0.7082234 0.09120476 -0.1747159
vs 0.6640389 -0.8108118 -0.7104159 -0.7230967 0.44027846 -0.5549157
am 0.5998324 -0.5226070 -0.5912270 -0.2432043 0.71271113 -0.6924953
gear 0.4802848 -0.4926866 -0.5555692 -0.1257043 0.69961013 -0.5832870
carb -0.5509251 0.5269883 0.3949769 0.7498125 -0.09078980 0.4276059
qsec vs am gear carb
mpg 0.41868403 0.6640389 0.59983243 0.4802848 -0.55092507
cyl -0.59124207 -0.8108118 -0.52260705 -0.4926866 0.52698829
disp -0.43369788 -0.7104159 -0.59122704 -0.5555692 0.39497686
hp -0.70822339 -0.7230967 -0.24320426 -0.1257043 0.74981247
drat 0.09120476 0.4402785 0.71271113 0.6996101 -0.09078980
wt -0.17471588 -0.5549157 -0.69249526 -0.5832870 0.42760594
qsec 1.00000000 0.7445354 -0.22986086 -0.2126822 -0.65624923
vs 0.74453544 1.0000000 0.16834512 0.2060233 -0.56960714
am -0.22986086 0.1683451 1.00000000 0.7940588 0.05753435
gear -0.21268223 0.2060233 0.79405876 1.0000000 0.27407284
carb -0.65624923 -0.5696071 0.05753435 0.2740728 1.00000000
# 3. Correlations together with their p-values: rcorr() is called on the data
# converted with as.matrix(); Hmisc is attached for it.
library(Hmisc)
rcorr(as.matrix(df))
mpg cyl disp hp drat wt qsec vs am gear carb
mpg 1.00 -0.85 -0.85 -0.78 0.68 -0.87 0.42 0.66 0.60 0.48 -0.55
cyl -0.85 1.00 0.90 0.83 -0.70 0.78 -0.59 -0.81 -0.52 -0.49 0.53
disp -0.85 0.90 1.00 0.79 -0.71 0.89 -0.43 -0.71 -0.59 -0.56 0.39
hp -0.78 0.83 0.79 1.00 -0.45 0.66 -0.71 -0.72 -0.24 -0.13 0.75
drat 0.68 -0.70 -0.71 -0.45 1.00 -0.71 0.09 0.44 0.71 0.70 -0.09
wt -0.87 0.78 0.89 0.66 -0.71 1.00 -0.17 -0.55 -0.69 -0.58 0.43
qsec 0.42 -0.59 -0.43 -0.71 0.09 -0.17 1.00 0.74 -0.23 -0.21 -0.66
vs 0.66 -0.81 -0.71 -0.72 0.44 -0.55 0.74 1.00 0.17 0.21 -0.57
am 0.60 -0.52 -0.59 -0.24 0.71 -0.69 -0.23 0.17 1.00 0.79 0.06
gear 0.48 -0.49 -0.56 -0.13 0.70 -0.58 -0.21 0.21 0.79 1.00 0.27
carb -0.55 0.53 0.39 0.75 -0.09 0.43 -0.66 -0.57 0.06 0.27 1.00
n= 32
P
mpg cyl disp hp drat wt qsec vs am
mpg 0.0000 0.0000 0.0000 0.0000 0.0000 0.0171 0.0000 0.0003
cyl 0.0000 0.0000 0.0000 0.0000 0.0000 0.0004 0.0000 0.0022
disp 0.0000 0.0000 0.0000 0.0000 0.0000 0.0131 0.0000 0.0004
hp 0.0000 0.0000 0.0000 0.0100 0.0000 0.0000 0.0000 0.1798
drat 0.0000 0.0000 0.0000 0.0100 0.0000 0.6196 0.0117 0.0000
wt 0.0000 0.0000 0.0000 0.0000 0.0000 0.3389 0.0010 0.0000
qsec 0.0171 0.0004 0.0131 0.0000 0.6196 0.3389 0.0000 0.2057
vs 0.0000 0.0000 0.0000 0.0000 0.0117 0.0010 0.0000 0.3570
am 0.0003 0.0022 0.0004 0.1798 0.0000 0.0000 0.2057 0.3570
gear 0.0054 0.0042 0.0010 0.4930 0.0000 0.0005 0.2425 0.2579 0.0000
carb 0.0011 0.0019 0.0253 0.0000 0.6212 0.0146 0.0000 0.0007 0.7545
gear carb
mpg 0.0054 0.0011
cyl 0.0042 0.0019
disp 0.0010 0.0253
hp 0.4930 0.0000
drat 0.0000 0.6212
wt 0.0005 0.0146
qsec 0.2425 0.0000
vs 0.2579 0.0007
am 0.0000 0.7545
gear 0.1290
carb 0.1290
library(PerformanceAnalytics)
Loading required package: xts
Loading required package: zoo
Attaching package: ‘zoo’
The following objects are masked from ‘package:base’:
as.Date, as.Date.numeric
Attaching package: ‘xts’
The following objects are masked from ‘package:dplyr’:
first, last
Attaching package: ‘PerformanceAnalytics’
The following object is masked from ‘package:graphics’:
legend
# Keep the six continuous mtcars variables for the correlation chart.
# Columns are selected by name rather than by position (1, 3, 4, 5, 6, 7) so
# the selection is self-describing and does not depend on column order.
df <- mtcars[, c("mpg", "disp", "hp", "drat", "wt", "qsec")]
# Correlation chart of the selected variables, with histograms enabled and
# solid points (pch = 19). TRUE is spelled out because `T` can be reassigned.
chart.Correlation(df, histogram = TRUE, pch = 19)