# lapply() applies a function to every element of a list and returns a list.
# No seed is set, so the values shown for the random element b vary per run.
x <- list(a = 1:5, b = rnorm(10))
lapply(x, mean)
## $a
## [1] 3
##
## $b
## [1] 0.2065104

# sapply() performs the same computation but simplifies the result to a
# named vector here, because every mean has length one.
sapply(x, mean)
## a b
## 3.0000000 0.2065104

# Arguments given after the function (here na.rm) are passed on to mean(),
# so the NA in element a is dropped before averaging.
x1 <- list(
  a = c(1:5, NA),
  b = rnorm(10)
)
lapply(x1, mean, na.rm = TRUE)
## $a
## [1] 3
##
## $b
## [1] 0.2940407
# Means of four vectors of different lengths. b, c and d are random draws
# centred on 0, 1 and 5, so the printed means change from run to run.
x <- list(
  a = 1:4,
  b = rnorm(10),
  c = rnorm(20, 1),
  d = rnorm(100, 5)
)
lapply(x, mean)
## $a
## [1] 2.5
##
## $b
## [1] -0.03896723
##
## $c
## [1] 1.361732
##
## $d
## [1] 4.930519
# Each element of x is handed to runif() as its first argument, so element i
# of the result holds i uniform random numbers.
x <- 1:4
lapply(x, runif)
## [[1]]
## [1] 0.2584043
##
## [[2]]
## [1] 0.5565635 0.1654728
##
## [[3]]
## [1] 0.1260730 0.7567073 0.2995682
##
## [[4]]
## [1] 0.4429644 0.5854476 0.5840174 0.7560373
# Further arguments to lapply() are passed on to runif(); the draws now come
# from the interval [0, 10] instead of [0, 1].
x <- 1:4
lapply(x, runif, min = 0, max = 10)
## [[1]]
## [1] 0.8255283
##
## [[2]]
## [1] 5.372480 5.305196
##
## [[3]]
## [1] 4.51547459 6.94614289 0.04974884
##
## [[4]]
## [1] 2.3299425 7.5564161 7.8531321 0.7896624
# Extract the first column of every matrix in the list with an anonymous
# function (a function defined inline, without a name).
x <- list(
  a = matrix(1:4, 2, 2),
  b = matrix(1:6, 3, 2)
)
lapply(x, function(m) m[, 1])
## $a
## [1] 1 2
##
## $b
## [1] 1 2 3
# Multiply every element of x1 by 0.75; the NA in element a stays NA.
lapply(x1, function(v) v * 0.75)
## $a
## [1] 0.75 1.50 2.25 3.00 3.75 NA
##
## $b
## [1] -1.0878211 0.3877561 -0.6389411 0.6539283 1.2427780 -0.6454784
## [7] 0.2964486 0.5849739 0.7070885 0.7045725
# The same four-vector list, first summarised with lapply() (list result) ...
x <- list(
  a = 1:4,
  b = rnorm(10),
  c = rnorm(20, 1),
  d = rnorm(100, 5)
)
lapply(x, mean)
## $a
## [1] 2.5
##
## $b
## [1] 0.2175888
##
## $c
## [1] 1.08462
##
## $d
## [1] 5.017358

# ... and then with sapply(), which returns the means as one named vector.
sapply(x, mean)
## a b c d
## 2.5000000 0.2175888 1.0846200 5.0173580
x <- c(rnorm(10), runif(10), rnorm(10, 5)) # vector with 30 elements
f <- gl(3, 10) # grouping factor with 3 categories
# split() breaks x into one piece per level of f.
split(x, f)
## $`1`
## [1] 0.09217836 -0.22006110 -1.59456258 0.28222376 -0.88551978 -0.01896593
## [7] 0.45715737 0.20407362 -0.02152475 0.28548222
##
## $`2`
## [1] 0.260272064 0.440674181 0.879722366 0.007292072 0.573706792 0.398069528
## [7] 0.353848374 0.954055202 0.520953819 0.656104981
##
## $`3`
## [1] 4.694031 6.451051 3.774135 2.967040 3.923932 5.157548 5.700786 4.465467
## [9] 4.093606 4.777955
# Applying mean() to the pieces gives one mean per group.
lapply(split(x, f), mean)
## $`1`
## [1] -0.1419519
##
## $`2`
## [1] 0.5044699
##
## $`3`
## [1] 4.600555
# The airquality data frame comes from the datasets package.
library(datasets)
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# Split the data frame by Month into a list with one data frame per month;
# str() prints the structure of that list.
s <- split(airquality, airquality$Month)
str(s)
## List of 5
## $ 5:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 41 36 12 18 NA 28 23 19 8 NA ...
## ..$ Solar.R: int [1:31] 190 118 149 313 NA NA 299 99 19 194 ...
## ..$ Wind : num [1:31] 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## ..$ Temp : int [1:31] 67 72 74 62 56 66 65 59 61 69 ...
## ..$ Month : int [1:31] 5 5 5 5 5 5 5 5 5 5 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 6:'data.frame': 30 obs. of 6 variables:
## ..$ Ozone : int [1:30] NA NA NA NA NA NA 29 NA 71 39 ...
## ..$ Solar.R: int [1:30] 286 287 242 186 220 264 127 273 291 323 ...
## ..$ Wind : num [1:30] 8.6 9.7 16.1 9.2 8.6 14.3 9.7 6.9 13.8 11.5 ...
## ..$ Temp : int [1:30] 78 74 67 84 85 79 82 87 90 87 ...
## ..$ Month : int [1:30] 6 6 6 6 6 6 6 6 6 6 ...
## ..$ Day : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
## $ 7:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 135 49 32 NA 64 40 77 97 97 85 ...
## ..$ Solar.R: int [1:31] 269 248 236 101 175 314 276 267 272 175 ...
## ..$ Wind : num [1:31] 4.1 9.2 9.2 10.9 4.6 10.9 5.1 6.3 5.7 7.4 ...
## ..$ Temp : int [1:31] 84 85 81 84 83 83 88 92 92 89 ...
## ..$ Month : int [1:31] 7 7 7 7 7 7 7 7 7 7 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 8:'data.frame': 31 obs. of 6 variables:
## ..$ Ozone : int [1:31] 39 9 16 78 35 66 122 89 110 NA ...
## ..$ Solar.R: int [1:31] 83 24 77 NA NA NA 255 229 207 222 ...
## ..$ Wind : num [1:31] 6.9 13.8 7.4 6.9 7.4 4.6 4 10.3 8 8.6 ...
## ..$ Temp : int [1:31] 81 81 82 86 85 87 89 90 90 92 ...
## ..$ Month : int [1:31] 8 8 8 8 8 8 8 8 8 8 ...
## ..$ Day : int [1:31] 1 2 3 4 5 6 7 8 9 10 ...
## $ 9:'data.frame': 30 obs. of 6 variables:
## ..$ Ozone : int [1:30] 96 78 73 91 47 32 20 23 21 24 ...
## ..$ Solar.R: int [1:30] 167 197 183 189 95 92 252 220 230 259 ...
## ..$ Wind : num [1:30] 6.9 5.1 2.8 4.6 7.4 15.5 10.9 10.3 10.9 9.7 ...
## ..$ Temp : int [1:30] 91 92 93 93 87 84 80 78 75 73 ...
## ..$ Month : int [1:30] 9 9 9 9 9 9 9 9 9 9 ...
## ..$ Day : int [1:30] 1 2 3 4 5 6 7 8 9 10 ...
# Column means of Ozone, Solar.R and Wind within each month. Note that an
# anonymous function is used. Columns with missing values give NA.
lapply(s, function(month_df) {
  colMeans(month_df[, c("Ozone", "Solar.R", "Wind")])
})
## $`5`
## Ozone Solar.R Wind
## NA NA 11.62258
##
## $`6`
## Ozone Solar.R Wind
## NA 190.16667 10.26667
##
## $`7`
## Ozone Solar.R Wind
## NA 216.483871 8.941935
##
## $`8`
## Ozone Solar.R Wind
## NA NA 8.793548
##
## $`9`
## Ozone Solar.R Wind
## NA 167.4333 10.1800

# sapply() arranges the same per-month results as a matrix
# (one column per month).
sapply(s, function(month_df) {
  colMeans(month_df[, c("Ozone", "Solar.R", "Wind")])
})
## 5 6 7 8 9
## Ozone NA NA NA NA NA
## Solar.R NA 190.16667 216.483871 NA 167.4333
## Wind 11.62258 10.26667 8.941935 8.793548 10.1800

# na.rm = TRUE removes the missing values before the means are computed.
sapply(s, function(month_df) {
  colMeans(month_df[, c("Ozone", "Solar.R", "Wind")], na.rm = TRUE)
})
## 5 6 7 8 9
## Ozone 23.61538 29.44444 59.115385 59.961538 31.44828
## Solar.R 181.29630 190.16667 216.483871 171.857143 167.43333
## Wind 11.62258 10.26667 8.941935 8.793548 10.18000
library(datasets)
head(airquality)
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6

# lapply() on a data frame works column by column: the mean of every
# variable, ignoring missing values.
lapply(airquality, mean, na.rm = TRUE)
## $Ozone
## [1] 42.12931
##
## $Solar.R
## [1] 185.9315
##
## $Wind
## [1] 9.957516
##
## $Temp
## [1] 77.88235
##
## $Month
## [1] 6.993464
##
## $Day
## [1] 15.80392

# The same means as a single named vector.
sapply(airquality, mean, na.rm = TRUE)
## Ozone Solar.R Wind Temp Month Day
## 42.129310 185.931507 9.957516 77.882353 6.993464 15.803922
## Generate data
x <- c(rnorm(10), runif(10), rnorm(10, 1))
## Grouping factor
f <- gl(3, 10)
f
## [1] 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 3 3 3 3 3 3 3 3 3 3
## Levels: 1 2 3
# tapply() applies mean() to the values of x within each level of f.
tapply(x, f, mean)
## 1 2 3
## -0.2598788 0.4447014 1.0283033
# simplify = FALSE keeps the result as a list instead of a vector.
tapply(x, f, mean, simplify = FALSE)
## $`1`
## [1] -0.2598788
##
## $`2`
## [1] 0.4447014
##
## $`3`
## [1] 1.028303
# range() returns two values per group, so the result is a list as well.
tapply(x, f, range)
## $`1`
## [1] -1.622877 1.087143
##
## $`2`
## [1] 0.04331139 0.83504364
##
## $`3`
## [1] -0.1043797 2.5634304
# Example data for ten people: name (isim), height (boy), weight (kilo),
# sex (cinsiyet) and clothing size (beden).
isim <- c("Ali", "Elif", "Su", "Deniz", "Aras", "Berk", "Can", "Ece", "Efe", "Arda")
boy <- c(160, 165, 170, 155, 167, 162, 169, 158, 160, 164)
kilo <- c(55, 55, 57, 50, 48, 65, 58, 62, 45, 47)
cinsiyet <- factor(c(
  "erkek", "kadin", "kadin", "kadin", "erkek",
  "erkek", "erkek", "kadin", "erkek", "erkek"
))
beden <- factor(c("S", "M", "S", "M", "S", "L", "M", "L", "S", "S"))

# The same variables stored as a list and as a data frame.
Liste <- list(
  isim = isim, boy = boy, cinsiyet = cinsiyet, beden = beden, kilo = kilo
)
df <- data.frame(
  isim = isim, boy = boy, cinsiyet = cinsiyet, beden = beden, kilo = kilo
)

# tapply() applied to the list: heights sorted within each sex.
tapply(Liste$boy, Liste$cinsiyet, sort)
## $erkek
## [1] 160 160 162 164 167 169
##
## $kadin
## [1] 155 158 165 170
# Extra arguments (decreasing = TRUE) are passed on to sort().
tapply(Liste$boy, Liste$cinsiyet, sort, decreasing = TRUE)
## $erkek
## [1] 169 167 164 162 160 160
##
## $kadin
## [1] 170 165 158 155

# tapply() applied to the data frame. Both the values and the grouping
# factor are taken from df, so these examples do not silently depend on the
# separate Liste object.
tapply(df$boy, df$cinsiyet, sort)
## $erkek
## [1] 160 160 162 164 167 169
##
## $kadin
## [1] 155 158 165 170
tapply(df$boy, df$cinsiyet, mean)
## erkek kadin
## 163.6667 162.0000
tapply(df$boy, df$cinsiyet, sort, decreasing = TRUE)
## $erkek
## [1] 169 167 164 162 160 160
##
## $kadin
## [1] 170 165 158 155

# by() performs the same grouped computation; its printed header shows the
# grouping expression.
by(df$boy, df$cinsiyet, sort)
## df$cinsiyet: erkek
## [1] 160 160 162 164 167 169
## ------------------------------------------------------------
## df$cinsiyet: kadin
## [1] 155 158 165 170
by(df$boy, df$cinsiyet, sort, decreasing = TRUE)
## df$cinsiyet: erkek
## [1] 169 167 164 162 160 160
## ------------------------------------------------------------
## df$cinsiyet: kadin
## [1] 170 165 158 155
by(df$boy, df$cinsiyet, mean)
## df$cinsiyet: erkek
## [1] 163.6667
## ------------------------------------------------------------
## df$cinsiyet: kadin
## [1] 162
# 20 x 10 matrix of standard normal draws.
x <- matrix(rnorm(200), 20, 10)
apply(x, 2, mean) ## Mean of each column (MARGIN = 2)
## [1] 0.0229302103 0.2253763312 0.1144782597 -0.4328472762 -0.0005140581
## [6] -0.1046179842 -0.4014810255 -0.0318485021 -0.2537872669 0.0512321573
apply(x, 1, sum) ## Sum (not mean) of each row (MARGIN = 1)
## [1] 4.5966666 -0.5384966 -4.5171906 -1.5917761 -1.3888403 -1.9147264
## [7] -2.0905864 5.0469212 -5.0389567 -5.7016294 2.2033481 1.0811123
## [13] -3.2616023 -2.0125029 -4.2721438 3.0069081 0.1410559 2.5829376
## [19] -1.2009040 -1.3511772
# Draw 100 values (with replacement) from 10000 simulated normal scores
# (mean 50, sd 5) and arrange them as a 20 x 5 matrix.
set.seed(12)
S1 <- sample(rnorm(10000, 50, 5), 100, replace = TRUE)
Matris1 <- matrix(S1, nrow = 20, ncol = 5)

# The second argument, 2, tells apply() to work on the columns.
apply(Matris1, 2, mean)
## [1] 48.20485 52.13701 49.38658 50.61689 48.60479
apply(Matris1, 2, summary)
## [,1] [,2] [,3] [,4] [,5]
## Min. 39.00080 40.23309 39.04749 39.32974 37.74364
## 1st Qu. 45.21933 48.44165 45.57123 47.36401 43.71252
## Median 49.31295 52.24410 49.49029 51.08794 47.62144
## Mean 48.20485 52.13701 49.38658 50.61689 48.60479
## 3rd Qu. 52.40540 55.97719 52.70180 54.36235 53.32016
## Max. 55.24910 63.33272 58.88203 59.93019 60.51715

# With 1 as the second argument the summary is computed for each row.
apply(Matris1, 1, summary)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## Min. 45.82396 39.16789 51.63544 40.23309 39.04749 44.81304 39.73637 51.11418
## 1st Qu. 47.78055 39.32974 52.46878 43.82775 47.16408 47.46234 46.19462 51.96290
## Median 48.36804 46.24689 53.43269 47.65095 49.56534 49.64774 49.12984 52.65739
## Mean 50.47126 45.82933 54.50679 47.52181 48.65629 52.22224 50.10067 54.92558
## 3rd Qu. 54.95931 51.70256 56.11501 49.31343 52.65050 59.25790 55.94640 55.56069
## Max. 55.42443 52.69959 58.88203 56.58380 54.85404 59.93019 59.49613 63.33272
## [,9] [,10] [,11] [,12] [,13] [,14] [,15] [,16]
## Min. 44.96852 39.00080 43.36682 48.42947 42.13211 42.73818 40.55680 41.37856
## 1st Qu. 48.34900 48.83882 52.38428 50.17014 48.46619 46.50319 43.21988 42.18138
## Median 52.21976 53.65437 52.38428 51.40809 48.88713 50.98943 45.46715 47.83169
## Mean 50.61489 50.35382 51.18599 53.07152 50.44334 49.60777 46.24742 47.67032
## 3rd Qu. 53.31388 54.20555 52.91266 54.83276 55.24910 51.36429 46.82348 50.54044
## Max. 54.22331 56.06955 54.88190 60.51715 57.48218 56.44375 55.16980 56.41952
## [,17] [,18] [,19] [,20]
## Min. 40.53528 40.55680 37.74364 46.71473
## 1st Qu. 46.04637 44.03153 47.73063 49.31247
## Median 47.98124 44.46635 49.30321 51.96828
## Mean 48.55872 45.45876 47.52113 50.83282
## 3rd Qu. 49.85073 46.40143 50.16318 52.82962
## Max. 58.37998 51.83770 52.66500 53.33901
# Relative variability (coefficient of variation) of x, in percent:
# 100 * sd(x) / mean(x).
#
# Arguments:
#   x      numeric vector.
#   na.rm  if TRUE, missing values are removed before sd() and mean() are
#          computed. The default FALSE keeps the original behaviour, where
#          any NA in x makes the result NA.
# Returns a single numeric value.
bagil_degiskenlik <- function(x, na.rm = FALSE) {
  (sd(x, na.rm = na.rm) / mean(x, na.rm = na.rm)) * 100
}
# Relative variability of each column, first with the named function ...
apply(Matris1, 2, bagil_degiskenlik)
## [1] 11.24914 10.05771 11.02709 10.59998 12.97312
# ... and then with an equivalent anonymous function.
apply(Matris1, 2, function(col) sd(col) / mean(col) * 100)
## [1] 11.24914 10.05771 11.02709 10.59998 12.97312

# mapply() steps through both vectors in parallel:
# rep(1, 4), rep(2, 3), rep(3, 2), rep(4, 1).
mapply(rep, 1:4, 4:1)
## [[1]]
## [1] 1 1 1 1
##
## [[2]]
## [1] 2 2 2
##
## [[3]]
## [1] 3 3
##
## [[4]]
## [1] 4
# Draw n normally distributed random values with the given mean and standard
# deviation. A thin wrapper around rnorm().
noise <- function(n, mean, sd) {
  rnorm(n = n, mean = mean, sd = sd)
}
# Five random numbers with mean 1 and standard deviation 2.
noise(5, 1, 2)
## [1] -4.327419 1.768021 1.886192 1.184867 3.169347

# mapply() pairs n = 1..5 with mean = 1..5; the single sd value 2 is reused
# for every call.
mapply(noise, 1:5, 1:5, 2)
## [[1]]
## [1] -0.7104655
##
## [[2]]
## [1] 5.069163 2.364621
##
## [[3]]
## [1] 5.224162 4.429571 5.242948
##
## [[4]]
## [1] 8.278157 6.074560 3.496312 1.782874
##
## [[5]]
## [1] 3.7470981 4.2376843 9.4055567 0.9957188 5.8647980

# The mapply() call corresponds to writing the five calls out by hand.
list(
  noise(1, 1, 2), noise(2, 2, 2),
  noise(3, 3, 2), noise(4, 4, 2),
  noise(5, 5, 2)
)
## [[1]]
## [1] -1.039076
##
## [[2]]
## [1] 0.3540303 2.8484079
##
## [[3]]
## [1] 2.250261 2.707497 2.360474
##
## [[4]]
## [1] 2.837683 5.003308 2.944540 2.339083
##
## [[5]]
## [1] 6.066594 4.941666 5.764200 5.716390 6.835816
# Sum of squared standardised deviations: each value of x is centred on mu,
# divided by sigma, squared, and the squares are added up.
sumsq <- function(mu, sigma, x) {
  z <- (x - mu) / sigma
  sum(z^2)
}
x <- rnorm(100) ## generate data
sumsq(1:10, 1:10, x) ## Not what we want: a single number instead of one per (mu, sigma) pair
## [1] 119.891
# mapply() calls sumsq() once for each (mu, sigma) pair; MoreArgs supplies
# the whole vector x to every call.
mapply(sumsq, 1:10, 1:10, MoreArgs = list(x = x))
## [1] 236.5110 143.2120 123.2428 115.3451 111.2744 108.8388 107.2354 106.1073
## [9] 105.2742 104.6354
# Ten reproducible standard normal values.
set.seed(10)
v1 <- rnorm(10, 0, 1)

# Sum of squared standardised deviations of x from mu, in units of sigma
# (same definition as the earlier sumsq()).
sumsq <- function(mu, sigma, x) {
  deviation <- x - mu
  sum((deviation / sigma)^2)
}

sumsq(0, 1, v1)
## [1] 6.8161
# Standardising with the sample mean and sd gives n - 1, here 9.
sumsq(mean(v1), sd(v1), v1)
## [1] 9
# 300 reproducible draws from a normal distribution (mean 50, sd 10).
set.seed(987)
std <- rnorm(n = 300, mean = 50, sd = 10)
hist(std)

# pnorm(): cumulative probability of the standard normal distribution.
pnorm(1)
## [1] 0.8413447
pnorm(-1:1)
## [1] 0.1586553 0.5000000 0.8413447
# qnorm(): quantiles; these are the critical values for a 95% confidence
# interval ...
qnorm(c(0.025, 0.975))
## [1] -1.959964 1.959964
# ... and, rounded to two decimals, for a 99% confidence interval.
round(qnorm(c(0.005, 0.995)), 2)
## [1] -2.58 2.58
# rnorm(): random draws.
rnorm(10, mean = 0, sd = 1)
## [1] 0.71883622 1.33501467 1.58250235 0.34783644 0.84599059 -1.27280039
## [7] -0.64053682 -0.07203593 -0.72280017 -0.65800036
# Binomial probabilities for the number of heads in two tosses of a fair
# coin. The argument name `prob` is written out in full; the abbreviation
# `p` only worked through partial argument matching.
x <- 0:2
plot(x, dbinom(x, size = 2, prob = 0.5),
  type = "h", col = "red",
  lwd = 10,
  main = "Bozuk Parayı İki Kere Havaya Atma"
)

# The same distribution for 30 trials.
x <- 0:30
plot(x, dbinom(x, size = 30, prob = 0.5),
  type = "h",
  main = "n=30 için Binom Dağılımı"
)

# Standard normal density over [-4, 4].
x <- seq(-4, 4, 0.01)
plot(x, dnorm(x), type = "l", main = "Normal Dağılım Yoğunluk Fonksiyonu")

# Standard normal cumulative distribution function on the same grid.
plot(x, pnorm(x), type = "l", main = "Normal Dağılım Yığılım Fonksiyonu")

# Normal density with mean 100 and sd 15, with a red line at the mean.
IQ <- seq(55, 145, 0.1)
plot(IQ, dnorm(IQ, mean = 100, sd = 15), type = "l")
abline(v = 100, col = "red")

# Means of 1000 samples of size 3 drawn from a normal distribution
# (mean 100, sd 15).
sample_means <- replicate(
  1000,
  mean(rnorm(3, mean = 100, sd = 15))
)
hist(sample_means, main = "3 Kişilik Örneklem Ortalamaları", col = "skyblue")

# The same with samples of size 30.
sample_means <- replicate(
  1000,
  mean(rnorm(30, mean = 100, sd = 15))
)
hist(sample_means, main = "30 Kişilik Örneklem Ortalamaları", col = "lightgreen")

# Sampling distribution of the mean of n draws from a uniform distribution
# on [0, 100], for a small (n = 2) and a large (n = 30) sample size.
nreps <- 10000
nsmall <- 2
nlarge <- 30

# Put the four plots on one 2 x 2 page. par() returns the previous setting,
# which is restored at the end so the layout does not leak into later plots.
old_par <- par(mfrow = c(2, 2))

# Small sample size.
# vapply() over seq_len() always returns a numeric vector of length nreps
# (also when nreps is 0, where 1:nreps would iterate over c(1, 0)) and needs
# no temporary variable called `sample`, which would mask base::sample().
sampdist.mean.small <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nsmall, 0, 100)),
  numeric(1)
)
# Histogram
hist(sampdist.mean.small,
  main = "Küçük Örneklem Ortalamalarının \n Dağılımı (n = 2)",
  xlab = "Örneklem Ortalamaları",
  col = "lightblue"
)
# Q-Q plot
qqnorm(sampdist.mean.small,
  main = "Q-Q Grafiği: Küçük Örneklem (n = 2)"
)
qqline(sampdist.mean.small, col = "red")

# Large sample size.
sampdist.mean.large <- vapply(
  seq_len(nreps),
  function(i) mean(runif(nlarge, 0, 100)),
  numeric(1)
)
# Histogram
hist(sampdist.mean.large,
  main = "Büyük Örneklem Ortalamalarının \n Dağılımı (n = 30)",
  xlab = "Örneklem Ortalamaları",
  col = "lightgreen"
)
# Q-Q plot
qqnorm(sampdist.mean.large,
  main = "Q-Q Grafiği: Büyük Örneklem (n = 30)"
)
qqline(sampdist.mean.large, col = "blue")

# Restore the plotting layout that was active before this section.
par(old_par)

# Histogram of 1000 draws from a uniform distribution on [0, 10].
uniform_samples <- runif(1000, min = 0, max = 10)
hist(uniform_samples, main = "Uniform Dağılım", col = "orange")

# Density of the t distribution with 10 degrees of freedom.
curve(dt(x, df = 10), from = -4, to = 4, main = "t Dağılımı")

# Poisson probabilities of the counts 0 to 10 with lambda = 3.
x <- 0:10
plot(x, dpois(x, lambda = 3), type = "h", main = "Poisson Dağılımı")

# Density of the chi-squared distribution with 5 degrees of freedom.
curve(dchisq(x, df = 5), from = 0, to = 20, main = "Ki-Kare Dağılımı")
