# List the data sets available in the attached packages ...
data()
# ... and then in every installed package.
data(package = .packages(all.available = TRUE))

# Load the built-in USArrests data set and keep it as the working data frame.
data("USArrests")
podatki <- USArrests
# Preview the first six rows.
head(podatki, n = 6L)
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
# Per-column summary (minimum, quartiles, mean, maximum) of the working data.
summary(object = podatki)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Attach psych, which supplies the sat.act example data set.
library("psych")
# Switch the working data frame to sat.act and preview its first six rows.
podatki <- sat.act
head(podatki, n = 6L)
## gender education age ACT SATV SATQ
## 29442 2 3 19 24 500 500
## 29457 2 3 23 35 600 500
## 29498 2 3 20 21 480 470
## 29503 1 4 27 26 550 520
## 29504 1 2 33 31 600 550
## 29518 1 5 26 28 640 640
# Replace the English column names (gender, education, age, ACT, SATV, SATQ)
# with Slovenian ones, keeping the column order unchanged.
names(podatki) <- c(
  "Spol", "Izob", "Starost",
  "Matura", "Verb", "Kvant"
)
# Confirm the new names on the first six rows.
head(podatki, n = 6L)
## Spol Izob Starost Matura Verb Kvant
## 29442 2 3 19 24 500 500
## 29457 2 3 23 35 600 500
## 29498 2 3 20 21 480 470
## 29503 1 4 27 26 550 520
## 29504 1 2 33 31 600 550
## 29518 1 5 26 28 640 640
# psych::describe() reports n, mean, sd, median, skew, kurtosis, se, ... for
# every column of the data frame.
library("psych")
describe(x = podatki)
## vars n mean sd median trimmed mad min max range skew
## Spol 1 700 1.65 0.48 2 1.68 0.00 1 2 1 -0.61
## Izob 2 700 3.16 1.43 3 3.31 1.48 0 5 5 -0.68
## Starost 3 700 25.59 9.50 22 23.86 5.93 13 65 52 1.64
## Matura 4 700 28.55 4.82 29 28.84 4.45 3 36 33 -0.66
## Verb 5 700 612.23 112.90 620 619.45 118.61 200 800 600 -0.64
## Kvant 6 687 610.22 115.64 620 617.25 118.61 200 800 600 -0.59
## kurtosis se
## Spol -1.62 0.02
## Izob -0.07 0.05
## Starost 2.42 0.36
## Matura 0.53 0.18
## Verb 0.33 4.27
## Kvant -0.02 4.41
# Uporaba funkcije describeBy. Iz kategorialnih spremenljivk vedno predhodno ustvarimo faktor.
# Derive a labelled factor from the numeric gender code: 1 -> "M", 2 -> "Z".
podatki[["SpolF"]] <- factor(
  podatki[["Spol"]],
  levels = c(1, 2),
  labels = c("M", "Z")
)
library("psych")
# Separate descriptive statistics for each gender group.
describeBy(x = podatki, group = podatki[["SpolF"]])
##
## Descriptive statistics by group
## group: M
## vars n mean sd median trimmed mad min max range skew
## Spol 1 247 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## Izob 2 247 3.00 1.54 3 3.12 1.48 0 5 5 -0.54
## Starost 3 247 25.86 9.74 22 24.23 5.93 14 58 44 1.43
## Matura 4 247 28.79 5.06 30 29.23 4.45 3 36 33 -1.06
## Verb 5 247 615.11 114.16 630 622.07 118.61 200 800 600 -0.63
## Kvant 6 245 635.87 116.02 660 645.53 94.89 300 800 500 -0.72
## SpolF* 7 247 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## kurtosis se
## Spol NaN 0.00
## Izob -0.60 0.10
## Starost 1.43 0.62
## Matura 1.89 0.32
## Verb 0.13 7.26
## Kvant -0.12 7.41
## SpolF* NaN 0.00
## ------------------------------------------------------------
## group: Z
## vars n mean sd median trimmed mad min max range skew
## Spol 1 453 2.00 0.00 2 2.00 0.00 2 2 0 NaN
## Izob 2 453 3.26 1.35 3 3.40 1.48 0 5 5 -0.74
## Starost 3 453 25.45 9.37 22 23.70 5.93 13 65 52 1.77
## Matura 4 453 28.42 4.69 29 28.63 4.45 15 36 21 -0.39
## Verb 5 453 610.66 112.31 620 617.91 103.78 200 800 600 -0.65
## Kvant 6 442 596.00 113.07 600 602.21 133.43 200 800 600 -0.58
## SpolF* 7 453 2.00 0.00 2 2.00 0.00 2 2 0 NaN
## kurtosis se
## Spol NaN 0.00
## Izob 0.27 0.06
## Starost 3.03 0.44
## Matura -0.42 0.22
## Verb 0.42 5.28
## Kvant 0.13 5.38
## SpolF* NaN 0.00
# Per-column summary; the factor SpolF is reported as counts per level.
summary(object = podatki)
## Spol Izob Starost Matura
## Min. :1.000 Min. :0.000 Min. :13.00 Min. : 3.00
## 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:19.00 1st Qu.:25.00
## Median :2.000 Median :3.000 Median :22.00 Median :29.00
## Mean :1.647 Mean :3.164 Mean :25.59 Mean :28.55
## 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:29.00 3rd Qu.:32.00
## Max. :2.000 Max. :5.000 Max. :65.00 Max. :36.00
##
## Verb Kvant SpolF
## Min. :200.0 Min. :200.0 M:247
## 1st Qu.:550.0 1st Qu.:530.0 Z:453
## Median :620.0 Median :620.0
## Mean :612.2 Mean :610.2
## 3rd Qu.:700.0 3rd Qu.:700.0
## Max. :800.0 Max. :800.0
## NA's :13
# Ustvarimo tabelo podatkiZ, v katero vključimo samo tiste osebe, ki so ženskega spola.
# Keep only the rows whose gender factor equals "Z" (all columns retained).
podatkiZ <- podatki[podatki[["SpolF"]] == "Z", , drop = FALSE]
# Izberite samo tiste študente, ki so stari med 20 in 30 let.
# Keep only the rows with age in the closed interval [20, 30].
podatki2 <- with(podatki, podatki[Starost >= 20 & Starost <= 30, ])
library(pastecs)
# stat.desc() summarises numeric data only; the factor SpolF would contribute
# nothing but an all-NA column, so restrict the input to the numeric columns.
round(
  stat.desc(podatki[vapply(podatki, is.numeric, logical(1))]),
  digits = 2
)
## Spol Izob Starost Matura Verb Kvant
## nbr.val 700.00 700.00 700.00 700.00 700.00 687.00
## nbr.null 0.00 57.00 0.00 0.00 0.00 0.00
## nbr.na 0.00 0.00 0.00 0.00 0.00 13.00
## min 1.00 0.00 13.00 3.00 200.00 200.00
## max 2.00 5.00 65.00 36.00 800.00 800.00
## range 1.00 5.00 52.00 33.00 600.00 600.00
## sum 1153.00 2215.00 17916.00 19983.00 428564.00 419219.00
## median 2.00 3.00 22.00 29.00 620.00 620.00
## mean 1.65 3.16 25.59 28.55 612.23 610.22
## SE.mean 0.02 0.05 0.36 0.18 4.27 4.41
## CI.mean.0.95 0.04 0.11 0.70 0.36 8.38 8.66
## var 0.23 2.03 90.22 23.27 12746.99 13372.45
## std.dev 0.48 1.43 9.50 4.82 112.90 115.64
## coef.var 0.29 0.45 0.37 0.17 0.18 0.19
#install.packages("tidyr")
library(tidyr)
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
##
## extract
# Remove every row that has a missing value in any column.
podatki <- tidyr::drop_na(data = podatki)