# List the data sets that are available, first with the default lookup and
# then across every installed package.
data()

data(package = .packages(all.available = TRUE))

# Load USArrests and keep a working copy under the name `podatki`.
data("USArrests")
podatki <- USArrests

# Show the first rows of the working copy.
head(x = podatki)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
# Per-column summary (min, quartiles, mean, max) of the working data frame.
summary(object = podatki)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00
library(psych)

# Replace the working data frame with sat.act (available once psych is
# attached); plain assignment already evaluates the data set.
podatki <- sat.act

# Show the first rows of the new working data frame.
head(x = podatki)
##       gender education age ACT SATV SATQ
## 29442      2         3  19  24  500  500
## 29457      2         3  23  35  600  500
## 29498      2         3  20  21  480  470
## 29503      1         4  27  26  550  520
## 29504      1         2  33  31  600  550
## 29518      1         5  26  28  640  640
# Rename the six columns by position: gender, education, age, ACT, SATV and
# SATQ become the Slovenian names below.
names(podatki) <- c("Spol", "Izob", "Starost", "Matura", "Verb", "Kvant")

# Confirm the new column names on the first rows.
head(x = podatki)
##       Spol Izob Starost Matura Verb Kvant
## 29442    2    3      19     24  500   500
## 29457    2    3      23     35  600   500
## 29498    2    3      20     21  480   470
## 29503    1    4      27     26  550   520
## 29504    1    2      33     31  600   550
## 29518    1    5      26     28  640   640
library(psych)
# Descriptive statistics for every column (n, mean, sd, median, skew, ...).
describe(x = podatki)
##         vars   n   mean     sd median trimmed    mad min max range  skew
## Spol       1 700   1.65   0.48      2    1.68   0.00   1   2     1 -0.61
## Izob       2 700   3.16   1.43      3    3.31   1.48   0   5     5 -0.68
## Starost    3 700  25.59   9.50     22   23.86   5.93  13  65    52  1.64
## Matura     4 700  28.55   4.82     29   28.84   4.45   3  36    33 -0.66
## Verb       5 700 612.23 112.90    620  619.45 118.61 200 800   600 -0.64
## Kvant      6 687 610.22 115.64    620  617.25 118.61 200 800   600 -0.59
##         kurtosis   se
## Spol       -1.62 0.02
## Izob       -0.07 0.05
## Starost     2.42 0.36
## Matura      0.53 0.18
## Verb        0.33 4.27
## Kvant      -0.02 4.41

Uporaba funkcije describeBy: iz kategorialnih spremenljivk vedno predhodno ustvarimo faktor.

# Turn the numeric sex code into a labelled factor: 1 -> "M", 2 -> "Z".
podatki[["SpolF"]] <- factor(
  podatki[["Spol"]],
  levels = c(1, 2),
  labels = c("M", "Z")
)
library(psych)

# Separate descriptive statistics for each sex.
describeBy(podatki, group = podatki[["SpolF"]])
## 
##  Descriptive statistics by group 
## group: M
##         vars   n   mean     sd median trimmed    mad min max range  skew
## Spol       1 247   1.00   0.00      1    1.00   0.00   1   1     0   NaN
## Izob       2 247   3.00   1.54      3    3.12   1.48   0   5     5 -0.54
## Starost    3 247  25.86   9.74     22   24.23   5.93  14  58    44  1.43
## Matura     4 247  28.79   5.06     30   29.23   4.45   3  36    33 -1.06
## Verb       5 247 615.11 114.16    630  622.07 118.61 200 800   600 -0.63
## Kvant      6 245 635.87 116.02    660  645.53  94.89 300 800   500 -0.72
## SpolF*     7 247   1.00   0.00      1    1.00   0.00   1   1     0   NaN
##         kurtosis   se
## Spol         NaN 0.00
## Izob       -0.60 0.10
## Starost     1.43 0.62
## Matura      1.89 0.32
## Verb        0.13 7.26
## Kvant      -0.12 7.41
## SpolF*       NaN 0.00
## ------------------------------------------------------------ 
## group: Z
##         vars   n   mean     sd median trimmed    mad min max range  skew
## Spol       1 453   2.00   0.00      2    2.00   0.00   2   2     0   NaN
## Izob       2 453   3.26   1.35      3    3.40   1.48   0   5     5 -0.74
## Starost    3 453  25.45   9.37     22   23.70   5.93  13  65    52  1.77
## Matura     4 453  28.42   4.69     29   28.63   4.45  15  36    21 -0.39
## Verb       5 453 610.66 112.31    620  617.91 103.78 200 800   600 -0.65
## Kvant      6 442 596.00 113.07    600  602.21 133.43 200 800   600 -0.58
## SpolF*     7 453   2.00   0.00      2    2.00   0.00   2   2     0   NaN
##         kurtosis   se
## Spol         NaN 0.00
## Izob        0.27 0.06
## Starost     3.03 0.44
## Matura     -0.42 0.22
## Verb        0.42 5.28
## Kvant       0.13 5.38
## SpolF*       NaN 0.00
# Per-column summary; the factor column is reported as counts per level.
summary(object = podatki)
##       Spol            Izob          Starost          Matura     
##  Min.   :1.000   Min.   :0.000   Min.   :13.00   Min.   : 3.00  
##  1st Qu.:1.000   1st Qu.:3.000   1st Qu.:19.00   1st Qu.:25.00  
##  Median :2.000   Median :3.000   Median :22.00   Median :29.00  
##  Mean   :1.647   Mean   :3.164   Mean   :25.59   Mean   :28.55  
##  3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:29.00   3rd Qu.:32.00  
##  Max.   :2.000   Max.   :5.000   Max.   :65.00   Max.   :36.00  
##                                                                 
##       Verb           Kvant       SpolF  
##  Min.   :200.0   Min.   :200.0   M:247  
##  1st Qu.:550.0   1st Qu.:530.0   Z:453  
##  Median :620.0   Median :620.0          
##  Mean   :612.2   Mean   :610.2          
##  3rd Qu.:700.0   3rd Qu.:700.0          
##  Max.   :800.0   Max.   :800.0          
##                  NA's   :13

Ustvarimo tabelo podatkiZ, v katero vključimo samo osebe ženskega spola.

# Keep only the rows whose sex factor is "Z" (female).
# which() turns the logical mask into row positions and drops NA entries, so a
# missing SpolF value cannot introduce an all-NA row into the subset.
podatkiZ <- podatki[which(podatki$SpolF == "Z"), ]

Izberite samo tiste študente, ki so stari med 20 in 30 let.

# Keep only the rows with age between 20 and 30, both bounds inclusive.
# which() drops NA entries from the mask, so a missing age cannot introduce an
# all-NA row into the subset.
podatki2 <- podatki[which(podatki$Starost >= 20 & podatki$Starost <= 30), ]
library(pastecs)

# Descriptive statistics rounded to two decimals.
# Only numeric columns are passed on: the factor SpolF has no meaningful
# mean/variance and would otherwise show up as a column of NA.
round(stat.desc(Filter(is.numeric, podatki)), 2)
##                 Spol    Izob  Starost   Matura      Verb     Kvant
## nbr.val       700.00  700.00   700.00   700.00    700.00    687.00
## nbr.null        0.00   57.00     0.00     0.00      0.00      0.00
## nbr.na          0.00    0.00     0.00     0.00      0.00     13.00
## min             1.00    0.00    13.00     3.00    200.00    200.00
## max             2.00    5.00    65.00    36.00    800.00    800.00
## range           1.00    5.00    52.00    33.00    600.00    600.00
## sum          1153.00 2215.00 17916.00 19983.00 428564.00 419219.00
## median          2.00    3.00    22.00    29.00    620.00    620.00
## mean            1.65    3.16    25.59    28.55    612.23    610.22
## SE.mean         0.02    0.05     0.36     0.18      4.27      4.41
## CI.mean.0.95    0.04    0.11     0.70     0.36      8.38      8.66
## var             0.23    2.03    90.22    23.27  12746.99  13372.45
## std.dev         0.48    1.43     9.50     4.82    112.90    115.64
## coef.var        0.29    0.45     0.37     0.17      0.18      0.19
# install.packages("tidyr")
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:pastecs':
## 
##     extract
# Remove the rows that contain missing values.
podatki <- drop_na(data = podatki)