# List the data sets available in the currently attached packages.
data()

# List the data sets of every installed package, attached or not.
data(package = .packages(all.available = TRUE))

# Attach psych; sat.act below is presumably lazy-loaded from it.
library(psych)

# Take a working copy of sat.act and preview its first six rows.
podatki <- sat.act
head(podatki, n = 6L)
##       gender education age ACT SATV SATQ
## 29442      2         3  19  24  500  500
## 29457      2         3  23  35  600  500
## 29498      2         3  20  21  480  470
## 29503      1         4  27  26  550  520
## 29504      1         2  33  31  600  550
## 29518      1         5  26  28  640  640

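Opis spremenljivk:

- gender: spol (1 = moški, 2 = ženski)
- education: samoocenjena stopnja izobrazbe
- age: starost
- ACT: skupni rezultat testa ACT
- SATV: rezultat verbalnega dela testa SAT
- SATQ: rezultat kvantitativnega dela testa SAT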

# Derive a labelled factor from the numeric gender codes:
# 1 becomes "moski" (male), 2 becomes "zenski" (female).
podatki[["spol_faktor"]] <- factor(
  podatki[["gender"]],
  levels = c(1, 2),
  labels = c("moski", "zenski")
)

# Preview the data with the new column appended.
head(podatki, n = 6L)
##       gender education age ACT SATV SATQ spol_faktor
## 29442      2         3  19  24  500  500      zenski
## 29457      2         3  23  35  600  500      zenski
## 29498      2         3  20  21  480  470      zenski
## 29503      1         4  27  26  550  520       moski
## 29504      1         2  33  31  600  550       moski
## 29518      1         5  26  28  640  640       moski

Ustvarimo podatki1, ki vključuje samo ženske.

# Base R version (left commented out). The column is referenced as
# podatki$spol_faktor because, as far as this script shows, no standalone
# `spol_faktor` object exists outside the data frame.
#podatki1 <- podatki[podatki$spol_faktor == "zenski", ]


# subset() version (left commented out). subset() evaluates the condition
# with the columns of `podatki` in scope, so the bare column name works.
#podatki_zenske <- subset(podatki, spol_faktor == "zenski")

Pisanje s pomočjo operatorja pipe `%>%` (beremo ga kot »then« oziroma »nato«):

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Keep only the rows for women. filter() is namespaced explicitly because
# attaching dplyr masks stats::filter().
podatki2 <- podatki %>%
  dplyr::filter(spol_faktor == "zenski")

Preimenujmo spremenljivko age v starost.

# Rename the `age` column to `starost`; all other columns are left as is.
podatki <- podatki %>%
  dplyr::rename(starost = age)

# Check the new column name in the first six rows.
head(podatki, n = 6L)
##       gender education starost ACT SATV SATQ spol_faktor
## 29442      2         3      19  24  500  500      zenski
## 29457      2         3      23  35  600  500      zenski
## 29498      2         3      20  21  480  470      zenski
## 29503      1         4      27  26  550  520       moski
## 29504      1         2      33  31  600  550       moski
## 29518      1         5      26  28  640  640       moski
# Capitalise the age column. The column is located by its current name
# ("starost") instead of a fixed position, so the rename keeps targeting
# the right column even if the column order changes.
colnames(podatki)[colnames(podatki) == "starost"] <- "Starost"

# Summarise the three test-score columns; summary() also reports NA counts.
summary(podatki[, c("ACT", "SATV", "SATQ")])
##       ACT             SATV            SATQ      
##  Min.   : 3.00   Min.   :200.0   Min.   :200.0  
##  1st Qu.:25.00   1st Qu.:550.0   1st Qu.:530.0  
##  Median :29.00   Median :620.0   Median :620.0  
##  Mean   :28.55   Mean   :612.2   Mean   :610.2  
##  3rd Qu.:32.00   3rd Qu.:700.0   3rd Qu.:700.0  
##  Max.   :36.00   Max.   :800.0   Max.   :800.0  
##                                  NA's   :13
library(tidyr)
# Drop every row that contains a missing value in any column.
podatki <- podatki %>%
  tidyr::drop_na()

Prikažite opisno statistiko za spremenljivko SATV, ločeno za moške in ženske.

library(psych)

# Descriptive statistics for SATV, computed separately for each level of
# spol_faktor ("moski" / "zenski").
psych::describeBy(podatki[["SATV"]], group = podatki[["spol_faktor"]])
## 
##  Descriptive statistics by group 
## group: moski
##    vars   n   mean     sd median trimmed    mad min max range  skew kurtosis
## X1    1 245 615.36 114.33    630  622.44 118.61 200 800   600 -0.63     0.14
##     se
## X1 7.3
## ------------------------------------------------------------ 
## group: zenski
##    vars   n   mean     sd median trimmed    mad min max range  skew kurtosis
## X1    1 442 610.66 112.81    620  618.09 103.78 200 800   600 -0.66     0.43
##      se
## X1 5.37