Branje podatkov

Podatke bomo prebrali iz datoteke, ki je shranjena na spletu.

# Read the tab-separated table (first row holds the column names) from the web.
# stringsAsFactors = TRUE is spelled out because R >= 4.0.0 no longer converts
# text columns to factors by default, and the summaries echoed further down
# in this document show spol, lasje, oci and majica as factors.
fpath <- "http://bit.ly/16oBVpR"
data <- read.table(fpath, header = TRUE, sep = "\t", stringsAsFactors = TRUE)
# Number of rows and columns.
dim(data)
## [1] 43 12
# Column names.
names(data)
##  [1] "starost" "mesec"   "spol"    "masa"    "visina"  "roke"    "cevelj" 
##  [8] "lasje"   "oci"     "mati"    "oce"     "majica"

Podatki so prebrani!

Povzetek podatkov

Poglejmo, kako so porazdeljeni podatki

# Print the per-column summary (quartiles, mean, level counts, NA counts).
print(summary(data))
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 0.000   F:33   Min.   :50.00   Min.   :156.0  
##  1st Qu.:21.00   1st Qu.: 5.000   M:10   1st Qu.:55.50   1st Qu.:164.0  
##  Median :21.00   Median : 7.000          Median :61.00   Median :170.0  
##  Mean   :22.07   Mean   : 6.814          Mean   :63.42   Mean   :169.9  
##  3rd Qu.:22.00   3rd Qu.: 9.500          3rd Qu.:70.00   3rd Qu.:173.5  
##  Max.   :59.00   Max.   :11.000          Max.   :91.00   Max.   :189.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :154.0   Min.   :36.00   S:19   S:24   Min.   :155.0  
##  1st Qu.:163.2   1st Qu.:38.00   T:24   T:19   1st Qu.:160.0  
##  Median :167.8   Median :39.00                 Median :165.0  
##  Mean   :169.3   Mean   :40.02                 Mean   :165.4  
##  3rd Qu.:172.5   3rd Qu.:41.50                 3rd Qu.:168.0  
##  Max.   :193.0   Max.   :48.00                 Max.   :180.0  
##  NA's   :5                                     NA's   :5      
##       oce        majica 
##  Min.   :170.0   L : 5  
##  1st Qu.:174.2   M :19  
##  Median :179.5   S :16  
##  Mean   :179.1   XL: 1  
##  3rd Qu.:182.0   XS: 2  
##  Max.   :190.0          
##  NA's   :5

Struktura podatkov

# Show the compact structure of the data frame: one line per column with its
# type and first few values.
utils::str(data)
## 'data.frame':    43 obs. of  12 variables:
##  $ starost: int  59 21 21 21 21 21 21 20 22 23 ...
##  $ mesec  : int  7 1 7 8 4 3 7 11 6 10 ...
##  $ spol   : Factor w/ 2 levels "F","M": 2 1 1 1 1 2 1 1 1 1 ...
##  $ masa   : int  91 60 55 70 65 88 52 53 62 59 ...
##  $ visina : int  178 173 178 167 171 171 162 161 168 169 ...
##  $ roke   : num  189 176 178 165 168 173 164 160 164 168 ...
##  $ cevelj : int  44 43 39 39 40 41 39 38 41 38 ...
##  $ lasje  : Factor w/ 2 levels "S","T": 2 2 2 1 2 2 2 2 2 1 ...
##  $ oci    : Factor w/ 2 levels "S","T": 1 2 2 2 1 2 2 2 1 1 ...
##  $ mati   : int  155 162 170 160 169 165 160 158 170 178 ...
##  $ oce    : int  180 184 180 190 176 182 170 180 185 180 ...
##  $ majica : Factor w/ 5 levels "L","M","S","XL",..: 1 3 3 3 2 4 3 3 2 2 ...

Vsi podatki

# Print every row of the data frame.
print(data)
##    starost mesec spol masa visina  roke cevelj lasje oci mati oce majica
## 1       59     7    M   91    178 189.0     44     T   S  155 180      L
## 2       21     1    F   60    173 176.0     43     T   T  162 184      S
## 3       21     7    F   55    178 178.0     39     T   T  170 180      S
## 4       21     8    F   70    167 165.0     39     S   T  160 190      S
## 5       21     4    F   65    171 168.0     40     T   S  169 176      M
## 6       21     3    M   88    171 173.0     41     T   T  165 182     XL
## 7       21     7    F   52    162 164.0     39     T   T  160 170      S
## 8       20    11    F   53    161 160.0     38     T   T  158 180      S
## 9       22     6    F   62    168 164.0     41     T   S  170 185      M
## 10      23    10    F   59    169 168.0     38     S   S  178 180      M
## 11      21     9    F   65    170 169.0     40     S   S   NA  NA      L
## 12      21     7    F   55    164 167.5     37     T   T  160 175      S
## 13      20     2    F   51    156 154.0     38     S   S  158 182     XS
## 14      23     6    F   56    170 170.0     39     T   S  165 173      S
## 15      21     2    F   56    167 166.0     39     S   S  160 180      S
## 16      21    10    M   75    185 193.0     45     S   S   NA  NA      M
## 17      21     6    F   63    170 165.0     40     T   T   NA  NA      M
## 18      21     8    F   57    168 162.0     38     T   T  165 173      S
## 19      20    11    F   63    174 171.0     40     S   S  168 173      M
## 20      21     5    F   70    173 169.0     39     T   T  164 178      M
## 21      21     4    M   72    183 185.0     44     T   S  172 187      M
## 22      21     9    F   66    163 164.0     39     S   S  168 177      M
## 23      20    11    M   87    189 190.0     45     T   S  180 187      L
## 24      20     8    F   54    168 170.0     40     S   S  174 180      M
## 25      22     5    F   65    163 159.0     39     S   S  160 172      L
## 26      21     3    F   59    164 165.0     36     S   S  165 172      M
## 27      21     5    F   61    174 170.0     40     S   S  162 189      M
## 28      20    11    F   62    168 163.0     39     S   T  168 183      S
## 29      21     7    F   51    159 157.0     37     T   S  157 173     XS
## 30      22     2    F   58    173    NA     42     S   S   NA  NA      S
## 31      22     7    M   90    180    NA     42     T   T   NA  NA      L
## 32      21     8    M   75    180    NA     43     T   T  168 174      M
## 33      21     5    F   61    163 160.0     39     S   S  157 173      M
## 34      22     7    F   55    170    NA     39     T   T  165 178      S
## 35      23     8    F   59    168 164.0     38     T   T  174 188      S
## 36      20    11    M   70    180 181.0     40     S   S  166 188      M
## 37      21    11    F   51    166 160.0     38     S   S  163 182      S
## 38      22    10    M   73    173 180.0     42     T   S  168 175      M
## 39      21    10    F   58    170 171.0     48     S   S  177 180      M
## 40      22     7    F   56    158 156.0     37     T   T  165 179      M
## 41      21     4    F   55    157    NA     37     T   T  157 178      S
## 42      22    10    F   50    160 160.0     37     S   T  164 174      S
## 43      22     0    M   73    181 187.0     43     T   T  167 175      M

Prikaz podatkov

# Plot the values of the starost (age) column, selected with `$`.
plot(data$starost)

Ali pa:

# The same column selected by position (starost is the first column).
plot(data[ , 1 ])

# The same column selected by name.
plot(data[,"starost"])

Zdi se, da je ena oseba precej starejša od ostalih (59 let)!

Kaj pa masa?

# Plot the values of the masa (mass) column.
plot(data$masa)

Izbiranje vrstic in stolpcev

Začasno shranimo podatke še pod drugim imenom.

# Keep a copy of the data frame under a second name.
D <- data
# First column (all rows).
D[, 1]
##  [1] 59 21 21 21 21 21 21 20 22 23 21 21 20 23 21 21 21 21 20 21 21 21 20
## [24] 20 22 21 21 20 21 22 22 21 21 22 23 20 21 22 21 22 21 22 22
# First row (all columns).
D[1, ]
##   starost mesec spol masa visina roke cevelj lasje oci mati oce majica
## 1      59     7    M   91    178  189     44     T   S  155 180      L
# Rows 1, 3 and 5 of columns 4, 7 and 9, here picked by their names.
D[c(1, 3, 5), c("masa", "cevelj", "oci")]
##   masa cevelj oci
## 1   91     44   S
## 3   55     39   T
## 5   65     40   S
# Element-wise comparison: TRUE where the age is below 30.
data$starost < 30
##  [1] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [12]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [23]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [34]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
# Store the logical mask (age below 30) and use it to keep the matching rows.
filter <- data$starost < 30
data[filter, ]
##    starost mesec spol masa visina  roke cevelj lasje oci mati oce majica
## 2       21     1    F   60    173 176.0     43     T   T  162 184      S
## 3       21     7    F   55    178 178.0     39     T   T  170 180      S
## 4       21     8    F   70    167 165.0     39     S   T  160 190      S
## 5       21     4    F   65    171 168.0     40     T   S  169 176      M
## 6       21     3    M   88    171 173.0     41     T   T  165 182     XL
## 7       21     7    F   52    162 164.0     39     T   T  160 170      S
## 8       20    11    F   53    161 160.0     38     T   T  158 180      S
## 9       22     6    F   62    168 164.0     41     T   S  170 185      M
## 10      23    10    F   59    169 168.0     38     S   S  178 180      M
## 11      21     9    F   65    170 169.0     40     S   S   NA  NA      L
## 12      21     7    F   55    164 167.5     37     T   T  160 175      S
## 13      20     2    F   51    156 154.0     38     S   S  158 182     XS
## 14      23     6    F   56    170 170.0     39     T   S  165 173      S
## 15      21     2    F   56    167 166.0     39     S   S  160 180      S
## 16      21    10    M   75    185 193.0     45     S   S   NA  NA      M
## 17      21     6    F   63    170 165.0     40     T   T   NA  NA      M
## 18      21     8    F   57    168 162.0     38     T   T  165 173      S
## 19      20    11    F   63    174 171.0     40     S   S  168 173      M
## 20      21     5    F   70    173 169.0     39     T   T  164 178      M
## 21      21     4    M   72    183 185.0     44     T   S  172 187      M
## 22      21     9    F   66    163 164.0     39     S   S  168 177      M
## 23      20    11    M   87    189 190.0     45     T   S  180 187      L
## 24      20     8    F   54    168 170.0     40     S   S  174 180      M
## 25      22     5    F   65    163 159.0     39     S   S  160 172      L
## 26      21     3    F   59    164 165.0     36     S   S  165 172      M
## 27      21     5    F   61    174 170.0     40     S   S  162 189      M
## 28      20    11    F   62    168 163.0     39     S   T  168 183      S
## 29      21     7    F   51    159 157.0     37     T   S  157 173     XS
## 30      22     2    F   58    173    NA     42     S   S   NA  NA      S
## 31      22     7    M   90    180    NA     42     T   T   NA  NA      L
## 32      21     8    M   75    180    NA     43     T   T  168 174      M
## 33      21     5    F   61    163 160.0     39     S   S  157 173      M
## 34      22     7    F   55    170    NA     39     T   T  165 178      S
## 35      23     8    F   59    168 164.0     38     T   T  174 188      S
## 36      20    11    M   70    180 181.0     40     S   S  166 188      M
## 37      21    11    F   51    166 160.0     38     S   S  163 182      S
## 38      22    10    M   73    173 180.0     42     T   S  168 175      M
## 39      21    10    F   58    170 171.0     48     S   S  177 180      M
## 40      22     7    F   56    158 156.0     37     T   T  165 179      M
## 41      21     4    F   55    157    NA     37     T   T  157 178      S
## 42      22    10    F   50    160 160.0     37     S   T  164 174      S
## 43      22     0    M   73    181 187.0     43     T   T  167 175      M
# Negate the mask to get the rows that were left out.
data[!filter, ]
##   starost mesec spol masa visina roke cevelj lasje oci mati oce majica
## 1      59     7    M   91    178  189     44     T   S  155 180      L

Podatki o masi za tiste, ki so večji od 165 cm?

# Logical mask: TRUE where the height (visina) exceeds 165.
filter <- data$visina > 165
print(filter)
##  [1]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [12] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE
## [23]  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE
## [34]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE

Vektor mas za “ta velike”

# Masses (masa) of the rows selected by the mask, as a plain vector.
veliki <- data$masa[filter]
print(veliki)
##  [1] 91 60 55 70 65 88 62 59 65 56 56 75 63 57 63 70 72 87 54 61 62 58 90
## [24] 75 55 59 70 51 73 58 73
# The colon operator builds the integer sequence 10, 11, 12, 13.
10:13
## [1] 10 11 12 13
# length() returns the number of elements in a vector.
length(10:13)
## [1] 4

Koliko je velikih?

# Count the selected masses directly, without the intermediate mask variable.
n <- length(data$masa[data$visina > 165])
print(n)
## [1] 31
# The vector built earlier has the same number of elements.
length(veliki)
## [1] 31

Med podatki imamo 31 velikih.

Negativni indeks odstrani element

# Small example vector.
x <- c(10, 3, 7, 5, 4)
# A positive index selects that element.
x[1]
## [1] 10
# A negative index returns the vector without that element.
x[-1]
## [1] 3 7 5 4