Branje podatkov

Podatke v R preberemo s funkcijo read.table.

# Read the tab-separated survey data; the first row holds the column names.
# stringsAsFactors = TRUE is set explicitly: since R 4.0.0 read.table() keeps
# text columns as character by default, while the per-level counts printed by
# summary() for spol, lasje, oci and majica require those columns to be factors.
fpath <- "http://bit.ly/16oBVpR"
data <- read.table(fpath, header = TRUE, sep = "\t", stringsAsFactors = TRUE)
# List the column names of the imported data frame.
names(data)
##  [1] "starost" "mesec"   "spol"    "masa"    "visina"  "roke"    "cevelj" 
##  [8] "lasje"   "oci"     "mati"    "oce"     "majica"

Podatki so v datoteki http://bit.ly/16oBVpR.

Izpis prvih nekaj vrstic ter izbira vrstic in stolpcev.

# Rows 1 to 5 with every column.
data[1:5, ]
##   starost mesec spol masa visina roke cevelj lasje oci mati oce majica
## 1      59     7    M   91    178  189     44     T   S  155 180      L
## 2      21     1    F   60    173  176     43     T   T  162 184      S
## 3      21     7    F   55    178  178     39     T   T  170 180      S
## 4      21     8    F   70    167  165     39     S   T  160 190      S
## 5      21     4    F   65    171  168     40     T   S  169 176      M
# Rows 1 to 5; columns picked by position (3 = spol, 5 = visina, 7 = cevelj).
data[1:5, c(3, 5, 7)]
##   spol visina cevelj
## 1    M    178     44
## 2    F    173     43
## 3    F    178     39
## 4    F    167     39
## 5    F    171     40
# Rows 1 to 3 and the contiguous column range 3 to 7 (spol through cevelj).
data[1:3, 3:7]
##   spol masa visina roke cevelj
## 1    M   91    178  189     44
## 2    F   60    173  176     43
## 3    F   55    178  178     39
# Rows 1 to 5; columns picked by name instead of position.
data[1:5, c("spol", "visina")]
##   spol visina
## 1    M    178
## 2    F    173
## 3    F    178
## 4    F    167
## 5    F    171
# Logical vector with one entry per row: TRUE where masa is greater than 70.
data$masa > 70
##  [1]  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE
## [12] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE
## [23]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
## [34] FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE
# Use the logical condition as a row filter and keep the first six columns.
data[data$masa > 70, 1:6]
##    starost mesec spol masa visina roke
## 1       59     7    M   91    178  189
## 6       21     3    M   88    171  173
## 16      21    10    M   75    185  193
## 21      21     4    M   72    183  185
## 23      20    11    M   87    189  190
## 31      22     7    M   90    180   NA
## 32      21     8    M   75    180   NA
## 38      22    10    M   73    173  180
## 43      22     0    M   73    181  187

Izpišimo spremenljivki spol in visina za osebe moškega spola (M):

# Rows where spol equals "M", restricted to the spol and visina columns.
data[data$spol == "M", c("spol", "visina")]
##    spol visina
## 1     M    178
## 6     M    171
## 16    M    185
## 21    M    183
## 23    M    189
## 31    M    180
## 32    M    180
## 36    M    180
## 38    M    173
## 43    M    181

Koliko pa je fantov in koliko punčk?

# Frequency table: number of rows for each value of spol.
table(data[["spol"]])
## 
##  F  M 
## 33 10

Povzetek podatkov

# Column-wise summary of the whole data frame: quartiles, mean and NA counts
# for numeric columns, level counts for factor columns.
summary(data)
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 0.000   F:33   Min.   :50.00   Min.   :156.0  
##  1st Qu.:21.00   1st Qu.: 5.000   M:10   1st Qu.:55.50   1st Qu.:164.0  
##  Median :21.00   Median : 7.000          Median :61.00   Median :170.0  
##  Mean   :22.07   Mean   : 6.814          Mean   :63.42   Mean   :169.9  
##  3rd Qu.:22.00   3rd Qu.: 9.500          3rd Qu.:70.00   3rd Qu.:173.5  
##  Max.   :59.00   Max.   :11.000          Max.   :91.00   Max.   :189.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :154.0   Min.   :36.00   S:19   S:24   Min.   :155.0  
##  1st Qu.:163.2   1st Qu.:38.00   T:24   T:19   1st Qu.:160.0  
##  Median :167.8   Median :39.00                 Median :165.0  
##  Mean   :169.3   Mean   :40.02                 Mean   :165.4  
##  3rd Qu.:172.5   3rd Qu.:41.50                 3rd Qu.:168.0  
##  Max.   :193.0   Max.   :48.00                 Max.   :180.0  
##  NA's   :5                                     NA's   :5      
##       oce        majica 
##  Min.   :170.0   L : 5  
##  1st Qu.:174.2   M :19  
##  Median :179.5   S :16  
##  Mean   :179.1   XL: 1  
##  3rd Qu.:182.0   XS: 2  
##  Max.   :190.0          
##  NA's   :5

Povzetek za vsak spol posebej

# Summary of all columns, restricted to rows where spol is "M".
summary(data[data$spol == "M", ])
##     starost         mesec       spol        masa           visina     
##  Min.   :20.0   Min.   : 0.00   F: 0   Min.   :70.00   Min.   :171.0  
##  1st Qu.:21.0   1st Qu.: 4.75   M:10   1st Qu.:73.00   1st Qu.:178.5  
##  Median :21.0   Median : 7.50          Median :75.00   Median :180.0  
##  Mean   :24.9   Mean   : 7.10          Mean   :79.40   Mean   :180.0  
##  3rd Qu.:22.0   3rd Qu.:10.00          3rd Qu.:87.75   3rd Qu.:182.5  
##  Max.   :59.0   Max.   :11.00          Max.   :91.00   Max.   :189.0  
##                                                                       
##       roke           cevelj     lasje oci        mati            oce     
##  Min.   :173.0   Min.   :40.0   S:2   S:6   Min.   :155.0   Min.   :174  
##  1st Qu.:180.8   1st Qu.:42.0   T:8   T:4   1st Qu.:165.8   1st Qu.:175  
##  Median :186.0   Median :43.0               Median :167.5   Median :181  
##  Mean   :184.8   Mean   :42.9               Mean   :167.6   Mean   :181  
##  3rd Qu.:189.2   3rd Qu.:44.0               3rd Qu.:169.0   3rd Qu.:187  
##  Max.   :193.0   Max.   :45.0               Max.   :180.0   Max.   :188  
##  NA's   :2                                  NA's   :2       NA's   :2    
##  majica
##  L :3  
##  M :6  
##  S :0  
##  XL:1  
##  XS:0  
##        
## 
# Summary of all columns, restricted to rows where spol is "F".
summary(data[data$spol == "F", ])
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 1.000   F:33   Min.   :50.00   Min.   :156.0  
##  1st Qu.:21.00   1st Qu.: 5.000   M: 0   1st Qu.:55.00   1st Qu.:163.0  
##  Median :21.00   Median : 7.000          Median :58.00   Median :168.0  
##  Mean   :21.21   Mean   : 6.727          Mean   :58.58   Mean   :166.8  
##  3rd Qu.:22.00   3rd Qu.: 9.000          3rd Qu.:62.00   3rd Qu.:170.0  
##  Max.   :23.00   Max.   :11.000          Max.   :70.00   Max.   :178.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :154.0   Min.   :36.00   S:17   S:18   Min.   :157.0  
##  1st Qu.:160.5   1st Qu.:38.00   T:16   T:15   1st Qu.:160.0  
##  Median :165.0   Median :39.00                 Median :164.5  
##  Mean   :165.2   Mean   :39.15                 Mean   :164.8  
##  3rd Qu.:169.0   3rd Qu.:40.00                 3rd Qu.:168.0  
##  Max.   :178.0   Max.   :48.00                 Max.   :178.0  
##  NA's   :3                                     NA's   :3      
##       oce        majica 
##  Min.   :170.0   L : 2  
##  1st Qu.:173.2   M :13  
##  Median :178.5   S :16  
##  Mean   :178.6   XL: 0  
##  3rd Qu.:181.5   XS: 2  
##  Max.   :190.0          
##  NA's   :3

Funkcija subset

# subset() evaluates both arguments inside the data frame, so columns can be
# named without the data$ prefix: keep rows with spol "M" and the column
# range from spol to cevelj.
subset(data, subset = spol == "M", select = spol:cevelj)
##    spol masa visina roke cevelj
## 1     M   91    178  189     44
## 6     M   88    171  173     41
## 16    M   75    185  193     45
## 21    M   72    183  185     44
## 23    M   87    189  190     45
## 31    M   90    180   NA     42
## 32    M   75    180   NA     43
## 36    M   70    180  181     40
## 38    M   73    173  180     42
## 43    M   73    181  187     43

Povzetki

# Summary of the columns spol through cevelj for rows with spol "M".
summary(subset(data, subset = spol == "M", select = spol:cevelj))
##  spol        masa           visina           roke           cevelj    
##  F: 0   Min.   :70.00   Min.   :171.0   Min.   :173.0   Min.   :40.0  
##  M:10   1st Qu.:73.00   1st Qu.:178.5   1st Qu.:180.8   1st Qu.:42.0  
##         Median :75.00   Median :180.0   Median :186.0   Median :43.0  
##         Mean   :79.40   Mean   :180.0   Mean   :184.8   Mean   :42.9  
##         3rd Qu.:87.75   3rd Qu.:182.5   3rd Qu.:189.2   3rd Qu.:44.0  
##         Max.   :91.00   Max.   :189.0   Max.   :193.0   Max.   :45.0  
##                                         NA's   :2
# Summary of the columns spol through cevelj for rows with spol "F".
summary(subset(data, subset = spol == "F", select = spol:cevelj))
##  spol        masa           visina           roke           cevelj     
##  F:33   Min.   :50.00   Min.   :156.0   Min.   :154.0   Min.   :36.00  
##  M: 0   1st Qu.:55.00   1st Qu.:163.0   1st Qu.:160.5   1st Qu.:38.00  
##         Median :58.00   Median :168.0   Median :165.0   Median :39.00  
##         Mean   :58.58   Mean   :166.8   Mean   :165.2   Mean   :39.15  
##         3rd Qu.:62.00   3rd Qu.:170.0   3rd Qu.:169.0   3rd Qu.:40.00  
##         Max.   :70.00   Max.   :178.0   Max.   :178.0   Max.   :48.00  
##                                         NA's   :3

Še en način:

# Split the data frame by spol and apply summary() to each part.
by(data, data$spol, summary)
## data$spol: F
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 1.000   F:33   Min.   :50.00   Min.   :156.0  
##  1st Qu.:21.00   1st Qu.: 5.000   M: 0   1st Qu.:55.00   1st Qu.:163.0  
##  Median :21.00   Median : 7.000          Median :58.00   Median :168.0  
##  Mean   :21.21   Mean   : 6.727          Mean   :58.58   Mean   :166.8  
##  3rd Qu.:22.00   3rd Qu.: 9.000          3rd Qu.:62.00   3rd Qu.:170.0  
##  Max.   :23.00   Max.   :11.000          Max.   :70.00   Max.   :178.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :154.0   Min.   :36.00   S:17   S:18   Min.   :157.0  
##  1st Qu.:160.5   1st Qu.:38.00   T:16   T:15   1st Qu.:160.0  
##  Median :165.0   Median :39.00                 Median :164.5  
##  Mean   :165.2   Mean   :39.15                 Mean   :164.8  
##  3rd Qu.:169.0   3rd Qu.:40.00                 3rd Qu.:168.0  
##  Max.   :178.0   Max.   :48.00                 Max.   :178.0  
##  NA's   :3                                     NA's   :3      
##       oce        majica 
##  Min.   :170.0   L : 2  
##  1st Qu.:173.2   M :13  
##  Median :178.5   S :16  
##  Mean   :178.6   XL: 0  
##  3rd Qu.:181.5   XS: 2  
##  Max.   :190.0          
##  NA's   :3              
## -------------------------------------------------------- 
## data$spol: M
##     starost         mesec       spol        masa           visina     
##  Min.   :20.0   Min.   : 0.00   F: 0   Min.   :70.00   Min.   :171.0  
##  1st Qu.:21.0   1st Qu.: 4.75   M:10   1st Qu.:73.00   1st Qu.:178.5  
##  Median :21.0   Median : 7.50          Median :75.00   Median :180.0  
##  Mean   :24.9   Mean   : 7.10          Mean   :79.40   Mean   :180.0  
##  3rd Qu.:22.0   3rd Qu.:10.00          3rd Qu.:87.75   3rd Qu.:182.5  
##  Max.   :59.0   Max.   :11.00          Max.   :91.00   Max.   :189.0  
##                                                                       
##       roke           cevelj     lasje oci        mati            oce     
##  Min.   :173.0   Min.   :40.0   S:2   S:6   Min.   :155.0   Min.   :174  
##  1st Qu.:180.8   1st Qu.:42.0   T:8   T:4   1st Qu.:165.8   1st Qu.:175  
##  Median :186.0   Median :43.0               Median :167.5   Median :181  
##  Mean   :184.8   Mean   :42.9               Mean   :167.6   Mean   :181  
##  3rd Qu.:189.2   3rd Qu.:44.0               3rd Qu.:169.0   3rd Qu.:187  
##  Max.   :193.0   Max.   :45.0               Max.   :180.0   Max.   :188  
##  NA's   :2                                  NA's   :2       NA's   :2    
##  majica
##  L :3  
##  M :6  
##  S :0  
##  XL:1  
##  XS:0  
##        
## 

Po dveh faktorjih

# Split by every combination of spol and lasje, then summarise each part.
by(data, list(data$spol, data$lasje), summary)
## : F
## : S
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 2.000   F:17   Min.   :50.00   Min.   :156.0  
##  1st Qu.:21.00   1st Qu.: 5.000   M: 0   1st Qu.:56.00   1st Qu.:163.0  
##  Median :21.00   Median : 8.000          Median :59.00   Median :167.0  
##  Mean   :21.06   Mean   : 7.118          Mean   :59.35   Mean   :166.8  
##  3rd Qu.:21.00   3rd Qu.:10.000          3rd Qu.:63.00   3rd Qu.:170.0  
##  Max.   :23.00   Max.   :11.000          Max.   :70.00   Max.   :174.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :154.0   Min.   :36.00   S:17   S:14   Min.   :157.0  
##  1st Qu.:160.0   1st Qu.:38.00   T: 0   T: 3   1st Qu.:160.0  
##  Median :165.0   Median :39.00                 Median :164.0  
##  Mean   :164.7   Mean   :39.47                 Mean   :165.5  
##  3rd Qu.:169.2   3rd Qu.:40.00                 3rd Qu.:168.0  
##  Max.   :171.0   Max.   :48.00                 Max.   :178.0  
##  NA's   :1                                     NA's   :2      
##       oce        majica
##  Min.   :172.0   L :2  
##  1st Qu.:173.5   M :8  
##  Median :180.0   S :6  
##  Mean   :179.1   XL:0  
##  3rd Qu.:182.0   XS:1  
##  Max.   :190.0         
##  NA's   :2             
## -------------------------------------------------------- 
## : M
## : S
##     starost          mesec       spol       masa           visina     
##  Min.   :20.00   Min.   :10.00   F:0   Min.   :70.00   Min.   :180.0  
##  1st Qu.:20.25   1st Qu.:10.25   M:2   1st Qu.:71.25   1st Qu.:181.2  
##  Median :20.50   Median :10.50         Median :72.50   Median :182.5  
##  Mean   :20.50   Mean   :10.50         Mean   :72.50   Mean   :182.5  
##  3rd Qu.:20.75   3rd Qu.:10.75         3rd Qu.:73.75   3rd Qu.:183.8  
##  Max.   :21.00   Max.   :11.00         Max.   :75.00   Max.   :185.0  
##                                                                       
##       roke         cevelj      lasje oci        mati          oce     
##  Min.   :181   Min.   :40.00   S:2   S:2   Min.   :166   Min.   :188  
##  1st Qu.:184   1st Qu.:41.25   T:0   T:0   1st Qu.:166   1st Qu.:188  
##  Median :187   Median :42.50               Median :166   Median :188  
##  Mean   :187   Mean   :42.50               Mean   :166   Mean   :188  
##  3rd Qu.:190   3rd Qu.:43.75               3rd Qu.:166   3rd Qu.:188  
##  Max.   :193   Max.   :45.00               Max.   :166   Max.   :188  
##                                            NA's   :1     NA's   :1    
##  majica
##  L :0  
##  M :2  
##  S :0  
##  XL:0  
##  XS:0  
##        
##        
## -------------------------------------------------------- 
## : F
## : T
##     starost          mesec        spol        masa           visina     
##  Min.   :20.00   Min.   : 1.000   F:16   Min.   :51.00   Min.   :157.0  
##  1st Qu.:21.00   1st Qu.: 5.750   M: 0   1st Qu.:55.00   1st Qu.:161.8  
##  Median :21.00   Median : 7.000          Median :56.00   Median :168.0  
##  Mean   :21.38   Mean   : 6.312          Mean   :57.75   Mean   :166.9  
##  3rd Qu.:22.00   3rd Qu.: 7.000          3rd Qu.:60.50   3rd Qu.:170.2  
##  Max.   :23.00   Max.   :11.000          Max.   :70.00   Max.   :178.0  
##                                                                         
##       roke           cevelj      lasje  oci         mati      
##  Min.   :156.0   Min.   :37.00   S: 0   S: 4   Min.   :157.0  
##  1st Qu.:162.5   1st Qu.:37.75   T:16   T:12   1st Qu.:160.0  
##  Median :164.5   Median :39.00                 Median :165.0  
##  Mean   :165.8   Mean   :38.81                 Mean   :164.1  
##  3rd Qu.:168.8   3rd Qu.:39.25                 3rd Qu.:167.0  
##  Max.   :178.0   Max.   :43.00                 Max.   :174.0  
##  NA's   :2                                     NA's   :1      
##       oce      majica 
##  Min.   :170   L : 0  
##  1st Qu.:174   M : 5  
##  Median :178   S :10  
##  Mean   :178   XL: 0  
##  3rd Qu.:180   XS: 1  
##  Max.   :188          
##  NA's   :1            
## -------------------------------------------------------- 
## : M
## : T
##     starost         mesec       spol       masa           visina     
##  Min.   :20.0   Min.   : 0.00   F:0   Min.   :72.00   Min.   :171.0  
##  1st Qu.:21.0   1st Qu.: 3.75   M:8   1st Qu.:73.00   1st Qu.:176.8  
##  Median :21.5   Median : 7.00         Median :81.00   Median :180.0  
##  Mean   :26.0   Mean   : 6.25         Mean   :81.12   Mean   :179.4  
##  3rd Qu.:22.0   3rd Qu.: 8.50         3rd Qu.:88.50   3rd Qu.:181.5  
##  Max.   :59.0   Max.   :11.00         Max.   :91.00   Max.   :189.0  
##                                                                      
##       roke           cevelj   lasje oci        mati            oce       
##  Min.   :173.0   Min.   :41   S:0   S:4   Min.   :155.0   Min.   :174.0  
##  1st Qu.:181.2   1st Qu.:42   T:8   T:4   1st Qu.:166.0   1st Qu.:175.0  
##  Median :186.0   Median :43               Median :168.0   Median :180.0  
##  Mean   :184.0   Mean   :43               Mean   :167.9   Mean   :180.0  
##  3rd Qu.:188.5   3rd Qu.:44               3rd Qu.:170.0   3rd Qu.:184.5  
##  Max.   :190.0   Max.   :45               Max.   :180.0   Max.   :187.0  
##  NA's   :2                                NA's   :1       NA's   :1      
##  majica
##  L :3  
##  M :4  
##  S :0  
##  XL:1  
##  XS:0  
##        
## 
# Standard deviation of visina computed separately for each value of spol.
by(data$visina, data$spol, sd)
## data$spol: F
## [1] 5.416725
## -------------------------------------------------------- 
## data$spol: M
## [1] 5.270463

Dva histograma

# Lay out the plotting device as one row with two panels.
par(mfrow = c(1, 2))
# Draw one histogram of visina per spol group, filled with colour 8 of the
# current palette; the histogram objects returned by hist() are printed too.
by(data$visina, data$spol, hist, col = 8)

## data$spol: F
## $breaks
## [1] 155 160 165 170 175 180
## 
## $counts
## [1]  5  7 14  6  1
## 
## $density
## [1] 0.030303030 0.042424242 0.084848485 0.036363636 0.006060606
## 
## $mids
## [1] 157.5 162.5 167.5 172.5 177.5
## 
## $xname
## [1] "dd[x, ]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"
## -------------------------------------------------------- 
## data$spol: M
## $breaks
## [1] 170 175 180 185 190
## 
## $counts
## [1] 2 4 3 1
## 
## $density
## [1] 0.04 0.08 0.06 0.02
## 
## $mids
## [1] 172.5 177.5 182.5 187.5
## 
## $xname
## [1] "dd[x, ]"
## 
## $equidist
## [1] TRUE
## 
## attr(,"class")
## [1] "histogram"