RScript_Chapter_1.R

ruhil — Nov 2, 2013, 1:43 PM

## R Code for Chapter 1 ##

# Simple random sample of size 1 from the population 1, 2, ..., 100
Draw.1 <- sample(x = 1:100, size = 1)
Draw.1
[1] 77

# Simple random sample of size 10 from the population 1, 2, ..., 100
# (sample() draws WITHOUT replacement unless told otherwise)
Draw.10 <- sample(x = 1:100, size = 10, replace = FALSE)
Draw.10 # Hmm, what is the problem here? (Hint: can any value repeat?)
 [1] 25 90 16 89 98 64 79 53 82 33

# Draw 10 numbers from 1, 2, ..., 100 again, this time WITH REPLACEMENT:
# each pick goes back into the population, so a value may appear more than once
Draw.10 <- sample(x = 1:100, size = 10, replace = TRUE)
Draw.10
 [1] 11 47 70 21 80 92 15 30 39 10

# Let's sample some major landmasses
# `islands` is a named numeric vector, so the sampled values keep their names.
# library() stops with an error if the package cannot be loaded, whereas
# require() would only return FALSE and let the script carry on.
library(datasets)
data(islands)
# Draw 4 landmasses with replacement (the same one may be picked twice)
Islands.4 <- sample(islands, size = 4, replace = TRUE)
Islands.4
       Ellesmere            Timor       Madagascar Tierra del Fuego 
              82               13              227               19 

# How about Stratified Sampling?
# Attach the sampling package; the swissmunicipalities data set and the
# strata() call further down are used after this point.
# NOTE(review): require() only returns FALSE (with a warning) when the package
# is not installed, so later lines would fail with a less obvious error;
# library(sampling) would stop right here instead -- consider switching.
require(sampling)
Loading required package: sampling
Loading required package: MASS
Loading required package: lpSolve
# Load the swissmunicipalities data set and list its column names
data("swissmunicipalities")
colnames(swissmunicipalities)
 [1] "CT"           "REG"          "COM"          "Nom"         
 [5] "HApoly"       "Surfacesbois" "Surfacescult" "Alp"         
 [9] "Airbat"       "Airind"       "P00BMTOT"     "P00BWTOT"    
[13] "Pop020"       "Pop2040"      "Pop4065"      "Pop65P"      
[17] "H00PTOT"      "H00P01"       "H00P02"       "H00P03"      
[21] "H00P04"       "POPTOT"      
# Per-column overview: min / quartiles / mean / max for the numeric columns,
# and level counts for the factor column Nom
summary(object = swissmunicipalities)
       CT            REG            COM            Nom      
 Min.   : 1.0   Min.   :1.00   Min.   :   1   Brugg  :   2  
 1st Qu.: 5.0   1st Qu.:2.00   1st Qu.:1372   Hofen  :   2  
 Median :18.0   Median :2.00   Median :3606   Aadorf :   1  
 Mean   :14.2   Mean   :3.19   Mean   :3430   Aarau  :   1  
 3rd Qu.:22.0   3rd Qu.:5.00   3rd Qu.:5410   Aarberg:   1  
 Max.   :26.0   Max.   :7.00   Max.   :6806   Aarburg:   1  
                                              (Other):2888  
     HApoly       Surfacesbois   Surfacescult       Alp      
 Min.   :   32   Min.   :   0   Min.   :   0   Min.   :   0  
 1st Qu.:  361   1st Qu.:  96   1st Qu.: 122   1st Qu.:   0  
 Median :  683   Median : 225   Median : 241   Median :   0  
 Mean   : 1381   Mean   : 439   Mean   : 341   Mean   : 186  
 3rd Qu.: 1336   3rd Qu.: 516   3rd Qu.: 431   3rd Qu.:  83  
 Max.   :28225   Max.   :7055   Max.   :3634   Max.   :8337  

     Airbat           Airind          P00BMTOT         P00BWTOT     
 Min.   :   0.0   Min.   :  0.00   Min.   :    11   Min.   :    11  
 1st Qu.:  13.0   1st Qu.:  0.00   1st Qu.:   172   1st Qu.:   171  
 Median :  27.0   Median :  2.00   Median :   434   Median :   437  
 Mean   :  47.5   Mean   :  6.99   Mean   :  1232   Mean   :  1285  
 3rd Qu.:  57.0   3rd Qu.:  7.00   3rd Qu.:  1112   3rd Qu.:  1123  
 Max.   :2884.0   Max.   :260.00   Max.   :175836   Max.   :187437  

     Pop020         Pop2040          Pop4065           Pop65P     
 Min.   :    1   Min.   :     3   Min.   :     7   Min.   :    2  
 1st Qu.:   88   1st Qu.:    90   1st Qu.:   113   1st Qu.:   50  
 Median :  224   Median :   234   Median :   283   Median :  118  
 Mean   :  575   Mean   :   739   Mean   :   816   Mean   :  386  
 3rd Qu.:  570   3rd Qu.:   629   3rd Qu.:   725   3rd Qu.:  300  
 Max.   :57324   Max.   :131422   Max.   :108178   Max.   :66349  

    H00PTOT           H00P01          H00P02          H00P03     
 Min.   :     8   Min.   :    0   Min.   :    1   Min.   :    0  
 1st Qu.:   132   1st Qu.:   35   1st Qu.:   42   1st Qu.:   19  
 Median :   331   Median :   88   Median :  107   Median :   48  
 Mean   :  1076   Mean   :  387   Mean   :  340   Mean   :  139  
 3rd Qu.:   862   3rd Qu.:  242   3rd Qu.:  276   3rd Qu.:  126  
 Max.   :186880   Max.   :94797   Max.   :55019   Max.   :17596  

     H00P04          POPTOT      
 Min.   :    0   Min.   :    22  
 1st Qu.:   33   1st Qu.:   344  
 Median :   85   Median :   870  
 Mean   :  209   Mean   :  2517  
 3rd Qu.:  213   3rd Qu.:  2235  
 Max.   :19468   Max.   :363273  

# Order the rows by REG, breaking ties by CT
new.data <- swissmunicipalities[with(swissmunicipalities, order(REG, CT)), ]
# Stratify on REG and draw 40 units with replacement ("srswr") in each of the
# 7 strata -- 280 draws in total, roughly a 10% sample
Strat.Sample <- strata(
  data = new.data,
  stratanames = "REG",
  size = rep(40, 7),
  method = "srswr"
)