Exercise on Simple and Stratified Random Sampling

Tasks :

(Item 1)

Load dataset agpop.dat into R.

(Item 2)

Create a dataframe in R from the dataset in (1) without any missing value for ACRES92.

(Item 3)

Create four (4) additional dataframes, one for each of the regions.

(Item 4)

Set random seed using the last 5 digits of your student number.

(Item 5)

Obtain an SRS of size 300 from the dataframe in (2).

(Item 6)

Obtain an estimate of the population mean using eqn. (2.8).

(Item 7)

Obtain a 95% confidence interval for the population mean using eqns. (2.12) and (2.22).

(Item 8)

Obtain a stratified sample of size 300 from the dataframes in (3), allocated according to the stratum sample sizes given on page 75 (Lohr).

(Item 9)

Obtain an estimate of the population mean using eqn. (3.2).

(Item 10)

Obtain a 95% confidence interval for the population mean using the formulas on page 79 (Lohr) using the percentile of the t-distribution with n-H degrees of freedom. (n = 300, H=4)
##############################################
############  STAT 250 Exercise  #############
############   10-Oct-2018       #############
############ John Pauline Pineda #############
##############################################

# Set working directory
# The data folder below is specific to the original author's machine. Only
# switch to it when it exists, so the script does not abort with a setwd()
# error elsewhere; in that case agpop.dat is looked up in the current
# working directory instead.
data_dir <- "F:/SamplingDesign"
if (dir.exists(data_dir)) {
  setwd(data_dir)
}

Item 1

##############################################
############       ITEM 1        #############
##############################################
# Load dataset agpop.dat into R
##############################################

# Load the working data
# stringsAsFactors = TRUE keeps the text columns (COUNTY, STATE, REGION) as
# factors on every R version; the read.csv() default became FALSE in
# R 4.0.0. The factor-style summary printed below relies on this.
agpop <- read.csv("agpop.dat", stringsAsFactors = TRUE)

# Initial exploratory analysis

# Check the data dimensions
# 3078 rows and 15 columns
dim(agpop)
## [1] 3078   15
# Generate the data summary
summary(agpop)
##                COUNTY         STATE         ACRES92       
##  WASHINGTON COUNTY:  30   TX     : 254   Min.   :    -99  
##  JEFFERSON COUNTY :  25   GA     : 159   1st Qu.:  80903  
##  FRANKLIN COUNTY  :  24   KY     : 120   Median : 191648  
##  JACKSON COUNTY   :  23   MO     : 114   Mean   : 306677  
##  LINCOLN COUNTY   :  23   KS     : 105   3rd Qu.: 366886  
##  MADISON COUNTY   :  19   IL     : 102   Max.   :7229585  
##  (Other)          :2934   (Other):2224                    
##     ACRES87           ACRES82           FARMS92          FARMS87      
##  Min.   :    -99   Min.   :    -99   Min.   :   0.0   Min.   :   0.0  
##  1st Qu.:  86236   1st Qu.:  96397   1st Qu.: 295.0   1st Qu.: 318.5  
##  Median : 199864   Median : 207292   Median : 521.0   Median : 572.0  
##  Mean   : 313016   Mean   : 320194   Mean   : 625.5   Mean   : 678.3  
##  3rd Qu.: 372224   3rd Qu.: 377065   3rd Qu.: 838.0   3rd Qu.: 921.0  
##  Max.   :7687460   Max.   :7313958   Max.   :7021.0   Max.   :7590.0  
##                                                                       
##     FARMS82          LARGEF92         LARGEF87         LARGEF82     
##  Min.   :   0.0   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 345.0   1st Qu.:  8.00   1st Qu.:  8.00   1st Qu.:  8.00  
##  Median : 616.0   Median : 30.00   Median : 27.00   Median : 25.00  
##  Mean   : 728.1   Mean   : 56.18   Mean   : 54.86   Mean   : 52.62  
##  3rd Qu.: 991.0   3rd Qu.: 75.00   3rd Qu.: 70.00   3rd Qu.: 65.00  
##  Max.   :7394.0   Max.   :579.00   Max.   :596.00   Max.   :546.00  
##                                                                     
##     SMALLF92          SMALLF87          SMALLF82       REGION   
##  Min.   :   0.00   Min.   :   0.00   Min.   :   0.00   NC:1054  
##  1st Qu.:  13.00   1st Qu.:  17.00   1st Qu.:  16.00   NE: 220  
##  Median :  29.00   Median :  35.00   Median :  34.00   S :1382  
##  Mean   :  54.09   Mean   :  59.54   Mean   :  60.97   W : 422  
##  3rd Qu.:  59.00   3rd Qu.:  67.00   3rd Qu.:  67.00            
##  Max.   :4298.00   Max.   :3654.00   Max.   :3522.00            
## 

Item 2

##############################################
############       ITEM 2        #############
##############################################
# Create a dataframe in R from the dataset 
# in (1) without any missing value for ACRES92
##############################################

# Missing values are coded as -99 in this dataset.
# Flag the rows whose ACRES92 is missing and count them (19 rows)
acres92_is_missing <- agpop$ACRES92 == -99
nrow(agpop[acres92_is_missing, ])
## [1] 19
# Same check for the REGION column (0 rows)
region_is_missing <- agpop$REGION == -99
nrow(agpop[region_is_missing, ])
## [1] 0
# Drop the rows with missing ACRES92 and retain only the two columns
# that the rest of the analysis uses
columns_needed <- c("ACRES92", "REGION")
agpop_complete <- agpop[!acres92_is_missing, columns_needed]

# Dimensions of the cleaned data: 3059 rows and 2 columns
print(dim(agpop_complete))
## [1] 3059    2
# Summary of the cleaned data
print(summary(agpop_complete))
##     ACRES92        REGION   
##  Min.   :      0   NC:1052  
##  1st Qu.:  82446   NE: 213  
##  Median : 193688   S :1376  
##  Mean   : 308582   W : 418  
##  3rd Qu.: 368482            
##  Max.   :7229585
# Population size (number of rows kept)
N <- nrow(agpop_complete)
print(N)
## [1] 3059
# Reference value: population mean of ACRES92 (308582.4)
agpop_mean <- mean(agpop_complete[["ACRES92"]])
print(agpop_mean)
## [1] 308582.4
# Reference value: population variance of ACRES92 (1.80891e+11)
agpop_variance <- var(agpop_complete[["ACRES92"]])
print(agpop_variance)
## [1] 1.80891e+11
# Reference value: population standard deviation of ACRES92 (425312.8)
agpop_sd <- sd(agpop_complete[["ACRES92"]])
print(agpop_sd)
## [1] 425312.8

Item 3

##############################################
############       ITEM 3        #############
##############################################
# Create four(4) additional dataframes  
# for each of the regions
##############################################

# Specify the number of strata
# Count the distinct regions present in the data instead of calling
# nlevels(): REGION is only a factor when read.csv() converted strings to
# factors (the default before R 4.0.0). On a character column nlevels()
# returns 0, which would silently change the n - H degrees of freedom used
# for the stratified confidence interval.
(H <- length(unique(agpop_complete$REGION)))
## [1] 4
# Create data objects for the regions / strata
# Each data frame holds the rows of one region, keeping both columns
NCregion <- agpop_complete[agpop_complete$REGION == "NC", ]
NEregion <- agpop_complete[agpop_complete$REGION == "NE", ]
Sregion <- agpop_complete[agpop_complete$REGION == "S", ]
Wregion <- agpop_complete[agpop_complete$REGION == "W", ]

# Population size of each stratum (number of rows per region)
# North Central: 1052
N.NCregion <- nrow(NCregion)
print(N.NCregion)
## [1] 1052
# North East: 213
N.NEregion <- nrow(NEregion)
print(N.NEregion)
## [1] 213
# South: 1376
N.Sregion <- nrow(Sregion)
print(N.Sregion)
## [1] 1376
# West: 418
N.Wregion <- nrow(Wregion)
print(N.Wregion)
## [1] 418
# Reference values: population mean of ACRES92 within each stratum
# North Central: 326570.8
NCregion_mean <- mean(NCregion[["ACRES92"]])
print(NCregion_mean)
## [1] 326570.8
# North East: 93600.31
NEregion_mean <- mean(NEregion[["ACRES92"]])
print(NEregion_mean)
## [1] 93600.31
# South: 200009.2
Sregion_mean <- mean(Sregion[["ACRES92"]])
print(Sregion_mean)
## [1] 200009.2
# West: 730266.9
Wregion_mean <- mean(Wregion[["ACRES92"]])
print(Wregion_mean)
## [1] 730266.9
# Reference values: population variance of ACRES92 within each stratum
# North Central: 7.35429e+10
NCregion_variance <- var(NCregion[["ACRES92"]])
print(NCregion_variance)
## [1] 73542921422
# North East: 6.22619e+09
NEregion_variance <- var(NEregion[["ACRES92"]])
print(NEregion_variance)
## [1] 6226188633
# South: 5.96004e+10
Sregion_variance <- var(Sregion[["ACRES92"]])
print(Sregion_variance)
## [1] 59600425689
# West: 6.99922e+11
Wregion_variance <- var(Wregion[["ACRES92"]])
print(Wregion_variance)
## [1] 699922245636
# Reference values: population standard deviation of ACRES92 per stratum
# North Central: 271188
NCregion_sd <- sd(NCregion[["ACRES92"]])
print(NCregion_sd)
## [1] 271188
# North East: 78906.2
NEregion_sd <- sd(NEregion[["ACRES92"]])
print(NEregion_sd)
## [1] 78906.2
# South: 244132
Sregion_sd <- sd(Sregion[["ACRES92"]])
print(Sregion_sd)
## [1] 244132
# West: 836613.6
Wregion_sd <- sd(Wregion[["ACRES92"]])
print(Wregion_sd)
## [1] 836613.6

Item 4

##############################################
############       ITEM 4        #############
##############################################
# Set the random seed using the 
# last 5 digits of your student number
##############################################

# Fix the RNG seed (last five digits of the student number) so that the
# random draws are reproducible
set.seed(seed = 89176)

Item 5

##############################################
############       ITEM 5        #############
##############################################
# Obtain an SRS of size 300 
# from the dataframe in (2)
##############################################

# Sample size for the simple random sample
n <- 300
print(n)
## [1] 300
# Re-seed, then draw n distinct row positions out of 1..N (no replacement)
# NOTE(review): the indices recorded below date from 2018; R 3.6.0 changed
# the default sample() algorithm, so a newer R presumably yields different
# indices for the same seed -- confirm before comparing with this output.
set.seed(seed = 89176)
sampleindices <- sample(x = N, size = n)
print(sampleindices)
##   [1]  602  164 2038 1444  733 1545 2387  234 1927  524 2181 1232 2116  704
##  [15] 2867 2191 2641 2343  813  966  616 2089   79 1163 2496 1355 1491  332
##  [29] 2355 2878 2722 1586  621 2669 2281  781 3019   71 1652 2731  467 1379
##  [43]  162 2674 1542 2876  922 1696 1577  519  545 1767   97   99  881 2534
##  [57] 1911  723 3012  333  130 2160 2610  551  247 2499  300 1570  271 2551
##  [71] 1020 1899  349 2630 2656  397 1691 2941 2944 1568 2977 2035  122 1429
##  [85] 1936  959 1114 2486  639 2834    7  179  537 1450 1666  558 1920 1001
##  [99] 2391 1364  268 1057 2672 2718 1113 1251  903 1439 1747 1757 2426 1155
## [113]  823 2798 1019  955   20  336 2440  963 1779 1912 1727 2012 1123  987
## [127]   26  543 1555 2677   57 1842 1917 2190  740 1126  880 1050 2456 2058
## [141] 2872 2412  874 2637  207 1880  106  665   35 2015 2694 1380 1229 2255
## [155]  330 1235  400 2590 2483  576 2250  296 2479 1817  205 2598 1797 2760
## [169] 1306 1070 1520 2654 1075 1605 1308 1623 2799  982  844 2510 1826  789
## [183] 2589  972  780 2042 2787 2506 1081  933 1975  131 1601 2334 2472 1693
## [197]   17 1165 2434 1454 2045 1658   47 1323 2901 2248  315 2103 2724  456
## [211] 3003 2345 1396 1565  735  371 2532  399 1723  884 2962 1383 2608 2491
## [225] 1182 2233 1665 1844 1151 1871 1241  573 1749 1643 2143 2858 1525 1538
## [239] 1793 1583 1497  301 2331 1201 2697 2671 2926 1858  581  756 1869 2306
## [253] 1290  930   72 2619  859  842 2582 1282  775 2392 1141 2273  250   40
## [267] 3031 1857 1271 2080  896  226 2765 2475 1461 1524  598  405  692  892
## [281] 2454  915  701 1481 2554 2292 2948 2840  368 1970   74 1868 2327  999
## [295] 2435 2832 1125 2782 2658  925
# Pull the sampled rows out of the cleaned population data
agpop_sampled <- agpop_complete[sampleindices, ]
print(agpop_sampled)
##      ACRES92 REGION
## 606   233217     NC
## 164     4768      W
## 2049  245049     NC
## 1450  250475     NC
## 737   184599     NC
## 1551   96540      S
## 2400   41899      S
## 234   156801      W
## 1933 1797466      W
## 528   238609     NC
## 2192 1457339      W
## 1238   47308     NC
## 2127  419760      S
## 708   217191     NC
## 2884   93364     NE
## 2202 1318447      W
## 2656  678590      S
## 2356  974811     NC
## 817   223328     NC
## 970   111913      S
## 620   357684     NC
## 2100  187175     NC
## 79     18818      S
## 1169  126981      S
## 2509  408710      S
## 1361  286337     NC
## 1497   42712      S
## 335   299699      S
## 2368 1361106     NC
## 2895   82967      W
## 2737  307783      S
## 1592  868064      W
## 625   221209      W
## 2684   98449      S
## 2294   44800      S
## 785   202429     NC
## 3038  115487      S
## 71    141260      S
## 1658  180400      S
## 2746  563183      S
## 471    18644      S
## 1385  255498     NC
## 162   229365      W
## 2689 1555905      S
## 1548  118651      S
## 2893  112085      W
## 926   427403     NC
## 1702   51916      S
## 1583 2232575      W
## 523   214452      W
## 549   312173     NC
## 1773  591185     NC
## 97    223889      S
## 99     37606      S
## 885   403375     NC
## 2547 2001152      S
## 1917 1881764      W
## 727   299709     NC
## 3031   54622      S
## 336   296242      S
## 130   156363      S
## 2171  421233      S
## 2624  354917      S
## 555   343870     NC
## 248   641755      W
## 2512  547829      S
## 302   227202      S
## 1576 1424228      W
## 272   546538      W
## 2564  426189      S
## 1024   23062      S
## 1905 3112271      W
## 352   151242      S
## 2645  346653      S
## 2671  536300      S
## 401   109923      S
## 1697   68736      S
## 2958   92761     NC
## 2961  356651     NC
## 1574  598694      W
## 2996  114184     NC
## 2046  275644     NC
## 122    69422      S
## 1435  111549     NC
## 1942  738041      W
## 963   443802     NC
## 1118   36059      S
## 2499  612718      S
## 643   353528      W
## 2851   71803      S
## 7     167832      S
## 179   164130      W
## 541   308497     NC
## 1456  188595     NC
## 1672   56693      S
## 562   317205     NC
## 1926  770155      W
## 1005   41352      S
## 2404   96181      S
## 1370  420778     NC
## 269    32072      W
## 1061   35712      S
## 2687  362642      S
## 2733  328367      S
## 1117   58730      S
## 1257  121153     NC
## 907   141386     NC
## 1445  368849     NC
## 1753 1128346     NC
## 1763  503575     NC
## 2439  191486      S
## 1159   31583     NE
## 827    32318     NC
## 2815  136320      S
## 1023   98545      S
## 959   484093     NC
## 20     47200      S
## 339    44962      S
## 2453  123557      S
## 967   177858      S
## 1785  649612     NC
## 1918      10      W
## 1733  877382     NC
## 2023  138297     NC
## 1127   97643      S
## 991   144904      S
## 26    111315      S
## 547   368114     NC
## 1561  108236      S
## 2692  391842      S
## 57    155914      S
## 1848  297326     NC
## 1923 1769177      W
## 2201  380464      W
## 744    82426     NC
## 1130    6166      S
## 884   222028     NC
## 1054  105068      S
## 2469   31368      S
## 2069  200405     NC
## 2889   89785     NE
## 2425  135469      S
## 878   486997     NC
## 2652   49579      S
## 207   388084      W
## 1886   11644     NE
## 106   367969      S
## 669    69354     NC
## 35    130063      S
## 2026  179280     NC
## 2709  536507      S
## 1386  232189     NC
## 1235  438914     NC
## 2268   90065     NE
## 333    11738      S
## 1241  210638     NC
## 404   168593      S
## 2604  490578      S
## 2496  962576      S
## 580   266083     NC
## 2262   41347     NE
## 298    36230      S
## 2492  123792      S
## 1823 1387740     NC
## 205   342653      W
## 2612  656961      S
## 1803  265048     NC
## 2775  167374      W
## 1312  269147     NC
## 1074   93887      S
## 1526   62833      S
## 2669  525885      S
## 1079  196701      S
## 1611 1197028      W
## 1314  272049     NC
## 1629   70697      S
## 2816   37044      S
## 986    78966      S
## 848   257351     NC
## 2523  208073      S
## 1832  612694     NC
## 793   148662     NC
## 2603  470096      S
## 976    27836      S
## 784    86236     NC
## 2053  219023     NC
## 2804   85954      S
## 2519  513533      S
## 1085   61145      S
## 937   432326     NC
## 1984  135494     NE
## 131   313232      S
## 1607 1629363      W
## 2347  392935     NC
## 2485   11292      S
## 1699  194015      S
## 17     67950      S
## 1171   80241      S
## 2447   91343      S
## 1460  228936     NC
## 2056  210601     NC
## 1664  156027      S
## 47    207226      S
## 1329  250507     NC
## 2918   92074      W
## 2260   20777     NE
## 318    69405      S
## 2114  300829      S
## 2739  260892      S
## 460    68729      S
## 3022   30015      S
## 2358  373787     NC
## 1402  252783     NC
## 1571 3002378      W
## 739   261482     NC
## 375   168861      S
## 2545  471498      S
## 403    19060      S
## 1729  818893     NC
## 888   442362     NC
## 2980  133197     NC
## 1389  252890     NC
## 2622  383573      S
## 2504  396508      S
## 1188   62242     NE
## 2245  125707     NE
## 1671   75496      S
## 1850  250086     NC
## 1155   25470     NE
## 1877   39844     NE
## 1247  193956     NC
## 577   321950     NC
## 1755 1233663     NC
## 1649  162634      S
## 2154  390957      S
## 2875   43332      S
## 1531  125713      S
## 1544   80342      S
## 1799 1425338     NC
## 1589  349938      W
## 1503   79962      S
## 303    70672      S
## 2344  688081     NC
## 1207   77493     NC
## 2712  926093      S
## 2686  617851      S
## 2943  130051     NC
## 1864  360203     NC
## 585   305685     NC
## 760   115517     NC
## 1875   46056     NE
## 2319   82634      S
## 1296  165961     NC
## 934   449151     NC
## 72     56680      S
## 2634  518028      S
## 863   378517     NC
## 846   181020     NC
## 2596  201952      S
## 1288   61832     NC
## 779   105658     NC
## 2405   91858      S
## 1145  116221      S
## 2286   12408     NE
## 251   878447      W
## 40    191810      S
## 3050   28622      S
## 1863  347598     NC
## 1277  181569     NC
## 2091   19088     NC
## 900   499112     NC
## 226   796892      W
## 2780   25810      S
## 2488  165309      S
## 1467  168586     NC
## 1530   98914      S
## 602   542855     NC
## 409     8151      S
## 696   431415     NC
## 896   517623     NC
## 2467   53026      S
## 919   588061     NC
## 705   662629     NC
## 1487  316617     NC
## 2568  412632      S
## 2305   62108      S
## 2965  529966     NC
## 2857  160973      S
## 372    17105      S
## 1979  195626     NE
## 74    151325      S
## 1874   33935     NE
## 2340  496799     NC
## 1003   86074      S
## 2448  182754      S
## 2849   48889      S
## 1129   57789      S
## 2798   17392      S
## 2673  593819      S
## 929   668420     NC

Item 8

##############################################
############       ITEM 8        #############
##############################################
# Obtain a stratified sample of size 300 from 
# the dataframes from (3) allocated according 
# to the sizes mentioned in page 75 (Lohr)
##############################################

# Stratum sample sizes (North Central, North East, South, West);
# together they add up to the overall sample size of 300
stratum_sample_sizes <- c(NC = 103, NE = 21, S = 135, W = 41)
n.NCregion <- stratum_sample_sizes[["NC"]]
print(n.NCregion)
## [1] 103
n.NEregion <- stratum_sample_sizes[["NE"]]
print(n.NEregion)
## [1] 21
n.Sregion <- stratum_sample_sizes[["S"]]
print(n.Sregion)
## [1] 135
n.Wregion <- stratum_sample_sizes[["W"]]
print(n.Wregion)
## [1] 41
# Generate the sample indices for each stratum
# For every stratum the seed is reset to 89176 and then n_h row positions are
# drawn without replacement from 1..N_h (positions within that stratum's
# data frame, not row names of the full data).
# NOTE(review): because the same seed is set before every draw, all four
# draws start from the same RNG state. The recorded indices reflect this:
# the first draws 207/1052, 42/213, 271/1376 and 83/418 all sit at about
# 0.20 of their stratum. Stratified sampling normally assumes independent
# draws across strata; consider seeding once before the first draw only.
# Doing so would change every recorded result from here on, so confirm
# with the author before altering it.
# North Central stratum: 103 positions out of 1052
set.seed(89176)
(sampleindices.NCregion <- sample(N.NCregion,n.NCregion))
##   [1]  207   57  700  496  252  530  818   80  660  180  746  421  722  240
##  [15]  978  747  900  798  277  329  209  709   27  395  846  459  505  112
##  [29]  796  972  919  535  210  899  768  263 1015   24  555  916  157  462
##  [43]   55  895  516  961  308  566  526  173  182  588   32   33  293  841
##  [57]  634 1039  562  110   43  714  862 1002   82  824   99  517   89  839
##  [71]  335  624  115  863  870  130  554  962  975  512 1023  664   40  465
##  [85]  630  312  362  807  208 1022    3   58  174  469  539 1043  620  323
##  [99]  771  439   87  340  859
# North East stratum: 21 positions out of 213
set.seed(89176)
(sampleindices.NEregion <- sample(N.NEregion,n.NEregion))
##  [1]  42  12 141 100  51 106 162  16 130  36 146  82 140  47 188 143 171
## [18] 151  53  62  40
# South stratum: 135 positions out of 1376
set.seed(89176)
(sampleindices.Sregion <- sample(N.Sregion,n.Sregion))
##   [1]  271   74  916  649  330  694 1072  105  864  235  977  552  947  315
##  [15] 1283  980 1181 1047  363  432  275  932   35  519 1112  604  664  148
##  [29] 1359 1280 1210  705  276 1185 1012  347 1339   32  732 1209  207  610
##  [43]   72 1182  682 1270  407  748  696  229  240  779   43   44  388 1114
##  [57]  840  318  745  146   57  948 1145  242  108 1095  132  687  119 1115
##  [71]  446  830  153 1148 1159  173  737 1281 1282  683 1295  885   53  621
##  [85]  841  417  484 1078  277 1228    3   78  233  627  721  241 1305  433
##  [99] 1032  589  116  456 1152 1171  479  538 1322  619  751  755 1042  496
## [113]  353 1200  437  409    9  144 1044  412  761  817  738  859 1272  421
## [127]   12  231  662 1140   25  783  815  931 1363
# West stratum: 41 positions out of 418
set.seed(89176)
(sampleindices.Wregion <- sample(N.Wregion,n.Wregion))
##  [1]  83  23 278 196 100 209 323  32 259  71 292 165 282  94 381 291 349
## [18] 309 107 127  81 273  11 152 325 176 193  43 303 370 402 203  80 340
## [35] 290  99 382   9 208 343  59
# Extract the sampled North Central rows using the drawn positions
NCregion_sampled <- NCregion[sampleindices.NCregion, ]
print(NCregion_sampled)
##      ACRES92 REGION
## 775   223429     NC
## 581   349252     NC
## 1743  769225     NC
## 1301  210897     NC
## 820    71596     NC
## 1335  422916     NC
## 1861 1005877     NC
## 604   293266     NC
## 1465  204171     NC
## 748   175847     NC
## 1789  270005     NC
## 1226  233921     NC
## 1765  522536     NC
## 808   130826     NC
## 2395  367239     NC
## 1790  335849     NC
## 2086  202324     NC
## 1841  236950     NC
## 845   121710     NC
## 897   424104     NC
## 777   220057     NC
## 1752 1165695     NC
## 551   261494     NC
## 963   443802     NC
## 2032    4060     NC
## 1264   73661     NC
## 1310  366534     NC
## 680   223764     NC
## 1839  290608     NC
## 2389 1417516     NC
## 2336  787857     NC
## 1340  457670     NC
## 778   227711     NC
## 2085  253383     NC
## 1811  430972     NC
## 831    80078     NC
## 2961  356651     NC
## 548   415104     NC
## 1360  231610     NC
## 2102  216318     NC
## 725   310518     NC
## 1267  129083     NC
## 579   615034     NC
## 2081   96293     NC
## 1321  482991     NC
## 2378  485748     NC
## 876   407464     NC
## 1371  290627     NC
## 1331  744710     NC
## 741   251277     NC
## 750   141703     NC
## 1393  187856     NC
## 556   224811     NC
## 557   401625     NC
## 861   162244     NC
## 2027   99214     NC
## 1439  242018     NC
## 2986   47376     NC
## 1367  237239     NC
## 678   390149     NC
## 567   399155     NC
## 1757  552707     NC
## 2048  113329     NC
## 2948  351633     NC
## 606   233217     NC
## 1867  228167     NC
## 623   353683     NC
## 1322   68778     NC
## 613   241422     NC
## 2025  215796     NC
## 903   479903     NC
## 1429  249046     NC
## 683    40917     NC
## 2049  245049     NC
## 2056  210601     NC
## 698   303715     NC
## 1359  643762     NC
## 2379 2076199     NC
## 2392  615479     NC
## 1317  107810     NC
## 2970  346398     NC
## 1469   54082     NC
## 564   332377     NC
## 1270  108726     NC
## 1435  111549     NC
## 880   302849     NC
## 930   465527     NC
## 1850  250086     NC
## 776    22555     NC
## 2969    8763     NC
## 527   321728     NC
## 582   191291     NC
## 742   443475     NC
## 1274   79921     NC
## 1344  263274     NC
## 2990  348602     NC
## 1425  268447     NC
## 891   316317     NC
## 1814  338136     NC
## 1244  190706     NC
## 611   278922     NC
## 908   517376     NC
## 2045   28983     NC
# Extract the sampled North East rows using the drawn positions
NEregion_sampled <- NEregion[sampleindices.NEregion, ]
print(NEregion_sampled)
##      ACRES92 REGION
## 1874   33935     NE
## 1151    5340     NE
## 2228   19026     NE
## 1983       0     NE
## 1883    2636     NE
## 1989  133854     NE
## 2249   85113     NE
## 1155   25470     NE
## 2216  172366     NE
## 1200  106971     NE
## 2233   39412     NE
## 1964  138620     NE
## 2226   76997     NE
## 1879   34292     NE
## 2276   52760     NE
## 2230  176643     NE
## 2258  160802     NE
## 2238    5095     NE
## 1885    7799     NE
## 1894   58758     NE
## 1872   20910     NE
# Extract the sampled South rows using the drawn positions
Sregion_sampled <- Sregion[sampleindices.Sregion, ]
print(Sregion_sampled)
##      ACRES92 REGION
## 425    73869      S
## 79     18818      S
## 2437  224247      S
## 1563  342237      S
## 484    37923      S
## 1664  156027      S
## 2594   32436      S
## 110   281895      S
## 2319   82634      S
## 389    33641      S
## 2498   19131      S
## 1166   43320      S
## 2468   37550      S
## 469    51836      S
## 2837   98914      S
## 2501  765139      S
## 2704  187728      S
## 2569  496742      S
## 517   115516      S
## 1032    1501      S
## 429    36074      S
## 2453  123557      S
## 40    191810      S
## 1119  246536      S
## 2635  432379      S
## 1518   89168      S
## 1634  127760      S
## 300    23735      S
## 3038  115487      S
## 2834    6152      S
## 2733  328367      S
## 1675   13310      S
## 430    53944      S
## 2708  632622      S
## 2533  670459      S
## 501    32800      S
## 3018  101214      S
## 37    128357      S
## 1702   51916      S
## 2732  455873      S
## 361    45214      S
## 1524   53401      S
## 77    250819      S
## 2705  507449      S
## 1652   41750      S
## 2824    9559      S
## 1007  127161      S
## 1718   87478      S
## 1666  113654      S
## 383    82549      S
## 394    10192      S
## 2126  662121      S
## 48    199714      S
## 49    138437      S
## 988   112831      S
## 2637  527837      S
## 2295   51020      S
## 472    44470      S
## 1715  167379      S
## 298    36230      S
## 62    112620      S
## 2469   31368      S
## 2668  220355      S
## 396   198184      S
## 113   143104      S
## 2617  322324      S
## 137   195510      S
## 1657   19676      S
## 124    32003      S
## 2638  318658      S
## 1046    5256      S
## 2177  577693      S
## 305   301977      S
## 2671  536300      S
## 2682  402011      S
## 326   244185      S
## 1707  266067      S
## 2835  167858      S
## 2836   21507      S
## 1653    7046      S
## 2849   48889      S
## 2406   30299      S
## 58    144193      S
## 1535  175231      S
## 2296   90995      S
## 1017  197826      S
## 1084   44548      S
## 2600  545664      S
## 431    35387      S
## 2780   25810      S
## 8     177189      S
## 83    313573      S
## 387     5901      S
## 1541  218154      S
## 1691  155213      S
## 395   178861      S
## 2859  125394      S
## 1033    3383      S
## 2553  856707      S
## 1503   79962      S
## 121    79803      S
## 1056  191002      S
## 2675   56975      S
## 2694  572607      S
## 1079  196701      S
## 1138   40181      S
## 3001   76080      S
## 1533   89816      S
## 1721  179554      S
## 1725   38394      S
## 2563 1117134      S
## 1096  327511      S
## 507    88829      S
## 2723   58690      S
## 1037  278675      S
## 1009  206090      S
## 14    109555      S
## 296    24489      S
## 2566  581776      S
## 1012   69711      S
## 2108  513789      S
## 2164  353045      S
## 1708   53690      S
## 2314   72500      S
## 2826   37777      S
## 1021   80692      S
## 17     67950      S
## 385    21697      S
## 1632  144529      S
## 2663  495364      S
## 30    210733      S
## 2130  338529      S
## 2162  328976      S
## 2452   73654      S
## 3042  104194      S
# Extract the sampled West rows using the drawn positions
Wregion_sampled <- Wregion[sampleindices.Wregion, ]
print(Wregion_sampled)
##      ACRES92 REGION
## 225  1257229      W
## 165   236222      W
## 1937   82100      W
## 1578 2085181      W
## 243   440581      W
## 1591  367482      W
## 2211  694304      W
## 174   597766      W
## 1918      10      W
## 213   318156      W
## 2180  818736      W
## 645   380928      W
## 1941  235826      W
## 237   167106      W
## 2914   32637      W
## 1952    5361      W
## 2770  373582      W
## 2197  720153      W
## 250   103470      W
## 270  1004360      W
## 223   322823      W
## 1932  324476      W
## 153   137834      W
## 632   150021      W
## 2213  139820      W
## 656   224369      W
## 1575 1619482      W
## 185   725118      W
## 2191 1154399      W
## 2903   44742      W
## 3062 2415873      W
## 1585  699409      W
## 222   207448      W
## 2761  209819      W
## 1951  231758      W
## 242   331639      W
## 2915   55360      W
## 151  1151284      W
## 1590 1644001      W
## 2764   58522      W
## 201   783715      W
# Cross-check each stratum sample: tabulating REGION should show the
# intended sample size, all of it under that stratum's own region
print(table(NCregion_sampled[["REGION"]]))
## 
##  NC  NE   S   W 
## 103   0   0   0
print(table(NEregion_sampled[["REGION"]]))
## 
## NC NE  S  W 
##  0 21  0  0
print(table(Sregion_sampled[["REGION"]]))
## 
##  NC  NE   S   W 
##   0   0 135   0
print(table(Wregion_sampled[["REGION"]]))
## 
## NC NE  S  W 
##  0  0  0 41

Item 6

##############################################
############       ITEM 6        #############
##############################################
# Obtain an estimate of the population mean  
# using eqn. (2.8)
##############################################

# SRS estimate of the population mean: the plain sample mean (equation 2.8)
# Recorded result: 331645.1 (reference population mean: 308582.4)
sampled_acres92 <- agpop_sampled[["ACRES92"]]
agpop_sampled_mean <- mean(sampled_acres92)
print(agpop_sampled_mean)
## [1] 331645.1
# The simple random sample estimates the population mean as 331645.1

Item 7

##############################################
############       ITEM 7        #############
##############################################
# Obtain a 95% confidence interval for the 
# population mean using eqns. (2.12) and (2.22) 
##############################################

# Sample variance of ACRES92 in the SRS
# Recorded result: 1.8243e+11 (reference population variance: 1.80891e+11)
agpop_sampled_variance <- var(agpop_sampled[["ACRES92"]])
print(agpop_sampled_variance)
## [1] 182430448622
# Sample standard deviation of ACRES92 in the SRS
# Recorded result: 427118.8 (reference population sd: 425312.8)
agpop_sampled_stdev <- sd(agpop_sampled[["ACRES92"]])
print(agpop_sampled_stdev)
## [1] 427118.8
# Sanity check: the square root of the variance gives the same value
print(sqrt(agpop_sampled_variance))
## [1] 427118.8
# Finite population correction: 1 - n / N (about 0.90192)
fpc <- 1 - (n / N)
print(fpc)
## [1] 0.9019287
# Sample variance divided by the sample size (6.08101e+08)
agpop_sampled_svariancesize <- agpop_sampled_variance / n
print(agpop_sampled_svariancesize)
## [1] 608101495
# Standard error of the sample mean (equation 2.12):
# sqrt(fpc * sample variance / sample size) = 23419.31
agpop_sampled_standarderror <- sqrt(fpc * agpop_sampled_svariancesize)
print(agpop_sampled_standarderror)
## [1] 23419.31
# 95% confidence interval for the population mean (equation 2.22):
# sample mean -/+ t(0.025, n - 1) * standard error
# alpha holds the upper-tail probability of each tail, i.e. 0.05 / 2
alpha <- 0.05 / 2
print(alpha)
## [1] 0.025
# t quantile with n - 1 degrees of freedom (1.96793)
tvalue_srs <- qt(p = 1 - alpha, df = n - 1)
print(tvalue_srs)
## [1] 1.96793
# Half-width of the interval
srs_margin <- tvalue_srs * agpop_sampled_standarderror
# Lower limit (285557.6)
agpop_sampled_mean_LowerCI <- agpop_sampled_mean - srs_margin
print(agpop_sampled_mean_LowerCI)
## [1] 285557.6
# Upper limit (377732.7)
agpop_sampled_mean_UpperCI <- agpop_sampled_mean + srs_margin
print(agpop_sampled_mean_UpperCI)
## [1] 377732.7
# 95% CI for the population mean using simple random sampling is (285557.6,377732.7)
# Actual population mean is 308582.4
# The simple random sample estimates the population mean as 331645.1

Item 9

##############################################
############       ITEM 9        #############
##############################################
# Obtain an estimate of the population mean 
# using eqn. (3.2) 
##############################################

# Sample mean of ACRES92 within each stratum sample
# North Central: 327814.8 (reference stratum population mean: 326570.8)
NCregion_sampled_mean <- mean(NCregion_sampled[["ACRES92"]])
print(NCregion_sampled_mean)
## [1] 327814.8
# North East: 64609.48 (reference stratum population mean: 93600.31)
NEregion_sampled_mean <- mean(NEregion_sampled[["ACRES92"]])
print(NEregion_sampled_mean)
## [1] 64609.48
# South: 185847.9 (reference stratum population mean: 200009.2)
Sregion_sampled_mean <- mean(Sregion_sampled[["ACRES92"]])
print(Sregion_sampled_mean)
## [1] 185847.9
# West: 550075.7 (reference stratum population mean: 730266.9)
Wregion_sampled_mean <- mean(Wregion_sampled[["ACRES92"]])
print(Wregion_sampled_mean)
## [1] 550075.7
# Stratified estimate of the population mean (equation 3.2): each stratum
# sample mean is weighted by its share N_h / N of the population
# Recorded result: 275999.1 (reference population mean: 308582.4)
weight.NCregion <- N.NCregion / N
weight.NEregion <- N.NEregion / N
weight.Sregion <- N.Sregion / N
weight.Wregion <- N.Wregion / N
agpop_sampled_stratified_mean <- weight.NCregion * NCregion_sampled_mean +
  weight.NEregion * NEregion_sampled_mean +
  weight.Sregion * Sregion_sampled_mean +
  weight.Wregion * Wregion_sampled_mean
print(agpop_sampled_stratified_mean)
## [1] 275999.1
# The stratified random sample estimates the population mean as 275999.1

Item 10

##############################################
############      ITEM 10        #############
##############################################
# Obtain a 95% confidence interval for the 
# population mean using the formulas on page 79 (Lohr) 
# using the percentile of the t-distribution with 
# n-H degrees of freedom. (n = 300, H=4) 
##############################################

# Sample variance of ACRES92 within each stratum sample (formula on page 78)
# North Central: 8.17382e+10 (reference stratum population variance: 7.35429e+10)
NCregion_sampled_variance <- var(NCregion_sampled[["ACRES92"]])
print(NCregion_sampled_variance)
## [1] 81738228993
# North East: 3.62715e+9 (reference stratum population variance: 6.22619e+9)
NEregion_sampled_variance <- var(NEregion_sampled[["ACRES92"]])
print(NEregion_sampled_variance)
## [1] 3627154393
# South: 3.96351e+10 (reference stratum population variance: 5.96004e+10)
Sregion_sampled_variance <- var(Sregion_sampled[["ACRES92"]])
print(Sregion_sampled_variance)
## [1] 39635139107
# West: 3.36550e+11 (reference stratum population variance: 6.99922e+11)
Wregion_sampled_variance <- var(Wregion_sampled[["ACRES92"]])
print(Wregion_sampled_variance)
## [1] 336550109765
# Estimated variance of the stratified mean (equation 3.5)
# Each stratum contributes (1 - n_h / N_h) * (N_h / N)^2 * (s2_h / n_h),
# where s2_h is the stratum sample variance; N is the population size
# defined earlier in the script.
stratum_variance_term <- function(n_h, N_h, s2_h) {
  (1 - (n_h / N_h)) * (N_h / N)^2 * (s2_h / n_h)
}
# Sum of the four stratum contributions = 2.77235e+08
agpop_sampled_stratified_varianceofmean <-
  stratum_variance_term(n.NCregion, N.NCregion, NCregion_sampled_variance) +
  stratum_variance_term(n.NEregion, N.NEregion, NEregion_sampled_variance) +
  stratum_variance_term(n.Sregion, N.Sregion, Sregion_sampled_variance) +
  stratum_variance_term(n.Wregion, N.Wregion, Wregion_sampled_variance)
print(agpop_sampled_stratified_varianceofmean)
## [1] 277235264
# Standard error of the stratified mean ( formula on page 79 ):
# the square root of the estimated variance above = 16650.38
agpop_sampled_stratified_standarderror <-
  sqrt(agpop_sampled_stratified_varianceofmean)
print(agpop_sampled_stratified_standarderror)
## [1] 16650.38
# 95% confidence interval for the population mean ( formula on page 79 ):
# stratified mean -/+ t(0.025, n - H) * standard error
# with n = 300 and H = 4

# alpha holds the upper-tail probability of each tail, i.e. 0.05 / 2
alpha <- 0.05 / 2
print(alpha)
## [1] 0.025
# t quantile with n - H degrees of freedom (1.968011)
tvalue_stratified <- qt(p = 1 - alpha, df = n - H)
print(tvalue_stratified)
## [1] 1.968011
# Half-width of the interval
stratified_margin <- tvalue_stratified * agpop_sampled_stratified_standarderror
# Lower limit (243231)
agpop_sampled_stratified_mean_LowerCI <- agpop_sampled_stratified_mean -
  stratified_margin
print(agpop_sampled_stratified_mean_LowerCI)
## [1] 243231
# Upper limit (308767.2)
agpop_sampled_stratified_mean_UpperCI <- agpop_sampled_stratified_mean +
  stratified_margin
print(agpop_sampled_stratified_mean_UpperCI)
## [1] 308767.2
# 95% CI for the population mean using stratified random sampling is (243231,308767.2)
# Actual population mean is 308582.4
# The stratified random sample estimates the population mean as 275999.1