Exercise on Simple and Stratified Random Sampling
Tasks:
(Item 1)
Load dataset agpop.dat into R.
(Item 2)
Create a dataframe in R from the dataset in (1) without any missing value for ACRES92.
(Item 3)
Create four (4) additional dataframes, one for each of the regions.
(Item 4)
Set random seed using the last 5 digits of your student number.
(Item 5)
Obtain an SRS of size 300 from the dataframe in (2).
(Item 6)
Obtain an estimate of the population mean using eqn. (2.8).
(Item 7)
Obtain a 95% confidence interval for the population mean using eqns. (2.12) and (2.22).
(Item 8)
Obtain a stratified sample of size 300 from the dataframes in (3), allocated according to the sizes given on page 75 (Lohr).
(Item 9)
Obtain an estimate of the population mean using eqn. (3.2).
(Item 10)
Obtain a 95% confidence interval for the population mean using the formulas on page 79 (Lohr) using the percentile of the t-distribution with n-H degrees of freedom. (n = 300, H=4)
##############################################
############ STAT 250 Exercise #############
############ 10-Oct-2018 #############
############ John Pauline Pineda #############
##############################################
# Set working directory
# The data file is later read through the relative path "agpop.dat", so it
# must live in this directory.
# NOTE(review): this is an absolute, machine-specific path; the call errors
# on any machine that has no F:/SamplingDesign directory.
setwd("F:/SamplingDesign")
Item 1
##############################################
############ ITEM 1 #############
##############################################
# Load dataset agpop.dat into R
##############################################
# Load the working data
# stringsAsFactors = TRUE is requested explicitly. The recorded summary()
# output and the later nlevels()/table() calls on REGION rely on the text
# columns being factors, which stopped being the read.csv() default in
# R 4.0.0. Spelling it out gives the same result on old and new R versions.
agpop <- read.csv("agpop.dat", stringsAsFactors = TRUE)
# Initial exploratory analysis
# Check the data dimensions
# 3078 rows and 15 columns
dim(agpop)
## [1] 3078 15
# Generate the data summary
summary(agpop)
## COUNTY STATE ACRES92
## WASHINGTON COUNTY: 30 TX : 254 Min. : -99
## JEFFERSON COUNTY : 25 GA : 159 1st Qu.: 80903
## FRANKLIN COUNTY : 24 KY : 120 Median : 191648
## JACKSON COUNTY : 23 MO : 114 Mean : 306677
## LINCOLN COUNTY : 23 KS : 105 3rd Qu.: 366886
## MADISON COUNTY : 19 IL : 102 Max. :7229585
## (Other) :2934 (Other):2224
## ACRES87 ACRES82 FARMS92 FARMS87
## Min. : -99 Min. : -99 Min. : 0.0 Min. : 0.0
## 1st Qu.: 86236 1st Qu.: 96397 1st Qu.: 295.0 1st Qu.: 318.5
## Median : 199864 Median : 207292 Median : 521.0 Median : 572.0
## Mean : 313016 Mean : 320194 Mean : 625.5 Mean : 678.3
## 3rd Qu.: 372224 3rd Qu.: 377065 3rd Qu.: 838.0 3rd Qu.: 921.0
## Max. :7687460 Max. :7313958 Max. :7021.0 Max. :7590.0
##
## FARMS82 LARGEF92 LARGEF87 LARGEF82
## Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 345.0 1st Qu.: 8.00 1st Qu.: 8.00 1st Qu.: 8.00
## Median : 616.0 Median : 30.00 Median : 27.00 Median : 25.00
## Mean : 728.1 Mean : 56.18 Mean : 54.86 Mean : 52.62
## 3rd Qu.: 991.0 3rd Qu.: 75.00 3rd Qu.: 70.00 3rd Qu.: 65.00
## Max. :7394.0 Max. :579.00 Max. :596.00 Max. :546.00
##
## SMALLF92 SMALLF87 SMALLF82 REGION
## Min. : 0.00 Min. : 0.00 Min. : 0.00 NC:1054
## 1st Qu.: 13.00 1st Qu.: 17.00 1st Qu.: 16.00 NE: 220
## Median : 29.00 Median : 35.00 Median : 34.00 S :1382
## Mean : 54.09 Mean : 59.54 Mean : 60.97 W : 422
## 3rd Qu.: 59.00 3rd Qu.: 67.00 3rd Qu.: 67.00
## Max. :4298.00 Max. :3654.00 Max. :3522.00
##
Item 2
##############################################
############ ITEM 2 #############
##############################################
# Create a dataframe in R from the dataset
# in (1) without any missing value for ACRES92
##############################################
# Missing values are coded with the sentinel value -99.
# ACRES92: number of rows carrying the sentinel (19)
nrow(agpop[agpop[["ACRES92"]] == -99, ])
## [1] 19
# REGION: number of rows carrying the sentinel (0)
nrow(agpop[agpop[["REGION"]] == -99, ])
## [1] 0
# Drop the rows whose ACRES92 is the sentinel and keep only the two
# columns used in the rest of the exercise
agpop_complete <- agpop[!(agpop[["ACRES92"]] == -99), c("ACRES92", "REGION")]
# Dimensions of the cleaned data: 3059 rows and 2 columns
dim(agpop_complete)
## [1] 3059 2
# Summary of the cleaned data
summary(agpop_complete)
## ACRES92 REGION
## Min. : 0 NC:1052
## 1st Qu.: 82446 NE: 213
## Median : 193688 S :1376
## Mean : 308582 W : 418
## 3rd Qu.: 368482
## Max. :7229585
# Population size N = number of rows in the cleaned data (3059)
N <- nrow(agpop_complete)
print(N)
## [1] 3059
# Reference value: population mean of ACRES92 (308582.4)
agpop_mean <- mean(agpop_complete[["ACRES92"]])
print(agpop_mean)
## [1] 308582.4
# Reference value: population variance of ACRES92 as returned by var()
# (1.80891e+11)
agpop_variance <- var(agpop_complete[["ACRES92"]])
print(agpop_variance)
## [1] 1.80891e+11
# Reference value: population standard deviation of ACRES92 as returned
# by sd() (425312.8)
agpop_sd <- sd(agpop_complete[["ACRES92"]])
print(agpop_sd)
## [1] 425312.8
Item 3
##############################################
############ ITEM 3 #############
##############################################
# Create four(4) additional dataframes
# for each of the regions
##############################################
# Specify the number of strata
# Count the distinct regions actually present in the cleaned data instead of
# using nlevels(): nlevels() returns 0 when REGION is a character column
# (the read.csv() default since R 4.0.0) and would also count factor levels
# that no longer occur after filtering. This form works for both types.
(H <- length(unique(as.character(agpop_complete$REGION))))
## [1] 4
# Create data objects for the regions / strata
# Each object keeps the rows of agpop_complete belonging to one region.
NCregion <- agpop_complete[agpop_complete$REGION == "NC", ]
NEregion <- agpop_complete[agpop_complete$REGION == "NE", ]
Sregion <- agpop_complete[agpop_complete$REGION == "S", ]
Wregion <- agpop_complete[agpop_complete$REGION == "W", ]
# Stratum population sizes N_h (row count of each regional data frame)
# North Central region: 1052
N.NCregion <- nrow(NCregion)
print(N.NCregion)
## [1] 1052
# North East region: 213
N.NEregion <- nrow(NEregion)
print(N.NEregion)
## [1] 213
# South region: 1376
N.Sregion <- nrow(Sregion)
print(N.Sregion)
## [1] 1376
# West region: 418
N.Wregion <- nrow(Wregion)
print(N.Wregion)
## [1] 418
# Stratum population means of ACRES92, kept as reference values
# North Central region: 326570.8
NCregion_mean <- mean(NCregion[["ACRES92"]])
print(NCregion_mean)
## [1] 326570.8
# North East region: 93600.31
NEregion_mean <- mean(NEregion[["ACRES92"]])
print(NEregion_mean)
## [1] 93600.31
# South region: 200009.2
Sregion_mean <- mean(Sregion[["ACRES92"]])
print(Sregion_mean)
## [1] 200009.2
# West region: 730266.9
Wregion_mean <- mean(Wregion[["ACRES92"]])
print(Wregion_mean)
## [1] 730266.9
# Stratum population variances of ACRES92 (var()), kept as reference values
# North Central region: 7.35429e+10
NCregion_variance <- var(NCregion[["ACRES92"]])
print(NCregion_variance)
## [1] 73542921422
# North East region: 6.22619e+09
NEregion_variance <- var(NEregion[["ACRES92"]])
print(NEregion_variance)
## [1] 6226188633
# South region: 5.96004e+10
Sregion_variance <- var(Sregion[["ACRES92"]])
print(Sregion_variance)
## [1] 59600425689
# West region: 6.99922e+11
Wregion_variance <- var(Wregion[["ACRES92"]])
print(Wregion_variance)
## [1] 699922245636
# Stratum population standard deviations of ACRES92 (sd()), kept as
# reference values
# North Central region: 271188
NCregion_sd <- sd(NCregion[["ACRES92"]])
print(NCregion_sd)
## [1] 271188
# North East region: 78906.2
NEregion_sd <- sd(NEregion[["ACRES92"]])
print(NEregion_sd)
## [1] 78906.2
# South region: 244132
Sregion_sd <- sd(Sregion[["ACRES92"]])
print(Sregion_sd)
## [1] 244132
# West region: 836613.6
Wregion_sd <- sd(Wregion[["ACRES92"]])
print(Wregion_sd)
## [1] 836613.6
Item 4
##############################################
############ ITEM 4 #############
##############################################
# Set the random seed using the
# last 5 digits of your student number
##############################################
# Fix the state of the random number generator so the draws are reproducible
set.seed(seed = 89176)
Item 5
##############################################
############ ITEM 5 #############
##############################################
# Obtain an SRS of size 300
# from the dataframe in (2)
##############################################
# SRS sample size
n <- 300
print(n)
## [1] 300
# Draw n distinct row positions out of 1..N (sampling without replacement).
# The generator is re-seeded immediately before the draw.
# NOTE(review): the indices recorded below were presumably produced with an
# R version older than 3.6.0; newer versions default to a different
# sample.kind and may return different indices for the same seed - confirm.
set.seed(seed = 89176)
sampleindices <- sample.int(N, size = n)
print(sampleindices)
## [1] 602 164 2038 1444 733 1545 2387 234 1927 524 2181 1232 2116 704
## [15] 2867 2191 2641 2343 813 966 616 2089 79 1163 2496 1355 1491 332
## [29] 2355 2878 2722 1586 621 2669 2281 781 3019 71 1652 2731 467 1379
## [43] 162 2674 1542 2876 922 1696 1577 519 545 1767 97 99 881 2534
## [57] 1911 723 3012 333 130 2160 2610 551 247 2499 300 1570 271 2551
## [71] 1020 1899 349 2630 2656 397 1691 2941 2944 1568 2977 2035 122 1429
## [85] 1936 959 1114 2486 639 2834 7 179 537 1450 1666 558 1920 1001
## [99] 2391 1364 268 1057 2672 2718 1113 1251 903 1439 1747 1757 2426 1155
## [113] 823 2798 1019 955 20 336 2440 963 1779 1912 1727 2012 1123 987
## [127] 26 543 1555 2677 57 1842 1917 2190 740 1126 880 1050 2456 2058
## [141] 2872 2412 874 2637 207 1880 106 665 35 2015 2694 1380 1229 2255
## [155] 330 1235 400 2590 2483 576 2250 296 2479 1817 205 2598 1797 2760
## [169] 1306 1070 1520 2654 1075 1605 1308 1623 2799 982 844 2510 1826 789
## [183] 2589 972 780 2042 2787 2506 1081 933 1975 131 1601 2334 2472 1693
## [197] 17 1165 2434 1454 2045 1658 47 1323 2901 2248 315 2103 2724 456
## [211] 3003 2345 1396 1565 735 371 2532 399 1723 884 2962 1383 2608 2491
## [225] 1182 2233 1665 1844 1151 1871 1241 573 1749 1643 2143 2858 1525 1538
## [239] 1793 1583 1497 301 2331 1201 2697 2671 2926 1858 581 756 1869 2306
## [253] 1290 930 72 2619 859 842 2582 1282 775 2392 1141 2273 250 40
## [267] 3031 1857 1271 2080 896 226 2765 2475 1461 1524 598 405 692 892
## [281] 2454 915 701 1481 2554 2292 2948 2840 368 1970 74 1868 2327 999
## [295] 2435 2832 1125 2782 2658 925
# Extract the sampled rows from the cleaned population and display them
agpop_sampled <- agpop_complete[sampleindices, ]
print(agpop_sampled)
## ACRES92 REGION
## 606 233217 NC
## 164 4768 W
## 2049 245049 NC
## 1450 250475 NC
## 737 184599 NC
## 1551 96540 S
## 2400 41899 S
## 234 156801 W
## 1933 1797466 W
## 528 238609 NC
## 2192 1457339 W
## 1238 47308 NC
## 2127 419760 S
## 708 217191 NC
## 2884 93364 NE
## 2202 1318447 W
## 2656 678590 S
## 2356 974811 NC
## 817 223328 NC
## 970 111913 S
## 620 357684 NC
## 2100 187175 NC
## 79 18818 S
## 1169 126981 S
## 2509 408710 S
## 1361 286337 NC
## 1497 42712 S
## 335 299699 S
## 2368 1361106 NC
## 2895 82967 W
## 2737 307783 S
## 1592 868064 W
## 625 221209 W
## 2684 98449 S
## 2294 44800 S
## 785 202429 NC
## 3038 115487 S
## 71 141260 S
## 1658 180400 S
## 2746 563183 S
## 471 18644 S
## 1385 255498 NC
## 162 229365 W
## 2689 1555905 S
## 1548 118651 S
## 2893 112085 W
## 926 427403 NC
## 1702 51916 S
## 1583 2232575 W
## 523 214452 W
## 549 312173 NC
## 1773 591185 NC
## 97 223889 S
## 99 37606 S
## 885 403375 NC
## 2547 2001152 S
## 1917 1881764 W
## 727 299709 NC
## 3031 54622 S
## 336 296242 S
## 130 156363 S
## 2171 421233 S
## 2624 354917 S
## 555 343870 NC
## 248 641755 W
## 2512 547829 S
## 302 227202 S
## 1576 1424228 W
## 272 546538 W
## 2564 426189 S
## 1024 23062 S
## 1905 3112271 W
## 352 151242 S
## 2645 346653 S
## 2671 536300 S
## 401 109923 S
## 1697 68736 S
## 2958 92761 NC
## 2961 356651 NC
## 1574 598694 W
## 2996 114184 NC
## 2046 275644 NC
## 122 69422 S
## 1435 111549 NC
## 1942 738041 W
## 963 443802 NC
## 1118 36059 S
## 2499 612718 S
## 643 353528 W
## 2851 71803 S
## 7 167832 S
## 179 164130 W
## 541 308497 NC
## 1456 188595 NC
## 1672 56693 S
## 562 317205 NC
## 1926 770155 W
## 1005 41352 S
## 2404 96181 S
## 1370 420778 NC
## 269 32072 W
## 1061 35712 S
## 2687 362642 S
## 2733 328367 S
## 1117 58730 S
## 1257 121153 NC
## 907 141386 NC
## 1445 368849 NC
## 1753 1128346 NC
## 1763 503575 NC
## 2439 191486 S
## 1159 31583 NE
## 827 32318 NC
## 2815 136320 S
## 1023 98545 S
## 959 484093 NC
## 20 47200 S
## 339 44962 S
## 2453 123557 S
## 967 177858 S
## 1785 649612 NC
## 1918 10 W
## 1733 877382 NC
## 2023 138297 NC
## 1127 97643 S
## 991 144904 S
## 26 111315 S
## 547 368114 NC
## 1561 108236 S
## 2692 391842 S
## 57 155914 S
## 1848 297326 NC
## 1923 1769177 W
## 2201 380464 W
## 744 82426 NC
## 1130 6166 S
## 884 222028 NC
## 1054 105068 S
## 2469 31368 S
## 2069 200405 NC
## 2889 89785 NE
## 2425 135469 S
## 878 486997 NC
## 2652 49579 S
## 207 388084 W
## 1886 11644 NE
## 106 367969 S
## 669 69354 NC
## 35 130063 S
## 2026 179280 NC
## 2709 536507 S
## 1386 232189 NC
## 1235 438914 NC
## 2268 90065 NE
## 333 11738 S
## 1241 210638 NC
## 404 168593 S
## 2604 490578 S
## 2496 962576 S
## 580 266083 NC
## 2262 41347 NE
## 298 36230 S
## 2492 123792 S
## 1823 1387740 NC
## 205 342653 W
## 2612 656961 S
## 1803 265048 NC
## 2775 167374 W
## 1312 269147 NC
## 1074 93887 S
## 1526 62833 S
## 2669 525885 S
## 1079 196701 S
## 1611 1197028 W
## 1314 272049 NC
## 1629 70697 S
## 2816 37044 S
## 986 78966 S
## 848 257351 NC
## 2523 208073 S
## 1832 612694 NC
## 793 148662 NC
## 2603 470096 S
## 976 27836 S
## 784 86236 NC
## 2053 219023 NC
## 2804 85954 S
## 2519 513533 S
## 1085 61145 S
## 937 432326 NC
## 1984 135494 NE
## 131 313232 S
## 1607 1629363 W
## 2347 392935 NC
## 2485 11292 S
## 1699 194015 S
## 17 67950 S
## 1171 80241 S
## 2447 91343 S
## 1460 228936 NC
## 2056 210601 NC
## 1664 156027 S
## 47 207226 S
## 1329 250507 NC
## 2918 92074 W
## 2260 20777 NE
## 318 69405 S
## 2114 300829 S
## 2739 260892 S
## 460 68729 S
## 3022 30015 S
## 2358 373787 NC
## 1402 252783 NC
## 1571 3002378 W
## 739 261482 NC
## 375 168861 S
## 2545 471498 S
## 403 19060 S
## 1729 818893 NC
## 888 442362 NC
## 2980 133197 NC
## 1389 252890 NC
## 2622 383573 S
## 2504 396508 S
## 1188 62242 NE
## 2245 125707 NE
## 1671 75496 S
## 1850 250086 NC
## 1155 25470 NE
## 1877 39844 NE
## 1247 193956 NC
## 577 321950 NC
## 1755 1233663 NC
## 1649 162634 S
## 2154 390957 S
## 2875 43332 S
## 1531 125713 S
## 1544 80342 S
## 1799 1425338 NC
## 1589 349938 W
## 1503 79962 S
## 303 70672 S
## 2344 688081 NC
## 1207 77493 NC
## 2712 926093 S
## 2686 617851 S
## 2943 130051 NC
## 1864 360203 NC
## 585 305685 NC
## 760 115517 NC
## 1875 46056 NE
## 2319 82634 S
## 1296 165961 NC
## 934 449151 NC
## 72 56680 S
## 2634 518028 S
## 863 378517 NC
## 846 181020 NC
## 2596 201952 S
## 1288 61832 NC
## 779 105658 NC
## 2405 91858 S
## 1145 116221 S
## 2286 12408 NE
## 251 878447 W
## 40 191810 S
## 3050 28622 S
## 1863 347598 NC
## 1277 181569 NC
## 2091 19088 NC
## 900 499112 NC
## 226 796892 W
## 2780 25810 S
## 2488 165309 S
## 1467 168586 NC
## 1530 98914 S
## 602 542855 NC
## 409 8151 S
## 696 431415 NC
## 896 517623 NC
## 2467 53026 S
## 919 588061 NC
## 705 662629 NC
## 1487 316617 NC
## 2568 412632 S
## 2305 62108 S
## 2965 529966 NC
## 2857 160973 S
## 372 17105 S
## 1979 195626 NE
## 74 151325 S
## 1874 33935 NE
## 2340 496799 NC
## 1003 86074 S
## 2448 182754 S
## 2849 48889 S
## 1129 57789 S
## 2798 17392 S
## 2673 593819 S
## 929 668420 NC
Item 8
##############################################
############ ITEM 8 #############
##############################################
# Obtain a stratified sample of size 300 from
# the dataframes from (3) allocated according
# to the sizes mentioned in page 75 (Lohr)
##############################################
# Sample size allocated to each stratum (the four sizes add up to 300)
# North Central region
n.NCregion <- 103
print(n.NCregion)
## [1] 103
# North East region
n.NEregion <- 21
print(n.NEregion)
## [1] 21
# South region
n.Sregion <- 135
print(n.Sregion)
## [1] 135
# West region
n.Wregion <- 41
print(n.Wregion)
## [1] 41
# Draw the within-stratum row positions, one stratum at a time.
# North Central region: n.NCregion distinct positions out of 1..N.NCregion
# NOTE(review): the generator is reset to the same seed before every stratum
# draw in this script, so the four draws reuse one random stream and are not
# independent of each other; seeding once before the first draw would
# avoid that, but would change all recorded results.
set.seed(seed = 89176)
sampleindices.NCregion <- sample.int(N.NCregion, size = n.NCregion)
print(sampleindices.NCregion)
## [1] 207 57 700 496 252 530 818 80 660 180 746 421 722 240
## [15] 978 747 900 798 277 329 209 709 27 395 846 459 505 112
## [29] 796 972 919 535 210 899 768 263 1015 24 555 916 157 462
## [43] 55 895 516 961 308 566 526 173 182 588 32 33 293 841
## [57] 634 1039 562 110 43 714 862 1002 82 824 99 517 89 839
## [71] 335 624 115 863 870 130 554 962 975 512 1023 664 40 465
## [85] 630 312 362 807 208 1022 3 58 174 469 539 1043 620 323
## [99] 771 439 87 340 859
# North East region: n.NEregion distinct positions out of 1..N.NEregion
# NOTE(review): re-seeding with the value already used for the other strata
# makes this draw follow the same random stream as theirs.
set.seed(seed = 89176)
sampleindices.NEregion <- sample.int(N.NEregion, size = n.NEregion)
print(sampleindices.NEregion)
## [1] 42 12 141 100 51 106 162 16 130 36 146 82 140 47 188 143 171
## [18] 151 53 62 40
# South region: n.Sregion distinct positions out of 1..N.Sregion
# NOTE(review): re-seeding with the value already used for the other strata
# makes this draw follow the same random stream as theirs.
set.seed(seed = 89176)
sampleindices.Sregion <- sample.int(N.Sregion, size = n.Sregion)
print(sampleindices.Sregion)
## [1] 271 74 916 649 330 694 1072 105 864 235 977 552 947 315
## [15] 1283 980 1181 1047 363 432 275 932 35 519 1112 604 664 148
## [29] 1359 1280 1210 705 276 1185 1012 347 1339 32 732 1209 207 610
## [43] 72 1182 682 1270 407 748 696 229 240 779 43 44 388 1114
## [57] 840 318 745 146 57 948 1145 242 108 1095 132 687 119 1115
## [71] 446 830 153 1148 1159 173 737 1281 1282 683 1295 885 53 621
## [85] 841 417 484 1078 277 1228 3 78 233 627 721 241 1305 433
## [99] 1032 589 116 456 1152 1171 479 538 1322 619 751 755 1042 496
## [113] 353 1200 437 409 9 144 1044 412 761 817 738 859 1272 421
## [127] 12 231 662 1140 25 783 815 931 1363
# West region: n.Wregion distinct positions out of 1..N.Wregion
# NOTE(review): re-seeding with the value already used for the other strata
# makes this draw follow the same random stream as theirs.
set.seed(seed = 89176)
sampleindices.Wregion <- sample.int(N.Wregion, size = n.Wregion)
print(sampleindices.Wregion)
## [1] 83 23 278 196 100 209 323 32 259 71 292 165 282 94 381 291 349
## [18] 309 107 127 81 273 11 152 325 176 193 43 303 370 402 203 80 340
## [35] 290 99 382 9 208 343 59
# Extract the sampled rows of each stratum and display them
# North Central region
NCregion_sampled <- NCregion[sampleindices.NCregion, ]
print(NCregion_sampled)
## ACRES92 REGION
## 775 223429 NC
## 581 349252 NC
## 1743 769225 NC
## 1301 210897 NC
## 820 71596 NC
## 1335 422916 NC
## 1861 1005877 NC
## 604 293266 NC
## 1465 204171 NC
## 748 175847 NC
## 1789 270005 NC
## 1226 233921 NC
## 1765 522536 NC
## 808 130826 NC
## 2395 367239 NC
## 1790 335849 NC
## 2086 202324 NC
## 1841 236950 NC
## 845 121710 NC
## 897 424104 NC
## 777 220057 NC
## 1752 1165695 NC
## 551 261494 NC
## 963 443802 NC
## 2032 4060 NC
## 1264 73661 NC
## 1310 366534 NC
## 680 223764 NC
## 1839 290608 NC
## 2389 1417516 NC
## 2336 787857 NC
## 1340 457670 NC
## 778 227711 NC
## 2085 253383 NC
## 1811 430972 NC
## 831 80078 NC
## 2961 356651 NC
## 548 415104 NC
## 1360 231610 NC
## 2102 216318 NC
## 725 310518 NC
## 1267 129083 NC
## 579 615034 NC
## 2081 96293 NC
## 1321 482991 NC
## 2378 485748 NC
## 876 407464 NC
## 1371 290627 NC
## 1331 744710 NC
## 741 251277 NC
## 750 141703 NC
## 1393 187856 NC
## 556 224811 NC
## 557 401625 NC
## 861 162244 NC
## 2027 99214 NC
## 1439 242018 NC
## 2986 47376 NC
## 1367 237239 NC
## 678 390149 NC
## 567 399155 NC
## 1757 552707 NC
## 2048 113329 NC
## 2948 351633 NC
## 606 233217 NC
## 1867 228167 NC
## 623 353683 NC
## 1322 68778 NC
## 613 241422 NC
## 2025 215796 NC
## 903 479903 NC
## 1429 249046 NC
## 683 40917 NC
## 2049 245049 NC
## 2056 210601 NC
## 698 303715 NC
## 1359 643762 NC
## 2379 2076199 NC
## 2392 615479 NC
## 1317 107810 NC
## 2970 346398 NC
## 1469 54082 NC
## 564 332377 NC
## 1270 108726 NC
## 1435 111549 NC
## 880 302849 NC
## 930 465527 NC
## 1850 250086 NC
## 776 22555 NC
## 2969 8763 NC
## 527 321728 NC
## 582 191291 NC
## 742 443475 NC
## 1274 79921 NC
## 1344 263274 NC
## 2990 348602 NC
## 1425 268447 NC
## 891 316317 NC
## 1814 338136 NC
## 1244 190706 NC
## 611 278922 NC
## 908 517376 NC
## 2045 28983 NC
# North East region: rows selected by the drawn positions
NEregion_sampled <- NEregion[sampleindices.NEregion, ]
print(NEregion_sampled)
## ACRES92 REGION
## 1874 33935 NE
## 1151 5340 NE
## 2228 19026 NE
## 1983 0 NE
## 1883 2636 NE
## 1989 133854 NE
## 2249 85113 NE
## 1155 25470 NE
## 2216 172366 NE
## 1200 106971 NE
## 2233 39412 NE
## 1964 138620 NE
## 2226 76997 NE
## 1879 34292 NE
## 2276 52760 NE
## 2230 176643 NE
## 2258 160802 NE
## 2238 5095 NE
## 1885 7799 NE
## 1894 58758 NE
## 1872 20910 NE
# South region: rows selected by the drawn positions
Sregion_sampled <- Sregion[sampleindices.Sregion, ]
print(Sregion_sampled)
## ACRES92 REGION
## 425 73869 S
## 79 18818 S
## 2437 224247 S
## 1563 342237 S
## 484 37923 S
## 1664 156027 S
## 2594 32436 S
## 110 281895 S
## 2319 82634 S
## 389 33641 S
## 2498 19131 S
## 1166 43320 S
## 2468 37550 S
## 469 51836 S
## 2837 98914 S
## 2501 765139 S
## 2704 187728 S
## 2569 496742 S
## 517 115516 S
## 1032 1501 S
## 429 36074 S
## 2453 123557 S
## 40 191810 S
## 1119 246536 S
## 2635 432379 S
## 1518 89168 S
## 1634 127760 S
## 300 23735 S
## 3038 115487 S
## 2834 6152 S
## 2733 328367 S
## 1675 13310 S
## 430 53944 S
## 2708 632622 S
## 2533 670459 S
## 501 32800 S
## 3018 101214 S
## 37 128357 S
## 1702 51916 S
## 2732 455873 S
## 361 45214 S
## 1524 53401 S
## 77 250819 S
## 2705 507449 S
## 1652 41750 S
## 2824 9559 S
## 1007 127161 S
## 1718 87478 S
## 1666 113654 S
## 383 82549 S
## 394 10192 S
## 2126 662121 S
## 48 199714 S
## 49 138437 S
## 988 112831 S
## 2637 527837 S
## 2295 51020 S
## 472 44470 S
## 1715 167379 S
## 298 36230 S
## 62 112620 S
## 2469 31368 S
## 2668 220355 S
## 396 198184 S
## 113 143104 S
## 2617 322324 S
## 137 195510 S
## 1657 19676 S
## 124 32003 S
## 2638 318658 S
## 1046 5256 S
## 2177 577693 S
## 305 301977 S
## 2671 536300 S
## 2682 402011 S
## 326 244185 S
## 1707 266067 S
## 2835 167858 S
## 2836 21507 S
## 1653 7046 S
## 2849 48889 S
## 2406 30299 S
## 58 144193 S
## 1535 175231 S
## 2296 90995 S
## 1017 197826 S
## 1084 44548 S
## 2600 545664 S
## 431 35387 S
## 2780 25810 S
## 8 177189 S
## 83 313573 S
## 387 5901 S
## 1541 218154 S
## 1691 155213 S
## 395 178861 S
## 2859 125394 S
## 1033 3383 S
## 2553 856707 S
## 1503 79962 S
## 121 79803 S
## 1056 191002 S
## 2675 56975 S
## 2694 572607 S
## 1079 196701 S
## 1138 40181 S
## 3001 76080 S
## 1533 89816 S
## 1721 179554 S
## 1725 38394 S
## 2563 1117134 S
## 1096 327511 S
## 507 88829 S
## 2723 58690 S
## 1037 278675 S
## 1009 206090 S
## 14 109555 S
## 296 24489 S
## 2566 581776 S
## 1012 69711 S
## 2108 513789 S
## 2164 353045 S
## 1708 53690 S
## 2314 72500 S
## 2826 37777 S
## 1021 80692 S
## 17 67950 S
## 385 21697 S
## 1632 144529 S
## 2663 495364 S
## 30 210733 S
## 2130 338529 S
## 2162 328976 S
## 2452 73654 S
## 3042 104194 S
# West region: rows selected by the drawn positions
Wregion_sampled <- Wregion[sampleindices.Wregion, ]
print(Wregion_sampled)
## ACRES92 REGION
## 225 1257229 W
## 165 236222 W
## 1937 82100 W
## 1578 2085181 W
## 243 440581 W
## 1591 367482 W
## 2211 694304 W
## 174 597766 W
## 1918 10 W
## 213 318156 W
## 2180 818736 W
## 645 380928 W
## 1941 235826 W
## 237 167106 W
## 2914 32637 W
## 1952 5361 W
## 2770 373582 W
## 2197 720153 W
## 250 103470 W
## 270 1004360 W
## 223 322823 W
## 1932 324476 W
## 153 137834 W
## 632 150021 W
## 2213 139820 W
## 656 224369 W
## 1575 1619482 W
## 185 725118 W
## 2191 1154399 W
## 2903 44742 W
## 3062 2415873 W
## 1585 699409 W
## 222 207448 W
## 2761 209819 W
## 1951 231758 W
## 242 331639 W
## 2915 55360 W
## 151 1151284 W
## 1590 1644001 W
## 2764 58522 W
## 201 783715 W
# Sanity check: tabulate REGION in every stratum sample. Each table should
# show the allocated sample size under its own region and 0 elsewhere.
# North Central sample
table(NCregion_sampled[["REGION"]])
##
## NC NE S W
## 103 0 0 0
# North East sample
table(NEregion_sampled[["REGION"]])
##
## NC NE S W
## 0 21 0 0
# South sample
table(Sregion_sampled[["REGION"]])
##
## NC NE S W
## 0 0 135 0
# West sample
table(Wregion_sampled[["REGION"]])
##
## NC NE S W
## 0 0 0 41
Item 6
##############################################
############ ITEM 6 #############
##############################################
# Obtain an estimate of the population mean
# using eqn. (2.8)
##############################################
# Equation 2.8: the SRS estimate of the population mean is the sample mean
# Sample mean = 331645.1
# (Reference) Population mean = 308582.4
agpop_sampled_mean <- mean(agpop_sampled[["ACRES92"]])
print(agpop_sampled_mean)
## [1] 331645.1
# Estimated population mean based on simple random sampling is 331645.1
Item 7
##############################################
############ ITEM 7 #############
##############################################
# Obtain a 95% confidence interval for the
# population mean using eqns. (2.12) and (2.22)
##############################################
# Compute the sample variance
# Sample variance = 1.8243e+11
# ( Reference ) Population variance = 1.80891e+11
(agpop_sampled_variance <- var(agpop_sampled$ACRES92))
## [1] 182430448622
# Compute the sample standard deviation
# Sample standard deviation = 427118.8
# ( Reference ) Population standard deviation = 425312.8
(agpop_sampled_stdev <- sd(agpop_sampled$ACRES92))
## [1] 427118.8
# Cross-check: the square root of the sample variance gives the same value
sqrt(agpop_sampled_variance)
## [1] 427118.8
# Compute the finite population correction (fpc):
# 1 - sample size / population size
# fpc = 0.90192
(fpc <- (1 - (n / N)))
## [1] 0.9019287
# Compute the sample variance divided by the sample size
# Sample variance over sample size = 6.08101e+08
# (assigned with `<-` like every other object in this script)
(agpop_sampled_svariancesize <- (agpop_sampled_variance / n))
## [1] 608101495
# Compute the standard error of the sample mean ( equation 2.12 )
# Standard error = square root of ( fpc * sample variance / sample size )
# Standard error = 23419.31
(agpop_sampled_standarderror <- sqrt(fpc * agpop_sampled_svariancesize))
## [1] 23419.31
# Obtain a 95% confidence interval for the population mean ( equation 2.22 )
# 95% CI ( sample mean - t(0.025,n-1) * standard error, sample mean + t(0.025,n-1) * standard error)
# Set the upper-tail probability
# Note: `alpha` holds the per-tail probability 0.05 / 2 = 0.025, not the
# overall significance level 0.05.
(alpha <- (0.05 / 2))
## [1] 0.025
# Determine the t-value: the 1 - alpha quantile of the t-distribution with
# n - 1 degrees of freedom
# tvalue = 1.96793
(tvalue_srs <- qt(1 - alpha, df = n - 1))
## [1] 1.96793
# Compute the lower confidence limit
# Lower CI = 285557.6
(agpop_sampled_mean_LowerCI <- agpop_sampled_mean -
  (tvalue_srs * agpop_sampled_standarderror))
## [1] 285557.6
# Compute the upper confidence limit
# Upper CI = 377732.7
(agpop_sampled_mean_UpperCI <- agpop_sampled_mean +
  (tvalue_srs * agpop_sampled_standarderror))
## [1] 377732.7
# 95% CI for the population mean using simple random sampling is (285557.6,377732.7)
# Actual population mean is 308582.4
# Estimated population mean based on simple random sampling is 331645.1
Item 9
##############################################
############ ITEM 9 #############
##############################################
# Obtain an estimate of the population mean
# using eqn. (3.2)
##############################################
# Sample mean of ACRES92 within each stratum
# North Central region: sample mean = 327814.8
# ( Reference ) stratum population mean = 326570.8
NCregion_sampled_mean <- mean(NCregion_sampled[["ACRES92"]])
print(NCregion_sampled_mean)
## [1] 327814.8
# North East region: sample mean = 64609.48
# ( Reference ) stratum population mean = 93600.31
NEregion_sampled_mean <- mean(NEregion_sampled[["ACRES92"]])
print(NEregion_sampled_mean)
## [1] 64609.48
# South region: sample mean = 185847.9
# ( Reference ) stratum population mean = 200009.2
Sregion_sampled_mean <- mean(Sregion_sampled[["ACRES92"]])
print(Sregion_sampled_mean)
## [1] 185847.9
# West region: sample mean = 550075.7
# ( Reference ) stratum population mean = 730266.9
Wregion_sampled_mean <- mean(Wregion_sampled[["ACRES92"]])
print(Wregion_sampled_mean)
## [1] 550075.7
# Equation 3.2: the stratified estimate of the population mean is the sum of
# the stratum sample means, each weighted by its population share N_h / N
# Stratified sample mean = 275999.1
# (Reference ) Population mean = 308582.4
agpop_sampled_stratified_mean <-
  (N.NCregion / N) * NCregion_sampled_mean +
  (N.NEregion / N) * NEregion_sampled_mean +
  (N.Sregion / N) * Sregion_sampled_mean +
  (N.Wregion / N) * Wregion_sampled_mean
print(agpop_sampled_stratified_mean)
## [1] 275999.1
# Estimated population mean based on stratified random sampling is 275999.1
Item 10
##############################################
############ ITEM 10 #############
##############################################
# Obtain a 95% confidence interval for the
# population mean using the formulas on page 79 (Lohr)
# using the percentile of the t-distribution with
# n-H degrees of freedom. (n = 300, H=4)
##############################################
# Sample variance of ACRES92 within each stratum ( formula on page 78 )
# North Central region: sample variance = 8.17382e+10
# ( Reference ) stratum population variance = 7.35429e+10
NCregion_sampled_variance <- var(NCregion_sampled[["ACRES92"]])
print(NCregion_sampled_variance)
## [1] 81738228993
# North East region: sample variance = 3.62715e+9
# ( Reference ) stratum population variance = 6.22619e+9
NEregion_sampled_variance <- var(NEregion_sampled[["ACRES92"]])
print(NEregion_sampled_variance)
## [1] 3627154393
# South region: sample variance = 3.96351e+10
# ( Reference ) stratum population variance = 5.96004e+10
Sregion_sampled_variance <- var(Sregion_sampled[["ACRES92"]])
print(Sregion_sampled_variance)
## [1] 39635139107
# West region: sample variance = 3.36550e+11
# ( Reference ) stratum population variance = 6.99922e+11
Wregion_sampled_variance <- var(Wregion_sampled[["ACRES92"]])
print(Wregion_sampled_variance)
## [1] 336550109765
# Equation 3.5: estimated variance of the stratified mean. Every stratum
# contributes (1 - n_h / N_h) * (N_h / N)^2 * (s2_h / n_h); the four
# contributions are added in the order NC, NE, S, W.
# Estimated variance of the stratified mean = 2.77235e+08
agpop_sampled_stratified_varianceofmean <- local({
  # Contribution of one stratum to the variance of the stratified mean
  stratum_term <- function(n_h, N_h, s2_h) {
    (1 - (n_h / N_h)) * (N_h / N)^2 * (s2_h / n_h)
  }
  stratum_term(n.NCregion, N.NCregion, NCregion_sampled_variance) +
    stratum_term(n.NEregion, N.NEregion, NEregion_sampled_variance) +
    stratum_term(n.Sregion, N.Sregion, Sregion_sampled_variance) +
    stratum_term(n.Wregion, N.Wregion, Wregion_sampled_variance)
})
print(agpop_sampled_stratified_varianceofmean)
## [1] 277235264
# Standard error of the stratified mean ( formula on page 79 ): the square
# root of the estimated variance computed above
# Standard error of the stratified mean = 16650.38
agpop_sampled_stratified_standarderror <-
  sqrt(agpop_sampled_stratified_varianceofmean)
print(agpop_sampled_stratified_standarderror)
## [1] 16650.38
# 95% confidence interval for the population mean ( formula on page 79 ):
# ( stratified mean - t(0.025,n-H) * standard error,
#   stratified mean + t(0.025,n-H) * standard error )
# with n = 300 and H = 4
# Per-tail probability (0.05 split over the two tails)
alpha <- 0.05 / 2
print(alpha)
## [1] 0.025
# t-value: the 1 - alpha quantile of the t-distribution with n - H degrees
# of freedom
# tvalue = 1.968011
tvalue_stratified <- qt(1 - alpha, df = n - H)
print(tvalue_stratified)
## [1] 1.968011
# Lower confidence limit = 243231
agpop_sampled_stratified_mean_LowerCI <-
  agpop_sampled_stratified_mean -
  tvalue_stratified * agpop_sampled_stratified_standarderror
print(agpop_sampled_stratified_mean_LowerCI)
## [1] 243231
# Upper confidence limit = 308767.2
agpop_sampled_stratified_mean_UpperCI <-
  agpop_sampled_stratified_mean +
  tvalue_stratified * agpop_sampled_stratified_standarderror
print(agpop_sampled_stratified_mean_UpperCI)
## [1] 308767.2
# 95% CI for the population mean using stratified random sampling is (243231,308767.2)
# Actual population mean is 308582.4
# Estimated population mean based on stratified random sampling is 275999.1