Exercise using Ratio Estimation
Tasks
(Item1)
Construct a dataset containing the variables ACRES92, ACRES87 and FARMS92 extracted from the agpop.DAT dataset
(Item2)
Eliminate all observations where any of the above variables are missing (i.e. equal to -99)
(Item3)
Set seed equal to the last 5 digits of your student number
(Item4)
Obtain an SRS of size 300 and estimate the population mean of ACRES92 using a ratio estimator with
(Item4.1.) ACRES87 as auxiliary variable
(Item4.2.) FARMS92 as auxiliary variable
Item 1-2
##############################################
############ ITEMS 1-2 #############
##############################################
# Construct a dataset where you extract the
# ACRES92, ACRES87 and FARMS92
# from the agpop.DAT dataset
# Eliminate all observations where
# any of the above variables
# are missing (i.e. equal to -99)
##############################################
# Load the working data.
# stringsAsFactors = TRUE keeps COUNTY, STATE and REGION as factors, so that
# summary() tabulates their levels as in the output recorded below; since
# R 4.0.0 read.csv() would otherwise read these columns as plain character.
agpop <- read.csv("agpop.dat", stringsAsFactors = TRUE)
# Initial exploratory analysis
# Check the data dimensions
# 3078 rows and 15 columns
dim(agpop)
## [1] 3078 15
# Generate the data summary
summary(agpop)
## COUNTY STATE ACRES92
## WASHINGTON COUNTY: 30 TX : 254 Min. : -99
## JEFFERSON COUNTY : 25 GA : 159 1st Qu.: 80903
## FRANKLIN COUNTY : 24 KY : 120 Median : 191648
## JACKSON COUNTY : 23 MO : 114 Mean : 306677
## LINCOLN COUNTY : 23 KS : 105 3rd Qu.: 366886
## MADISON COUNTY : 19 IL : 102 Max. :7229585
## (Other) :2934 (Other):2224
## ACRES87 ACRES82 FARMS92 FARMS87
## Min. : -99 Min. : -99 Min. : 0.0 Min. : 0.0
## 1st Qu.: 86236 1st Qu.: 96397 1st Qu.: 295.0 1st Qu.: 318.5
## Median : 199864 Median : 207292 Median : 521.0 Median : 572.0
## Mean : 313016 Mean : 320194 Mean : 625.5 Mean : 678.3
## 3rd Qu.: 372224 3rd Qu.: 377065 3rd Qu.: 838.0 3rd Qu.: 921.0
## Max. :7687460 Max. :7313958 Max. :7021.0 Max. :7590.0
##
## FARMS82 LARGEF92 LARGEF87 LARGEF82
## Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 345.0 1st Qu.: 8.00 1st Qu.: 8.00 1st Qu.: 8.00
## Median : 616.0 Median : 30.00 Median : 27.00 Median : 25.00
## Mean : 728.1 Mean : 56.18 Mean : 54.86 Mean : 52.62
## 3rd Qu.: 991.0 3rd Qu.: 75.00 3rd Qu.: 70.00 3rd Qu.: 65.00
## Max. :7394.0 Max. :579.00 Max. :596.00 Max. :546.00
##
## SMALLF92 SMALLF87 SMALLF82 REGION
## Min. : 0.00 Min. : 0.00 Min. : 0.00 NC:1054
## 1st Qu.: 13.00 1st Qu.: 17.00 1st Qu.: 16.00 NE: 220
## Median : 29.00 Median : 35.00 Median : 34.00 S :1382
## Mean : 54.09 Mean : 59.54 Mean : 60.97 W : 422
## 3rd Qu.: 59.00 3rd Qu.: 67.00 3rd Qu.: 67.00
## Max. :4298.00 Max. :3654.00 Max. :3522.00
##
# How many records carry the missing-value code (-99) in ACRES92?
# 19 records
nrow(agpop[agpop[["ACRES92"]] == -99, ])
## [1] 19
# Same check for ACRES87
# 23 records
nrow(agpop[agpop[["ACRES87"]] == -99, ])
## [1] 23
# Same check for FARMS92
# No records
nrow(agpop[agpop[["FARMS92"]] == -99, ])
## [1] 0
# Keep only the three study variables and drop every record in which ANY of
# them carries the missing-value code -99. FARMS92 is screened together with
# ACRES92 and ACRES87 so the filter matches the task statement even if the
# input file changes; on this data FARMS92 has no -99 entries, so the result
# is unchanged.
study_vars <- c("ACRES92", "ACRES87", "FARMS92")
has_missing_code <- rowSums(agpop[, study_vars] == -99) > 0
agpop_complete <- agpop[!has_missing_code, study_vars]
# Check the data dimensions
# 3044 rows and 3 columns
dim(agpop_complete)
## [1] 3044 3
(N <- nrow(agpop_complete))
## [1] 3044
# Sanity check: the cleaned data must contain no -99 codes at all
# ACRES92: expect 0 records with the missing-value code
nrow(agpop_complete[agpop_complete[["ACRES92"]] == -99, ])
## [1] 0
# ACRES87: expect 0 records with the missing-value code
nrow(agpop_complete[agpop_complete[["ACRES87"]] == -99, ])
## [1] 0
# FARMS92: expect 0 records with the missing-value code
nrow(agpop_complete[agpop_complete[["FARMS92"]] == -99, ])
## [1] 0
# Population means over the cleaned data (N = 3044 records)
# ACRES92: 309900.4
(ACRES92_popmean <- mean(agpop_complete[["ACRES92"]]))
## [1] 309900.4
# ACRES87: 316094.7
(ACRES87_popmean <- mean(agpop_complete[["ACRES87"]]))
## [1] 316094.7
# FARMS92: 631.7424
(FARMS92_popmean <- mean(agpop_complete[["FARMS92"]]))
## [1] 631.7424
Item 3
##############################################
############ ITEM 3 #############
##############################################
# Set seed equal to the last 5 digits
# of your student number
##############################################
# Seed value for the random number generator; it is passed to set.seed()
# right before the simple random sample is drawn
(seedSRS <- 89176)
## [1] 89176
Item 4
##############################################
############ ITEM 4 #############
##############################################
# Obtain an SRS of size 300
# and estimate the population mean of ACRES92
# using a ratio estimator with
# ACRES87 as auxiliary variable
# FARMS92 as auxiliary variable
##############################################
# Size of the simple random sample
(n <- 300)
## [1] 300
# Size of the cleaned population the sample is drawn from
# (3044 records)
N
## [1] 3044
# Seed the generator, then draw n distinct row positions from 1..N
# (sampling without replacement)
# NOTE(review): the default sample.kind changed in R 3.6.0, so the indices
# printed below reproduce only under the sampling algorithm that generated
# them -- confirm the R version used for the recorded output.
set.seed(seedSRS)
(sampleindices <- sample.int(N, size = n))
## [1] 599 163 2028 1437 730 1538 2375 233 1917 522 2170 1226 2105 700
## [15] 2853 2180 2628 2332 809 962 613 2079 79 1157 2484 1348 1483 330
## [29] 2343 2864 2709 1578 618 2656 2269 778 3004 71 1644 2717 465 1372
## [43] 161 2661 1534 2862 918 1687 1569 516 542 1758 96 98 877 2521
## [57] 1901 719 1688 331 129 2149 2597 548 246 2487 298 1562 269 2538
## [71] 1015 1890 347 2617 2643 395 1683 2926 2930 1560 2962 2024 122 1421
## [85] 1926 954 1109 2473 636 2820 7 178 534 1443 1657 555 1910 996
## [99] 2379 1357 267 1051 2659 2705 1107 1245 898 1432 1738 1748 2413 1149
## [113] 819 2784 1013 950 20 334 2427 958 1770 1902 1718 2002 1118 982
## [127] 26 540 1547 2664 57 1832 1907 2179 736 1121 876 1044 2443 2047
## [141] 2857 2400 869 2624 206 1871 105 662 35 2005 2680 1373 1222 2244
## [155] 328 1228 398 2577 2471 573 2238 294 2466 1807 204 2585 1788 2746
## [169] 1299 1064 1512 2640 1069 1597 1301 1615 2785 977 840 2497 1816 785
## [183] 2575 967 776 2032 2772 2492 1076 928 1965 130 1593 2322 2459 1684
## [197] 17 1159 2421 1446 2035 1649 47 1316 2470 2237 313 2092 3014 454
## [211] 2988 2333 1389 1556 731 370 2518 397 1714 879 2947 1376 2595 2478
## [225] 1175 2221 2950 1834 1145 1861 1235 570 1740 1634 2132 2843 1517 1530
## [239] 1784 1575 1489 300 2318 1195 2683 2657 2178 1848 577 752 1859 2293
## [253] 1283 925 72 2605 854 838 2568 1275 771 2946 1135 2261 249 40
## [267] 2342 1847 1264 2069 892 225 2750 2462 1453 1515 595 403 689 887
## [281] 2441 910 697 1473 2540 2279 1148 2825 366 1959 74 1858 2315 993
## [295] 2846 2817 1119 2767 2644 920
# Pull the sampled rows (all three columns) out of the cleaned population
(agpop_sampled <- agpop_complete[sampleindices, , drop = FALSE])
## ACRES92 ACRES87 FARMS92
## 606 233217 230461 913
## 163 286288 241276 482
## 2050 230988 251969 1153
## 1449 316809 300812 1171
## 737 184599 184566 491
## 1550 181946 177963 125
## 2399 1406379 1397710 258
## 234 156801 150334 131
## 1933 1797466 1805222 485
## 529 268506 268437 740
## 2192 1457339 1519876 442
## 1236 193688 207726 835
## 2127 419760 404416 1307
## 707 258014 262198 772
## 2882 24848 28234 108
## 2202 1318447 1381625 1186
## 2654 547428 548293 550
## 2356 974811 1009978 298
## 816 158788 172226 849
## 969 90033 89986 767
## 620 357684 377025 1495
## 2101 302456 318255 1089
## 79 18818 19659 116
## 1167 83232 92806 840
## 2508 416631 422998 1622
## 1360 231610 234126 819
## 1495 151743 173064 394
## 335 299699 329388 728
## 2367 545064 486467 240
## 2893 112085 115566 1240
## 2735 644730 685935 1273
## 1590 1644001 1722206 641
## 625 221209 207722 271
## 2682 402011 546742 163
## 2293 74733 85937 279
## 785 202429 209556 731
## 3038 115487 118540 355
## 71 141260 156950 235
## 1658 180400 182498 376
## 2743 461127 402967 1795
## 472 44470 54722 157
## 1384 181292 189383 754
## 161 2108834 2358559 463
## 2687 362642 318164 215
## 1546 86096 87159 374
## 2891 274546 274119 142
## 925 409839 396556 516
## 1701 144858 154350 1293
## 1581 135126 152109 70
## 523 214452 224153 437
## 549 312173 328319 944
## 1772 1269572 1300508 988
## 96 168755 165498 727
## 98 42794 41293 371
## 884 222028 223426 820
## 2545 471498 499983 179
## 1916 2149450 2220431 544
## 726 402310 444816 1308
## 1702 51916 54858 222
## 336 296242 311074 1654
## 129 122871 112409 791
## 2171 421233 435566 1093
## 2622 383573 403124 1053
## 555 343870 334112 1653
## 248 641755 643050 253
## 2511 629681 593971 123
## 302 227202 214364 214
## 1574 598694 536553 599
## 272 546538 505471 240
## 2562 518788 589050 195
## 1022 44709 55183 564
## 1905 3112271 2991513 592
## 352 151242 166766 328
## 2643 558553 517379 707
## 2669 525885 505366 1378
## 402 25802 27899 191
## 1697 68736 75808 226
## 2955 218145 228959 720
## 2959 170228 183626 893
## 1572 2338866 2433747 514
## 2994 86091 100728 324
## 2046 275644 288175 1032
## 122 69422 80104 387
## 1433 199292 201016 1016
## 1942 738041 833913 200
## 961 471658 529749 283
## 1116 27469 26993 175
## 2497 103063 103622 669
## 643 353528 401677 285
## 2849 48889 41268 149
## 7 167832 192082 941
## 178 775829 702173 1092
## 541 308497 317974 821
## 1455 323465 336976 968
## 1671 75496 91744 195
## 562 317205 319657 853
## 1926 770155 788473 345
## 1003 86074 96982 739
## 2403 93098 88616 502
## 1369 249731 252824 663
## 270 1004360 882165 530
## 1058 159794 161234 1019
## 2685 497106 445493 561
## 2731 242901 276750 907
## 1114 87574 89425 677
## 1256 73437 76193 402
## 905 271713 263592 981
## 1444 201714 206871 849
## 1752 1165695 1122980 741
## 1762 485012 487285 451
## 2437 224247 222142 432
## 1157 37477 46747 410
## 826 184118 197875 993
## 2812 73097 71550 332
## 1020 144828 144862 617
## 957 687593 704788 1163
## 20 47200 49177 338
## 339 44962 48999 277
## 2451 275219 279482 1578
## 965 265978 253967 362
## 1784 437826 442540 787
## 1917 1881764 1894215 338
## 1732 559385 609823 462
## 2024 122480 117130 689
## 1125 193137 195787 431
## 989 192189 201861 1489
## 26 111315 118184 458
## 547 368114 376952 1362
## 1559 230524 245244 121
## 2690 667177 529092 230
## 57 155914 159757 1129
## 1846 532901 570445 479
## 1923 1769177 1635787 586
## 2201 380464 391692 1948
## 743 67998 75234 246
## 1128 247106 244811 570
## 883 201798 216179 509
## 1051 136869 137344 1067
## 2467 53026 58327 219
## 2069 200405 209643 1017
## 2886 132674 140177 493
## 2424 70457 78611 453
## 876 407464 397383 613
## 2650 515960 543283 207
## 207 388084 377352 844
## 1885 7799 10033 188
## 105 183895 173516 651
## 669 69354 73076 171
## 35 130063 127653 767
## 2027 99214 115999 821
## 2706 247626 249326 1609
## 1385 255498 260645 734
## 1232 277400 282467 914
## 2268 90065 95605 293
## 333 11738 17507 71
## 1238 47308 47988 206
## 405 3046 3841 51
## 2602 660412 762442 1565
## 2495 352488 350886 1488
## 580 266083 266090 872
## 2261 44425 56734 461
## 298 36230 41178 315
## 2490 125092 135209 617
## 1821 401978 403549 273
## 205 342653 347504 1057
## 2610 345138 348386 1904
## 1802 658572 709723 876
## 2772 1294703 1318672 716
## 1311 379603 389539 1540
## 1071 154082 164293 971
## 1524 53401 57612 362
## 2666 73948 75350 349
## 1076 165015 174061 653
## 1609 683088 704878 165
## 1313 79183 91078 683
## 1627 52974 57431 507
## 2813 24478 25831 111
## 984 137337 137781 694
## 847 79235 86245 654
## 2521 548351 509782 980
## 1830 446007 474848 206
## 792 229097 234599 633
## 2600 545664 615426 229
## 974 80864 84750 798
## 783 222435 228419 1181
## 2054 74461 79180 385
## 2799 68326 72611 319
## 2516 2405018 2377767 122
## 1083 140432 136561 466
## 935 582053 542578 547
## 1985 242637 285731 1051
## 130 156363 156212 879
## 1605 1968857 1938423 479
## 2346 236608 236960 437
## 2483 183178 190772 588
## 1698 115854 117441 476
## 17 67950 84626 215
## 1169 126981 132804 588
## 2445 54518 62446 828
## 1458 138986 139937 494
## 2057 17138 18335 272
## 1663 8882 7533 147
## 47 207226 223190 910
## 1328 395023 368115 947
## 2494 214497 216162 1637
## 2260 20777 26898 147
## 318 69405 73603 224
## 2114 300829 318542 833
## 3048 47366 48770 238
## 461 121588 131334 288
## 3020 19956 21369 158
## 2357 560057 575695 326
## 1401 201670 200323 680
## 1568 78230 91427 263
## 738 392639 415755 1141
## 377 213943 216594 558
## 2542 571684 655499 391
## 404 168593 163114 342
## 1728 777675 816265 635
## 886 324063 340899 382
## 2978 265731 281891 980
## 1388 163076 178651 567
## 2620 330173 312129 1763
## 2502 337351 347215 1748
## 1185 123932 137529 809
## 2244 88982 99920 438
## 2981 270930 291181 1094
## 1848 297326 332862 725
## 1153 34235 42562 523
## 1875 46056 47923 173
## 1246 18047 18555 101
## 577 321950 322206 1112
## 1754 531643 557568 527
## 1646 53902 50446 179
## 2154 390957 388174 704
## 2872 2358 3374 55
## 1529 262371 277137 261
## 1542 93180 123870 554
## 1798 345739 359241 1079
## 1587 1730537 1646324 376
## 1501 294547 260026 230
## 304 86026 83994 210
## 2342 1243168 1104452 541
## 1205 16099 16140 58
## 2709 536507 474001 434
## 2683 1695484 1890612 151
## 2200 34292 35230 283
## 1862 217228 241886 557
## 584 347599 342938 1194
## 759 488215 487699 1112
## 1873 25439 26574 155
## 2317 128124 124284 666
## 1295 113422 132863 509
## 932 547369 545417 536
## 72 56680 57923 559
## 2630 698832 653556 128
## 861 162244 165339 759
## 845 121710 132099 500
## 2593 313952 330751 303
## 1287 168073 178100 563
## 778 227711 221878 804
## 2977 282405 315416 1324
## 1142 61883 65151 425
## 2285 9631 10356 120
## 251 878447 996785 309
## 40 191810 207817 753
## 2366 502469 490333 931
## 1861 1005877 1122369 327
## 1275 3786 4159 36
## 2091 19088 19808 260
## 899 532890 538449 246
## 226 796892 855503 268
## 2776 105576 101622 189
## 2486 49452 53305 541
## 1465 204171 218426 783
## 1527 95736 96008 600
## 602 542855 543881 1441
## 410 184137 179393 314
## 696 431415 459120 1165
## 894 512728 503589 400
## 2465 257000 258567 697
## 917 485656 469908 842
## 704 453944 485142 1438
## 1485 289729 290980 1541
## 2564 426189 392585 1521
## 2303 156853 155717 370
## 1156 74484 82864 519
## 2854 24924 29758 272
## 373 62983 68705 201
## 1979 195626 212804 699
## 74 151325 154580 300
## 1872 20910 21479 147
## 2339 1026353 992938 1089
## 1000 147154 155594 836
## 2875 43332 39358 156
## 2846 297064 300699 1389
## 1126 210570 196324 847
## 2794 51604 59527 181
## 2670 30268 64047 225
## 927 441417 436761 1102
# The sample should hold 300 rows and 3 columns
(dim(agpop_sampled))
## [1] 300 3
# Sample means of the three study variables
# ACRES92: 351725.5
(ACRES92_samplemean <- mean(agpop_sampled[["ACRES92"]]))
## [1] 351725.5
# ACRES87: 358120.2
(ACRES87_samplemean <- mean(agpop_sampled[["ACRES87"]]))
## [1] 358120.2
# FARMS92: 629.43
(FARMS92_samplemean <- mean(agpop_sampled[["FARMS92"]]))
## [1] 629.43
# Ratio estimate of the ACRES92 population mean, auxiliary variable ACRES87:
# (sample mean of ACRES92 / sample mean of ACRES87) * population mean of ACRES87
# Result: 310450.4
# For reference, the actual ACRES92 population mean is 309900.4
ratio_ACRES87 <- ACRES92_samplemean / ACRES87_samplemean
(ACRES92_estimatedmeanusingACRES87 <- ratio_ACRES87 * ACRES87_popmean)
## [1] 310450.4
# Ratio estimate of the ACRES92 population mean, auxiliary variable FARMS92:
# (sample mean of ACRES92 / sample mean of FARMS92) * population mean of FARMS92
# Result: 353017.7
# For reference, the actual ACRES92 population mean is 309900.4
ratio_FARMS92 <- ACRES92_samplemean / FARMS92_samplemean
(ACRES92_estimatedmeanusingFARMS92 <- ratio_FARMS92 * FARMS92_popmean)
## [1] 353017.7