title: “Assessment 1 17122” output: html_document —
library(ggplot2)
library(dplyr)
library(magrittr)
library(data.table)
library(readxl) # Load the readxl package
library(tidyr)
library(stringr)
library(openxlsx)
library(readxl)
library("Hmisc")
library(lattice)
library(latticeExtra)
library(tidyverse)
A1a <- read_excel("pop_dataset.xlsx", sheet = 1)
A1 <- A1a[order(A1a$region), ] #sort original regions into numerical order
#Question 1
dim (A1)
head(A1)
# 1.3 Print out the number of unique regions in the dataset (500 unique regions, each with 112 observations).
#To create a dataframe with region numbered from 1 to 500
df1 <-aggregate(A1$region, by=list(A1$region), FUN=length) #Aggregate regions into groups
# 1.4 What is the minimum age bin? # 0 year
A1
## # A tibble: 56,000 x 4
## region age gender population
## <chr> <dbl> <chr> <dbl>
## 1 SSC20005 0 M 0
## 2 SSC20005 0 F 0
## 3 SSC20005 1 M 0
## 4 SSC20005 1 F 0
## 5 SSC20005 2 M 0
## 6 SSC20005 2 F 0
## 7 SSC20005 3 M 0
## 8 SSC20005 3 F 0
## 9 SSC20005 4 M 0
## 10 SSC20005 4 F 0
## # ... with 55,990 more rows
# 1.5 What is the maximum age bin? # 55 years
# 1.6 What is the bin size for the age field? # 1 year
#Question 2.1 Expected vale of age for whole data sample
# 2.1 Use the expected value for the age to find the mean age for the whole data sample. (Ans: 27.8)
pop_tot <- sum(A1$population) #Total population
pop_tot
## [1] 796015
df2 <-aggregate(A1[,4], by=list(A1$age), FUN=sum) #Population for each age for all regions
str(df2)
## 'data.frame': 56 obs. of 2 variables:
## $ Group.1 : num 0 1 2 3 4 5 6 7 8 9 ...
## $ population: num 13179 13810 13645 14189 13833 ...
prob=df2$population/pop_tot #Probability for each age
age=df2$Group.1 #Ages from 0 to 55
ExpV_age <-sum(prob * age) # Sum of prob x age
ExpV_age #2.1 Expected value for the age (mean age) for the whole population
## [1] 27.80027
Question 2.2 Standard Deviation for whole data sample
## 2.2 Provide the standard deviation for the whole data sample. Ans=Sample Standard Deviation= 15.778,the same as the population Standard Deviation of 15.778 to 3dp.
agesq=df2$Group.1^2
agesq
## [1] 0 1 4 9 16 25 36 49 64 81 100 121 144 169 196
## [16] 225 256 289 324 361 400 441 484 529 576 625 676 729 784 841
## [31] 900 961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936
## [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025
ExpV_age1 <-sum(prob * agesq)
ExpV_age1
## [1] 1021.801
ExpV_age2 <- ExpV_age^2
ExpV_age2
## [1] 772.8548
# Sample Variance
SampleVar <-ExpV_age1 - ExpV_age2
SampleVar
## [1] 248.9465
SampleSDAge <- SampleVar^0.5
SampleSDAge
## [1] 15.77804
# Population Variance
N <- 56000 # N = number of observations of Sample
Pop_Var <- N * SampleVar/(N - 1)
Pop_Var
## [1] 248.9509
#Standard Deviation for the age for the whole sample
PopSDAge <- Pop_Var^0.5
PopSDAge
## [1] 15.77818
Question 3 Statistics of mean age for each region
options(digits=5)
Age <-A1[c(1:112), 2] #Range of ages 0 to 55, this the same for all regions
# To calculate the weighted mean of the ages for each region
i <- 1
Region <- 1
format(round(Region, 0), nsmall = 0)
## [1] "1"
WMStore <- list() # Create empty list to store weighted means for each age
while (i < 56000) {
j <- i + 111
Pop <- A1[c(i:j), 4] # Population for each region, 112 observations per region
Pop
WM <- weighted.mean(Age, Pop)
WM # weighted mean for each region
WMStore[[Region]] <- WM # WMStore contains the weighted mean for all the 500 regions
Region <- Region + 1
i <- i+112
if (i ==56000){
break
}
}
WMS <- unlist(WMStore)
WMS # This contains the weighted mean for each of the regions
df_WMS <- as.data.frame((as.data.frame(WMS))) # This is a dataframe of the weighted mean in a column of 500 rows
mean(WMS) # 3.1 Mean=30.608 sd(WMS) # 3.2 SD=7.996 min(WMS) # 3.3 Minimum = 2 quantile(WMS,0.25) # 3.4 First Quartile = 27.426 quantile(WMS,0.50) # 3.5 Median = 29.232 quantile(WMS,0.75) # 3.6 Third Quartile = 33.35 max(WMS) # 3.7 Maximum = 55 IQR(WMS) # 3.8 IQR = 5.924
#3.6 Histogram of the distribution of means
hist(WMS, xlab=“Weighted Mean of Age”, main=“Distribution of means from each region”)
df3S <-aggregate(A1[,4], by=list(A1$region), FUN=sum) # population for each region
df3Smin <- filter(df3S, df3S$population == min(df3S$population)) # Find all regions with smallest population
df3Smin #Find all regions with smallest population, SSC20099 is one of them with 3 people
## Group.1 population
## 1 SSC20099 3
## 2 SSC20127 3
## 3 SSC20151 3
## 4 SSC20346 3
## 5 SSC20383 3
## 6 SSC20398 3
## 7 SSC20422 3
## 8 SSC20502 3
## 9 SSC20503 3
## 10 SSC20516 3
## 11 SSC21012 3
## 12 SSC21026 3
## 13 SSC21037 3
## 14 SSC21283 3
## 15 SSC22012 3
## 16 SSC22021 3
## 17 SSC22028 3
## 18 SSC22070 3
## 19 SSC22084 3
## 20 SSC22157 3
## 21 SSC22193 3
## 22 SSC22237 3
## 23 SSC22281 3
## 24 SSC22367 3
## 25 SSC22597 3
## 26 SSC22605 3
## 27 SSC22606 3
## 28 SSC22616 3
## 29 SSC22698 3
## 30 SSC22702 3
## 31 SSC22719 3
## 32 SSC22772 3
## 33 SSC22809 3
## 34 SSC22840 3
## 35 SSC22873 3
#Question 5.1 Distribution of Ages
df4S <-aggregate(A1[,4], by=list(A1$region), FUN=sum) # population for each region
df4Smax <- filter(df4S, df4S$population == max(df4S$population)) # Find region with largest population
df4Smax #Find all regions with largest population, and SSC22015 is the largest with 37948 people
## Group.1 population
## 1 SSC22015 37948
RL<- A1 %>% filter_at(vars(region), any_vars(. %in% c('SSC22015'))) #Data in the region SSC22015 which has the largest [population]
RLC <-aggregate(RL[,4], by=list(RL$age), FUN=sum) #add male and female population for each age
#Distribution of Ages for region SSC22015
p1 <- ggplot(RLC, aes(x = age, y = population)) +
geom_line()
p1
print(p1 + ggtitle("Distribution of ages for region with most people"))
#Question 5.2 Cumulative Distribution of Ages
p2<- ggplot(RLC, aes(x = age)) +
stat_ecdf(geom = "step")
stat_ecdf()
## geom_step: na.rm = FALSE
## stat_ecdf: n = NULL, pad = TRUE, na.rm = FALSE
## position_identity
p3 <- print(p2 + ggtitle("Cumulative Distribution of ages for region with most people"))
p3
# Question 5.3 Cumulative distribution for males and females, maybe not used/finished
RLS <- A1a[order(RL$gender), ] #Population data for largest region in the order of gender
RLSF <- RLS[1:56, ] #Data for Female
RLSM <- RLS[57:112, ] #Data for Male
#Q5.1 (Alt) #Probably not used
str(RLS)
## tibble [112 x 4] (S3: tbl_df/tbl/data.frame)
## $ region : chr [1:112] "SSC21184" "SSC21184" "SSC21184" "SSC21184" ...
## $ age : num [1:112] 0 1 2 3 4 5 6 7 8 9 ...
## $ gender : chr [1:112] "F" "F" "F" "F" ...
## $ population: num [1:112] 95 107 120 125 117 147 137 130 157 161 ...
x1 <- RLS$age
x1
## [1] 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## [26] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
## [51] 50 51 52 53 54 55 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [76] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## [101] 44 45 46 47 48 49 50 51 52 53 54 55
#str(x)
y1 <- RLSF$population # Female
y2 <- RLSM$population # Male
# merge two data frames by age
RLS <- A1a[order(RL$gender), ]
RLSF <- RLS[1:56, ] #Female
RLSM <- RLS[57:112, ] #Male
RLSF2<- RLSF[ , c(2, 4)]
RLSM2<- RLSM[ , c(2, 4)]
RLFM <- merge(RLSF2,RLSM2,by="age")
str(RLFM)
## 'data.frame': 56 obs. of 3 variables:
## $ age : num 0 1 2 3 4 5 6 7 8 9 ...
## $ population.x: num 95 107 120 125 117 147 137 130 157 161 ...
## $ population.y: num 114 88 122 123 114 140 131 127 139 149 ...
colnames(RLFM)[2] <- "F"
colnames(RLFM)[3] <- "M"
#May need more development to finish Q5.3 using this approach
#p1A <- ggplot(RLFM, aes(x = age, y = F)) +
#geom_line()
#p1A
#Question 5.3 (Alternative 1) not using ggplot and check to see if I can change colour and add title
set.seed(42)
RLSF <- RLS[1:56, ] #Data for Female
RLSM <- RLS[57:112, ] #Data for Male
Female <- RLSF$population #Population for female
Male <- RLSM$population #Population for male
gender <- data.frame(Female, Male)
ecdfplot(~ Female + Male, data=gender, auto.key=list(space='right'))
#Question 5.3 (Alternative 2) Use ggplot
set.seed(1234)
RLSF <- RLS[1:56, ] #Female
RLSM <- RLS[57:112, ] #Male
Female <- RLSF$population
Male <- RLSM$population
# create a random data frame
gender <- data.frame(x = c(Female, Male), group = gl(2, 56))
#gender1<- as.numeric(gender$group)
gender$group <- as.numeric(gender$group)
gender$group[1:56] <- "Female"
gender$group[57:112] <- "Male"
p15 <- ggplot(gender, aes(x = x, col = group)) + stat_ecdf()
p16<- print(p15 + ggtitle("Cumulative Distribution of females and males population") + labs(x = "Population per Region",
y = "Probability"))
p16
# stat_ecdf() function is used to plot ECDF plot
#Question 6.1 Ratio of old to young vs population scatter plot
options(digits=5)
# To calculate the population young and old, ratio old/young for each region
i <- 1
m <- 81
Region <- 1
format(round(Region, 0), nsmall = 0)
## [1] "1"
PopTotStore <- list() # Create empty list to store total population for each region
RatioStore <- list() # Create empty list to store ratio for each region
while (i < 56000) {
j <- i + 79
n <- m + 31
Popyoung <- A1[c(i:j), 4]
# Population for young, 80 observations per region
Popold <- A1[c(m:n), 4] # Population for olf, 32 observations per region
PopyoungS <- sum(Popyoung)
PopoldS <- sum(Popold)
#print(Popold)
PopTotStore [[Region]] <- PopyoungS + PopoldS
#print(PopTotStore)
RatioStore [[Region]] <- PopoldS/PopyoungS
#print(RatioStore)
Region <- Region + 1
i <- i + 112
m <- m + 112
if (i == 56000) {
break
if (m == 56000) {
break
}
}
}
PopTotStore <- unlist(PopTotStore) #This contains the total Population for each of the regions
RatioStore <- unlist(RatioStore) #This contains the ratio for each of the regions
PopTot <- as.data.frame((as.data.frame(PopTotStore))) # This is a dataframe of the total population for each region
PopTot$Region <- 1:nrow(PopTot)
PopTot
## PopTotStore Region
## 1 33 1
## 2 425 2
## 3 100 3
## 4 1137 4
## 5 51 5
## 6 924 6
## 7 359 7
## 8 5821 8
## 9 4978 9
## 10 3 10
## 11 38 11
## 12 17 12
## 13 2049 13
## 14 3 14
## 15 1826 15
## 16 270 16
## 17 3 17
## 18 2643 18
## 19 178 19
## 20 12 20
## 21 314 21
## 22 6666 22
## 23 24 23
## 24 15 24
## 25 3439 25
## 26 3168 26
## 27 18 27
## 28 9837 28
## 29 10 29
## 30 41 30
## 31 9 31
## 32 60 32
## 33 54 33
## 34 89 34
## 35 8 35
## 36 27 36
## 37 161 37
## 38 52 38
## 39 6 39
## 40 138 40
## 41 58 41
## 42 3391 42
## 43 57 43
## 44 9116 44
## 45 9046 45
## 46 29 46
## 47 21 47
## 48 60 48
## 49 3104 49
## 50 36 50
## 51 3 51
## 52 112 52
## 53 61 53
## 54 38 54
## 55 16 55
## 56 9802 56
## 57 11357 57
## 58 40 58
## 59 104 59
## 60 123 60
## 61 6 61
## 62 3 62
## 63 159 63
## 64 103 64
## 65 211 65
## 66 3 66
## 67 21 67
## 68 2615 68
## 69 6 69
## 70 3555 70
## 71 3 71
## 72 75 72
## 73 52 73
## 74 5 74
## 75 166 75
## 76 3735 76
## 77 1188 77
## 78 120 78
## 79 213 79
## 80 16705 80
## 81 39 81
## 82 3 82
## 83 3 83
## 84 29 84
## 85 190 85
## 86 23 86
## 87 3 87
## 88 3772 88
## 89 1282 89
## 90 7 90
## 91 194 91
## 92 6803 92
## 93 5402 93
## 94 15972 94
## 95 7 95
## 96 239 96
## 97 3363 97
## 98 198 98
## 99 9729 99
## 100 5016 100
## 101 1011 101
## 102 1790 102
## 103 7287 103
## 104 132 104
## 105 54 105
## 106 48 106
## 107 494 107
## 108 2306 108
## 109 9 109
## 110 12 110
## 111 17442 111
## 112 14 112
## 113 58 113
## 114 427 114
## 115 18 115
## 116 148 116
## 117 6461 117
## 118 211 118
## 119 1082 119
## 120 45 120
## 121 117 121
## 122 18540 122
## 123 12 123
## 124 171 124
## 125 2563 125
## 126 219 126
## 127 16 127
## 128 284 128
## 129 50 129
## 130 3981 130
## 131 1367 131
## 132 568 132
## 133 1752 133
## 134 18 134
## 135 847 135
## 136 72 136
## 137 7866 137
## 138 239 138
## 139 17809 139
## 140 6 140
## 141 15608 141
## 142 1434 142
## 143 88 143
## 144 9 144
## 145 19340 145
## 146 1055 146
## 147 862 147
## 148 12350 148
## 149 132 149
## 150 89 150
## 151 79 151
## 152 14 152
## 153 15 153
## 154 9 154
## 155 12 155
## 156 547 156
## 157 69 157
## 158 32 158
## 159 65 159
## 160 3 160
## 161 210 161
## 162 5 162
## 163 54 163
## 164 3 164
## 165 130 165
## 166 37 166
## 167 3 167
## 168 17140 168
## 169 1554 169
## 170 7 170
## 171 147 171
## 172 475 172
## 173 468 173
## 174 4 174
## 175 6 175
## 176 46 176
## 177 76 177
## 178 286 178
## 179 67 179
## 180 6281 180
## 181 20939 181
## 182 61 182
## 183 6550 183
## 184 5 184
## 185 217 185
## 186 19180 186
## 187 11218 187
## 188 616 188
## 189 44 189
## 190 1288 190
## 191 22 191
## 192 13730 192
## 193 14546 193
## 194 6 194
## 195 7 195
## 196 9 196
## 197 663 197
## 198 30 198
## 199 6 199
## 200 1908 200
## 201 6 201
## 202 2575 202
## 203 25 203
## 204 183 204
## 205 2661 205
## 206 84 206
## 207 18 207
## 208 229 208
## 209 304 209
## 210 128 210
## 211 35 211
## 212 41 212
## 213 3 213
## 214 808 214
## 215 24 215
## 216 2386 216
## 217 12 217
## 218 9818 218
## 219 21 219
## 220 2289 220
## 221 41 221
## 222 4978 222
## 223 5033 223
## 224 18 224
## 225 2391 225
## 226 18 226
## 227 1207 227
## 228 211 228
## 229 12 229
## 230 52 230
## 231 9 231
## 232 249 232
## 233 22 233
## 234 18 234
## 235 176 235
## 236 127 236
## 237 12 237
## 238 102 238
## 239 88 239
## 240 1094 240
## 241 2548 241
## 242 310 242
## 243 22 243
## 244 942 244
## 245 50 245
## 246 74 246
## 247 73 247
## 248 698 248
## 249 28 249
## 250 323 250
## 251 988 251
## 252 376 252
## 253 538 253
## 254 1799 254
## 255 135 255
## 256 7282 256
## 257 70 257
## 258 3504 258
## 259 21 259
## 260 8 260
## 261 528 261
## 262 28 262
## 263 63 263
## 264 22 264
## 265 289 265
## 266 1148 266
## 267 36 267
## 268 51 268
## 269 74 269
## 270 750 270
## 271 12 271
## 272 440 272
## 273 81 273
## 274 2960 274
## 275 22979 275
## 276 1213 276
## 277 10 277
## 278 23 278
## 279 15 279
## 280 12 280
## 281 129 281
## 282 6 282
## 283 77 283
## 284 26 284
## 285 10770 285
## 286 28 286
## 287 4310 287
## 288 30 288
## 289 15 289
## 290 16669 290
## 291 794 291
## 292 489 292
## 293 34 293
## 294 2545 294
## 295 1289 295
## 296 21 296
## 297 36 297
## 298 148 298
## 299 144 299
## 300 81 300
## 301 55 301
## 302 6 302
## 303 265 303
## 304 245 304
## 305 40 305
## 306 526 306
## 307 69 307
## 308 61 308
## 309 40 309
## 310 59 310
## 311 15 311
## 312 201 312
## 313 61 313
## 314 283 314
## 315 38 315
## 316 6 316
## 317 19 317
## 318 23 318
## 319 779 319
## 320 191 320
## 321 47 321
## 322 790 322
## 323 45 323
## 324 12 324
## 325 307 325
## 326 216 326
## 327 292 327
## 328 37 328
## 329 1166 329
## 330 154 330
## 331 13 331
## 332 19 332
## 333 4 333
## 334 17 334
## 335 72 335
## 336 2823 336
## 337 162 337
## 338 79 338
## 339 67 339
## 340 69 340
## 341 6048 341
## 342 36 342
## 343 120 343
## 344 20 344
## 345 3 345
## 346 37948 346
## 347 267 347
## 348 3 348
## 349 3 349
## 350 13253 350
## 351 7776 351
## 352 71 352
## 353 15 353
## 354 98 354
## 355 208 355
## 356 109 356
## 357 9 357
## 358 204 358
## 359 3 359
## 360 1879 360
## 361 60 361
## 362 1226 362
## 363 130 363
## 364 3 364
## 365 15 365
## 366 380 366
## 367 33 367
## 368 62 368
## 369 99 369
## 370 10596 370
## 371 1536 371
## 372 88 372
## 373 337 373
## 374 130 374
## 375 159 375
## 376 1953 376
## 377 3 377
## 378 163 378
## 379 13 379
## 380 6 380
## 381 10 381
## 382 65 382
## 383 42 383
## 384 3 384
## 385 19 385
## 386 5056 386
## 387 86 387
## 388 373 388
## 389 33 389
## 390 4177 390
## 391 3 391
## 392 1330 392
## 393 4060 393
## 394 12 394
## 395 833 395
## 396 12 396
## 397 21 397
## 398 3 398
## 399 93 399
## 400 260 400
## 401 762 401
## 402 290 402
## 403 1933 403
## 404 1229 404
## 405 28 405
## 406 22 406
## 407 42 407
## 408 10937 408
## 409 183 409
## 410 199 410
## 411 6 411
## 412 108 412
## 413 9 413
## 414 36 414
## 415 34 415
## 416 3 416
## 417 78 417
## 418 17 418
## 419 351 419
## 420 56 420
## 421 73 421
## 422 34 422
## 423 12 423
## 424 152 424
## 425 1301 425
## 426 6 426
## 427 24 427
## 428 1412 428
## 429 16 429
## 430 714 430
## 431 57 431
## 432 30 432
## 433 16 433
## 434 24 434
## 435 190 435
## 436 7 436
## 437 574 437
## 438 202 438
## 439 66 439
## 440 32 440
## 441 8174 441
## 442 17928 442
## 443 35 443
## 444 19274 444
## 445 277 445
## 446 3 446
## 447 299 447
## 448 3 448
## 449 3 449
## 450 10 450
## 451 3 451
## 452 87 452
## 453 831 453
## 454 86 454
## 455 71 455
## 456 268 456
## 457 2257 457
## 458 76 458
## 459 3 459
## 460 27 460
## 461 3 461
## 462 9 462
## 463 24 463
## 464 3838 464
## 465 12 465
## 466 4306 466
## 467 3 467
## 468 158 468
## 469 18 469
## 470 1082 470
## 471 10893 471
## 472 4153 472
## 473 12544 473
## 474 2342 474
## 475 4074 475
## 476 21 476
## 477 3 477
## 478 12 478
## 479 1159 479
## 480 42 480
## 481 57 481
## 482 3 482
## 483 205 483
## 484 3 484
## 485 32 485
## 486 69 486
## 487 15 487
## 488 1079 488
## 489 6 489
## 490 6 490
## 491 144 491
## 492 25 492
## 493 3 493
## 494 3269 494
## 495 16 495
## 496 353 496
## 497 191 497
## 498 23 498
## 499 24 499
## 500 15 500
Ratio <- as.data.frame((as.data.frame(RatioStore))) # This is a dataframe of the ratio old/young for each region
Ratio$Region <- 1:nrow(Ratio)
PopRatio <- merge(Ratio, PopTot, by="Region")
names(PopRatio) [2] <- "Ratio"
names(PopRatio)[3] <- "PopRegion"
#To delete rows where ratio is inf or ratio < 1000
PopRatio1<- PopRatio[PopRatio$Ratio <= 20, ]
p17 <- ggplot(PopRatio1, aes(x=PopRegion, y=Ratio)) +
geom_point()+
geom_smooth(method=lm, color="red")+
labs(title="Ratio of old to young vs Population in Region",
x="Population in Region")+
theme_classic()
p17
# Question 6.2
#The scatter plot in 6.1 indicates the following trends:
#1 When the population is low, the ratio of old to young is high. This suggests that there are more old people than young people when the population of a region is low.
#2 When the population is high, the ratio is low. This shows that in the more populous regions,
# there are more young people than old people.
#3 This is consistent with a trend that older people would move to a small country town where the #cost of housing is cheaper, and they will have a greater spending power with their limited funds #and many of them have likely retired.
#4 The younger couples and people will live in a more populous region for cheaper housing, jobs, #schooling, health facility and other conveniences to support their life styles.
#5 There are many more in between the two extremes, and the trend will depend on a combination of #factors. e.g stages of their live, wealth leveks, empty nesters, availability of jobs etc.
#Question 7.1 Ratio of female to male vs population scatter plot
options(digits=5)
# To calculate the population female and male, ratio female/male for each region
PopGenS <- 0 # Create empty list to store total population for each region
RatioGenS <- 0
p <- 1
a <- 56
Region <- 1
format(round(Region, 0), nsmall = 0)
## [1] "1"
PopGenS <- list() # Create empty list to store total population for each region
RatioGenS <- list() # Create empty list to store ratio for each region
while (p < 56000) {
q <- p + 111
b <- a +55
PopSort <- A1[c(p:q), ] #Data from row i to j
#RLG <-PopSort[order(PopSort$gender),]
RLG <-PopSort[order(PopSort$gender),] #Sort in the order of gender for each region
PopF <- RLG [c(1:56), 4] # Population for female, 56 observations per region
PopM <- RLG [c(57:112), 4] # Population for male, 56 observations per region
PopFS <- sum(PopF)
PopMS <- sum(PopM)
PopTS <- PopFS+PopMS
PopGenS [[Region]] <- PopTS
RatioGenS [[Region]] <- PopFS/PopMS
#print(RatioGenS)
Region <- Region + 1
p <- p + 112
a <- a + 112
if (p == 56000) {
break
if (a == 56000) {
break
}
}
}
PopGenS <- unlist(PopGenS) #This contains the total Population for each of the regions
RatioGenS <- unlist(RatioGenS) #This contains the ratio for each of the regions
PopGenST <- as.data.frame((as.data.frame(PopGenS))) # This is a dataframe of the total population for each region
PopGenST$Region <- 1:nrow(PopGenST)
PopGenST
## PopGenS Region
## 1 33 1
## 2 425 2
## 3 100 3
## 4 1137 4
## 5 51 5
## 6 924 6
## 7 359 7
## 8 5821 8
## 9 4978 9
## 10 3 10
## 11 38 11
## 12 17 12
## 13 2049 13
## 14 3 14
## 15 1826 15
## 16 270 16
## 17 3 17
## 18 2643 18
## 19 178 19
## 20 12 20
## 21 314 21
## 22 6666 22
## 23 24 23
## 24 15 24
## 25 3439 25
## 26 3168 26
## 27 18 27
## 28 9837 28
## 29 10 29
## 30 41 30
## 31 9 31
## 32 60 32
## 33 54 33
## 34 89 34
## 35 8 35
## 36 27 36
## 37 161 37
## 38 52 38
## 39 6 39
## 40 138 40
## 41 58 41
## 42 3391 42
## 43 57 43
## 44 9116 44
## 45 9046 45
## 46 29 46
## 47 21 47
## 48 60 48
## 49 3104 49
## 50 36 50
## 51 3 51
## 52 112 52
## 53 61 53
## 54 38 54
## 55 16 55
## 56 9802 56
## 57 11357 57
## 58 40 58
## 59 104 59
## 60 123 60
## 61 6 61
## 62 3 62
## 63 159 63
## 64 103 64
## 65 211 65
## 66 3 66
## 67 21 67
## 68 2615 68
## 69 6 69
## 70 3555 70
## 71 3 71
## 72 75 72
## 73 52 73
## 74 5 74
## 75 166 75
## 76 3735 76
## 77 1188 77
## 78 120 78
## 79 213 79
## 80 16705 80
## 81 39 81
## 82 3 82
## 83 3 83
## 84 29 84
## 85 190 85
## 86 23 86
## 87 3 87
## 88 3772 88
## 89 1282 89
## 90 7 90
## 91 194 91
## 92 6803 92
## 93 5402 93
## 94 15972 94
## 95 7 95
## 96 239 96
## 97 3363 97
## 98 198 98
## 99 9729 99
## 100 5016 100
## 101 1011 101
## 102 1790 102
## 103 7287 103
## 104 132 104
## 105 54 105
## 106 48 106
## 107 494 107
## 108 2306 108
## 109 9 109
## 110 12 110
## 111 17442 111
## 112 14 112
## 113 58 113
## 114 427 114
## 115 18 115
## 116 148 116
## 117 6461 117
## 118 211 118
## 119 1082 119
## 120 45 120
## 121 117 121
## 122 18540 122
## 123 12 123
## 124 171 124
## 125 2563 125
## 126 219 126
## 127 16 127
## 128 284 128
## 129 50 129
## 130 3981 130
## 131 1367 131
## 132 568 132
## 133 1752 133
## 134 18 134
## 135 847 135
## 136 72 136
## 137 7866 137
## 138 239 138
## 139 17809 139
## 140 6 140
## 141 15608 141
## 142 1434 142
## 143 88 143
## 144 9 144
## 145 19340 145
## 146 1055 146
## 147 862 147
## 148 12350 148
## 149 132 149
## 150 89 150
## 151 79 151
## 152 14 152
## 153 15 153
## 154 9 154
## 155 12 155
## 156 547 156
## 157 69 157
## 158 32 158
## 159 65 159
## 160 3 160
## 161 210 161
## 162 5 162
## 163 54 163
## 164 3 164
## 165 130 165
## 166 37 166
## 167 3 167
## 168 17140 168
## 169 1554 169
## 170 7 170
## 171 147 171
## 172 475 172
## 173 468 173
## 174 4 174
## 175 6 175
## 176 46 176
## 177 76 177
## 178 286 178
## 179 67 179
## 180 6281 180
## 181 20939 181
## 182 61 182
## 183 6550 183
## 184 5 184
## 185 217 185
## 186 19180 186
## 187 11218 187
## 188 616 188
## 189 44 189
## 190 1288 190
## 191 22 191
## 192 13730 192
## 193 14546 193
## 194 6 194
## 195 7 195
## 196 9 196
## 197 663 197
## 198 30 198
## 199 6 199
## 200 1908 200
## 201 6 201
## 202 2575 202
## 203 25 203
## 204 183 204
## 205 2661 205
## 206 84 206
## 207 18 207
## 208 229 208
## 209 304 209
## 210 128 210
## 211 35 211
## 212 41 212
## 213 3 213
## 214 808 214
## 215 24 215
## 216 2386 216
## 217 12 217
## 218 9818 218
## 219 21 219
## 220 2289 220
## 221 41 221
## 222 4978 222
## 223 5033 223
## 224 18 224
## 225 2391 225
## 226 18 226
## 227 1207 227
## 228 211 228
## 229 12 229
## 230 52 230
## 231 9 231
## 232 249 232
## 233 22 233
## 234 18 234
## 235 176 235
## 236 127 236
## 237 12 237
## 238 102 238
## 239 88 239
## 240 1094 240
## 241 2548 241
## 242 310 242
## 243 22 243
## 244 942 244
## 245 50 245
## 246 74 246
## 247 73 247
## 248 698 248
## 249 28 249
## 250 323 250
## 251 988 251
## 252 376 252
## 253 538 253
## 254 1799 254
## 255 135 255
## 256 7282 256
## 257 70 257
## 258 3504 258
## 259 21 259
## 260 8 260
## 261 528 261
## 262 28 262
## 263 63 263
## 264 22 264
## 265 289 265
## 266 1148 266
## 267 36 267
## 268 51 268
## 269 74 269
## 270 750 270
## 271 12 271
## 272 440 272
## 273 81 273
## 274 2960 274
## 275 22979 275
## 276 1213 276
## 277 10 277
## 278 23 278
## 279 15 279
## 280 12 280
## 281 129 281
## 282 6 282
## 283 77 283
## 284 26 284
## 285 10770 285
## 286 28 286
## 287 4310 287
## 288 30 288
## 289 15 289
## 290 16669 290
## 291 794 291
## 292 489 292
## 293 34 293
## 294 2545 294
## 295 1289 295
## 296 21 296
## 297 36 297
## 298 148 298
## 299 144 299
## 300 81 300
## 301 55 301
## 302 6 302
## 303 265 303
## 304 245 304
## 305 40 305
## 306 526 306
## 307 69 307
## 308 61 308
## 309 40 309
## 310 59 310
## 311 15 311
## 312 201 312
## 313 61 313
## 314 283 314
## 315 38 315
## 316 6 316
## 317 19 317
## 318 23 318
## 319 779 319
## 320 191 320
## 321 47 321
## 322 790 322
## 323 45 323
## 324 12 324
## 325 307 325
## 326 216 326
## 327 292 327
## 328 37 328
## 329 1166 329
## 330 154 330
## 331 13 331
## 332 19 332
## 333 4 333
## 334 17 334
## 335 72 335
## 336 2823 336
## 337 162 337
## 338 79 338
## 339 67 339
## 340 69 340
## 341 6048 341
## 342 36 342
## 343 120 343
## 344 20 344
## 345 3 345
## 346 37948 346
## 347 267 347
## 348 3 348
## 349 3 349
## 350 13253 350
## 351 7776 351
## 352 71 352
## 353 15 353
## 354 98 354
## 355 208 355
## 356 109 356
## 357 9 357
## 358 204 358
## 359 3 359
## 360 1879 360
## 361 60 361
## 362 1226 362
## 363 130 363
## 364 3 364
## 365 15 365
## 366 380 366
## 367 33 367
## 368 62 368
## 369 99 369
## 370 10596 370
## 371 1536 371
## 372 88 372
## 373 337 373
## 374 130 374
## 375 159 375
## 376 1953 376
## 377 3 377
## 378 163 378
## 379 13 379
## 380 6 380
## 381 10 381
## 382 65 382
## 383 42 383
## 384 3 384
## 385 19 385
## 386 5056 386
## 387 86 387
## 388 373 388
## 389 33 389
## 390 4177 390
## 391 3 391
## 392 1330 392
## 393 4060 393
## 394 12 394
## 395 833 395
## 396 12 396
## 397 21 397
## 398 3 398
## 399 93 399
## 400 260 400
## 401 762 401
## 402 290 402
## 403 1933 403
## 404 1229 404
## 405 28 405
## 406 22 406
## 407 42 407
## 408 10937 408
## 409 183 409
## 410 199 410
## 411 6 411
## 412 108 412
## 413 9 413
## 414 36 414
## 415 34 415
## 416 3 416
## 417 78 417
## 418 17 418
## 419 351 419
## 420 56 420
## 421 73 421
## 422 34 422
## 423 12 423
## 424 152 424
## 425 1301 425
## 426 6 426
## 427 24 427
## 428 1412 428
## 429 16 429
## 430 714 430
## 431 57 431
## 432 30 432
## 433 16 433
## 434 24 434
## 435 190 435
## 436 7 436
## 437 574 437
## 438 202 438
## 439 66 439
## 440 32 440
## 441 8174 441
## 442 17928 442
## 443 35 443
## 444 19274 444
## 445 277 445
## 446 3 446
## 447 299 447
## 448 3 448
## 449 3 449
## 450 10 450
## 451 3 451
## 452 87 452
## 453 831 453
## 454 86 454
## 455 71 455
## 456 268 456
## 457 2257 457
## 458 76 458
## 459 3 459
## 460 27 460
## 461 3 461
## 462 9 462
## 463 24 463
## 464 3838 464
## 465 12 465
## 466 4306 466
## 467 3 467
## 468 158 468
## 469 18 469
## 470 1082 470
## 471 10893 471
## 472 4153 472
## 473 12544 473
## 474 2342 474
## 475 4074 475
## 476 21 476
## 477 3 477
## 478 12 478
## 479 1159 479
## 480 42 480
## 481 57 481
## 482 3 482
## 483 205 483
## 484 3 484
## 485 32 485
## 486 69 486
## 487 15 487
## 488 1079 488
## 489 6 489
## 490 6 490
## 491 144 491
## 492 25 492
## 493 3 493
## 494 3269 494
## 495 16 495
## 496 353 496
## 497 191 497
## 498 23 498
## 499 24 499
## 500 15 500
RatioGenST <- as.data.frame((as.data.frame(RatioGenS))) # This is a dataframe of the ratio (F/M) population for each region
RatioGenST$Region <- 1:nrow(RatioGenST)
RatioGenST
## RatioGenS Region
## 1 1.53846 1
## 2 0.95853 2
## 3 0.66667 3
## 4 0.89816 4
## 5 0.70000 5
## 6 0.94118 6
## 7 0.89947 7
## 8 0.99145 8
## 9 1.00564 9
## 10 0.00000 10
## 11 0.90000 11
## 12 1.12500 12
## 13 0.99902 13
## 14 Inf 14
## 15 1.08210 15
## 16 0.83673 16
## 17 Inf 17
## 18 1.01142 18
## 19 1.57971 19
## 20 3.00000 20
## 21 1.25899 21
## 22 0.98157 22
## 23 0.33333 23
## 24 1.50000 24
## 25 0.96402 25
## 26 1.00633 26
## 27 1.00000 27
## 28 1.01042 28
## 29 0.66667 29
## 30 1.56250 30
## 31 2.00000 31
## 32 1.30769 32
## 33 0.80000 33
## 34 0.89362 34
## 35 1.00000 35
## 36 1.25000 36
## 37 1.30000 37
## 38 1.08000 38
## 39 Inf 39
## 40 1.46429 40
## 41 0.75758 41
## 42 1.01845 42
## 43 0.78125 43
## 44 0.99300 44
## 45 1.00399 45
## 46 1.41667 46
## 47 1.33333 47
## 48 0.93548 48
## 49 1.05020 49
## 50 1.40000 50
## 51 Inf 51
## 52 0.83607 52
## 53 0.56410 53
## 54 0.52000 54
## 55 0.77778 55
## 56 1.08953 56
## 57 1.05817 57
## 58 1.50000 58
## 59 0.52941 59
## 60 0.98387 60
## 61 1.00000 61
## 62 0.00000 62
## 63 1.06494 63
## 64 1.19149 64
## 65 0.86726 65
## 66 Inf 66
## 67 0.75000 67
## 68 1.00383 68
## 69 Inf 69
## 70 1.07531 70
## 71 0.00000 71
## 72 0.33929 72
## 73 0.85714 73
## 74 Inf 74
## 75 1.21333 75
## 76 0.91342 76
## 77 1.01015 77
## 78 0.87500 78
## 79 1.19588 79
## 80 1.17004 80
## 81 0.85714 81
## 82 Inf 82
## 83 Inf 83
## 84 0.52632 84
## 85 1.46753 85
## 86 0.43750 86
## 87 0.00000 87
## 88 1.05223 88
## 89 1.22184 89
## 90 0.00000 90
## 91 0.97959 91
## 92 0.98976 92
## 93 1.07053 93
## 94 1.02819 94
## 95 1.33333 95
## 96 0.79699 96
## 97 1.06699 97
## 98 0.63636 98
## 99 0.89575 99
## 100 1.05405 100
## 101 1.01394 101
## 102 1.08382 102
## 103 1.03434 103
## 104 1.20000 104
## 105 0.92857 105
## 106 1.66667 106
## 107 0.83643 107
## 108 0.96758 108
## 109 0.50000 109
## 110 1.00000 110
## 111 0.99748 111
## 112 3.66667 112
## 113 0.93333 113
## 114 0.99533 114
## 115 1.00000 115
## 116 0.70115 116
## 117 1.05764 117
## 118 0.91818 118
## 119 0.99631 119
## 120 0.50000 120
## 121 0.85714 121
## 122 1.04885 122
## 123 3.00000 123
## 124 0.64423 124
## 125 1.06860 125
## 126 0.80992 126
## 127 4.33333 127
## 128 1.02857 128
## 129 1.17391 129
## 130 1.01264 130
## 131 0.97258 131
## 132 0.87459 132
## 133 1.10072 133
## 134 1.00000 134
## 135 0.98826 135
## 136 1.25000 136
## 137 1.06837 137
## 138 0.89683 138
## 139 0.98341 139
## 140 1.00000 140
## 141 1.04991 141
## 142 0.90691 142
## 143 1.04651 143
## 144 2.00000 144
## 145 0.99670 145
## 146 0.95733 146
## 147 0.91131 147
## 148 1.02891 148
## 149 1.09524 149
## 150 0.58929 150
## 151 0.75556 151
## 152 1.33333 152
## 153 1.50000 153
## 154 0.50000 154
## 155 0.00000 155
## 156 1.01103 156
## 157 0.53333 157
## 158 0.77778 158
## 159 0.71053 159
## 160 Inf 160
## 161 1.00000 161
## 162 Inf 162
## 163 1.07692 163
## 164 Inf 164
## 165 0.88406 165
## 166 0.68182 166
## 167 Inf 167
## 168 0.92844 168
## 169 1.00775 169
## 170 1.33333 170
## 171 1.26154 171
## 172 1.12054 172
## 173 0.91803 173
## 174 0.00000 174
## 175 Inf 175
## 176 0.91667 176
## 177 0.72727 177
## 178 1.04286 178
## 179 0.91429 179
## 180 1.03929 180
## 181 0.95345 181
## 182 1.17857 182
## 183 1.04880 183
## 184 0.00000 184
## 185 1.33333 185
## 186 1.01980 186
## 187 1.03778 187
## 188 0.98071 188
## 189 1.44444 189
## 190 0.91382 190
## 191 0.69231 191
## 192 0.97753 192
## 193 1.03412 193
## 194 1.00000 194
## 195 1.33333 195
## 196 0.50000 196
## 197 0.96736 197
## 198 1.00000 198
## 199 1.00000 199
## 200 1.02763 200
## 201 1.00000 201
## 202 1.02119 202
## 203 0.92308 203
## 204 0.94681 204
## 205 0.99925 205
## 206 0.64706 206
## 207 0.38462 207
## 208 0.95726 208
## 209 1.11111 209
## 210 1.28571 210
## 211 0.52174 211
## 212 1.73333 212
## 213 0.00000 213
## 214 1.00496 214
## 215 0.60000 215
## 216 0.95574 216
## 217 3.00000 217
## 218 1.01436 218
## 219 0.90909 219
## 220 1.00966 220
## 221 2.15385 221
## 222 0.97932 222
## 223 1.02535 223
## 224 5.00000 224
## 225 1.00419 225
## 226 0.50000 226
## 227 0.85407 227
## 228 1.26882 228
## 229 0.33333 229
## 230 1.47619 230
## 231 0.00000 231
## 232 0.96063 232
## 233 0.83333 233
## 234 5.00000 234
## 235 0.66038 235
## 236 1.04839 236
## 237 1.00000 237
## 238 1.08163 238
## 239 0.44262 239
## 240 1.12427 240
## 241 1.09539 241
## 242 0.77143 242
## 243 2.66667 243
## 244 1.04338 244
## 245 1.50000 245
## 246 1.00000 246
## 247 1.80769 247
## 248 1.06509 248
## 249 0.75000 249
## 250 1.21233 250
## 251 1.09322 251
## 252 1.02151 252
## 253 0.97794 253
## 254 1.03507 254
## 255 1.17742 255
## 256 0.96175 256
## 257 1.00000 257
## 258 1.05152 258
## 259 1.33333 259
## 260 0.60000 260
## 261 1.03077 261
## 262 0.75000 262
## 263 0.90909 263
## 264 0.46667 264
## 265 0.97945 265
## 266 1.01404 266
## 267 1.40000 267
## 268 1.21739 268
## 269 0.64444 269
## 270 1.23881 270
## 271 0.33333 271
## 272 0.75299 272
## 273 0.88372 273
## 274 1.05841 274
## 275 1.04004 275
## 276 0.98203 276
## 277 0.66667 277
## 278 2.83333 278
## 279 0.25000 279
## 280 0.33333 280
## 281 0.98462 281
## 282 Inf 282
## 283 0.75000 283
## 284 1.60000 284
## 285 1.04558 285
## 286 0.86667 286
## 287 0.98252 287
## 288 1.50000 288
## 289 1.50000 289
## 290 0.99366 290
## 291 0.91787 291
## 292 1.10776 292
## 293 1.00000 293
## 294 1.01824 294
## 295 0.98920 295
## 296 2.50000 296
## 297 0.80000 297
## 298 1.00000 298
## 299 1.05714 299
## 300 0.65306 300
## 301 1.20000 301
## 302 Inf 302
## 303 1.05426 303
## 304 1.09402 304
## 305 0.81818 305
## 306 1.16461 306
## 307 1.02941 307
## 308 2.05000 308
## 309 1.00000 309
## 310 1.03448 310
## 311 0.66667 311
## 312 1.01000 312
## 313 2.58824 313
## 314 0.99296 314
## 315 2.16667 315
## 316 1.00000 316
## 317 5.33333 317
## 318 2.28571 318
## 319 0.88620 319
## 320 0.94898 320
## 321 0.42424 321
## 322 1.06806 322
## 323 2.46154 323
## 324 1.00000 324
## 325 1.22464 325
## 326 0.91150 326
## 327 0.95973 327
## 328 0.85000 328
## 329 1.06007 329
## 330 0.79070 330
## 331 3.33333 331
## 332 0.18750 332
## 333 0.00000 333
## 334 0.21429 334
## 335 0.63636 335
## 336 1.02948 336
## 337 0.88372 337
## 338 1.39394 338
## 339 0.86111 339
## 340 3.60000 340
## 341 0.99407 341
## 342 1.00000 342
## 343 1.03390 343
## 344 0.17647 344
## 345 Inf 345
## 346 1.03529 346
## 347 1.22500 347
## 348 0.00000 348
## 349 Inf 349
## 350 0.99144 350
## 351 0.98671 351
## 352 0.97222 352
## 353 0.87500 353
## 354 0.71930 354
## 355 0.89091 355
## 356 0.91228 356
## 357 0.50000 357
## 358 1.01980 358
## 359 0.00000 359
## 360 0.98836 360
## 361 0.71429 361
## 362 1.03317 362
## 363 1.06349 363
## 364 0.00000 364
## 365 0.25000 365
## 366 0.75115 366
## 367 0.73684 367
## 368 0.82353 368
## 369 0.86792 369
## 370 1.03378 370
## 371 1.21965 371
## 372 0.87234 372
## 373 0.94798 373
## 374 0.83099 374
## 375 1.09211 375
## 376 0.96875 376
## 377 0.00000 377
## 378 0.89535 378
## 379 3.33333 379
## 380 1.00000 380
## 381 1.50000 381
## 382 1.40741 382
## 383 0.90909 383
## 384 Inf 384
## 385 2.16667 385
## 386 1.02889 386
## 387 1.38889 387
## 388 1.00538 388
## 389 1.06250 389
## 390 1.04254 390
## 391 Inf 391
## 392 1.01821 392
## 393 1.01189 393
## 394 1.00000 394
## 395 0.88462 395
## 396 1.00000 396
## 397 2.50000 397
## 398 Inf 398
## 399 1.58333 399
## 400 0.96970 400
## 401 0.83614 401
## 402 1.23077 402
## 403 1.06738 403
## 404 1.07251 404
## 405 0.86667 405
## 406 0.69231 406
## 407 0.68000 407
## 408 1.00348 408
## 409 1.31646 409
## 410 0.85981 410
## 411 0.00000 411
## 412 1.76923 412
## 413 Inf 413
## 414 2.00000 414
## 415 1.00000 415
## 416 Inf 416
## 417 1.22857 417
## 418 3.25000 418
## 419 1.23567 419
## 420 0.93103 420
## 421 1.21212 421
## 422 0.78947 422
## 423 1.00000 423
## 424 0.92405 424
## 425 1.17195 425
## 426 1.00000 426
## 427 0.33333 427
## 428 0.92633 428
## 429 1.28571 429
## 430 1.12500 430
## 431 0.58333 431
## 432 0.42857 432
## 433 0.23077 433
## 434 0.60000 434
## 435 0.97917 435
## 436 0.00000 436
## 437 1.05000 437
## 438 1.06122 438
## 439 1.53846 439
## 440 2.55556 440
## 441 1.15332 441
## 442 1.03727 442
## 443 0.84211 443
## 444 0.86927 444
## 445 0.78710 445
## 446 0.00000 446
## 447 0.94156 447
## 448 0.00000 448
## 449 0.00000 449
## 450 0.00000 450
## 451 0.00000 451
## 452 0.89130 452
## 453 1.00725 453
## 454 1.04762 454
## 455 0.86842 455
## 456 1.25210 456
## 457 1.00266 457
## 458 1.05405 458
## 459 Inf 459
## 460 0.68750 460
## 461 0.00000 461
## 462 0.00000 462
## 463 1.66667 463
## 464 0.96619 464
## 465 3.00000 465
## 466 1.07218 466
## 467 Inf 467
## 468 0.62887 468
## 469 0.50000 469
## 470 0.90493 470
## 471 1.03379 471
## 472 1.03379 472
## 473 1.01445 473
## 474 1.04720 474
## 475 0.96622 475
## 476 0.40000 476
## 477 Inf 477
## 478 1.00000 478
## 479 0.98799 479
## 480 0.50000 480
## 481 2.16667 481
## 482 0.00000 482
## 483 1.32955 483
## 484 0.00000 484
## 485 1.66667 485
## 486 0.56818 486
## 487 0.66667 487
## 488 1.02820 488
## 489 1.00000 489
## 490 0.00000 490
## 491 0.75610 491
## 492 0.56250 492
## 493 0.00000 493
## 494 0.88632 494
## 495 0.77778 495
## 496 1.01714 496
## 497 0.91000 497
## 498 0.64286 498
## 499 3.00000 499
## 500 0.66667 500
PopRatioGen <- merge(RatioGenST, PopGenST, by="Region")
names(PopRatioGen) [2] <- "Ratio_FM"
names(PopRatioGen)[3] <- "PopRegion"
#To delete rows where ratio is inf or ratio < 1000
PopRatioGen<- PopRatioGen[PopRatioGen$Ratio_FM <= 1000, ]
max(PopRatioGen$Ratio_FM) #max ratio is 5.33
## [1] 5.3333
min(PopRatioGen$Ratio_FM) #min ratio is 0
## [1] 0
p17 <- ggplot(PopRatioGen, aes(x=PopRegion, y=Ratio_FM)) +
geom_point()+
geom_smooth(method=lm, color="red")+
labs(title="Ratio of female to male vs Population in Region",
x="Population in Region")+
theme_classic()
p17
# Question 7.2
#The scatter plot in 7.1 indicates the following trends:
#1. In regions where the population is low, the ratio of female/male ranges from very low to high of 5.33.
#2 One possible reason for this trend is related to Question 6.2, where retired people tended to move to small country towns. Female generally live longer than male, and more males have died in these low population regions.
#3 Another possible reason for the high ratio could be that the male of the family who are still fit and healthy to work, will go to more populous regions to find work and send money home. This way the female members could stay put in a region where cost of living is lower, and rely on the income earned by the male members of the family. e.g. This is quite typical in countries like china, where the male members will go from rural villages to the big city to earn an income to support his family in the poor villages.
#4 The red regression line shows that the ratio trends around 1, representing the more common balance between female and male in most regions.
#Question 8.1 Females 18 to 21 have been chosen as the primary customers for the hypothetical product.
#Question 8.2
F18_21 <- filter(A1, A1$gender == "F") #Select females only from the dataset
F18_21A <- filter(F18_21, age > 17) #Select females over 17
F18_21B <- filter(F18_21A, age < 22) #Select females between 18 and 21
F18_21C <- aggregate(F18_21B$population, by=list(F18_21B$region), FUN=sum)
F18_21D <- F18_21C[order(F18_21C$x, decreasing=TRUE), ] # The two regions with the largest population of females between 18 and 21 are SSC22015 (1113) and SSC20492 (2566)
# Females between 18 to 21 are young, impulsive and very active participants in the social media. By targeting the two regions with the highest number of young female in this age group, the multiplying effect will be the greatest when the new product is launched, and increasing the chance of achieving the greatest sales volume in the regions, as well as influencing the sales in other regions through the social media.
# Question 9.1 when n=1
# Selecting n = 1 from the region with most people
s <- 1 #Start from sample 1
n1Store <- list() # Create empty list to store weighted means for each age
while (s < 101) {
n1 <- RL[sample(nrow(RL), 1), ] #Select n=1
Age <- n1[c(1), 2] #Range of ages 0 to 55
Pop <- n1[c(1), 4] # Population for SSC22015, region with most people
WMn1 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn1
# n1Store contains the weighted means for SSC22015
n1Store[[s]] <- WMn1
s <- s + 1 #choose next sample
if (s == 101){
break
}
}
# Question 9.1 when n=10
# Selecting n = 10 from the region with most people
s <- 1 #Start from sample 1
n10Store <- list() # Create empty list to store weighted means for each age
while (s < 101) {
n10 <- RL[sample(nrow(RL), 10), ] #Select n=10
Age <- n10[c(1:10), 2] #Range of ages 0 to 55
Pop <- n10[c(1:10), 4] # Population for SSC22015, region with most people
WMn10 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn10
# n10Store contains the weighted means for SSC22015
n10Store[[s]] <- WMn10
s <- s + 1 #choose next sample
if (s == 101){
break
}
}
# Question 9.1 when n=100
# Selecting n = 100 from the region with most people
s <- 1 #Start from sample 1
n100Store <- list() # Create empty list to store weighted means for each age
while (s < 101) {
n100 <- RL[sample(nrow(RL), 100), ] #Select n=100
Age <- n100[c(1:100), 2] #Range of ages 0 to 55
Pop <- n100[c(1:100), 4] # Population for SSC22015, region with most people
WMn100 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn100
# n100Store contains the weighted means for SSC22015
n100Store[[s]] <- WMn100
s <- s + 1 #choose next sample
if (s == 101){
break
}
}
# Question 9.1 when n=1000
# Selecting n = 1000 from the region with most people
RL1000<- RL[rep(seq_len(nrow(RL)), 9), ] #Repeat the original dataset nine times to get more than 1000 rows.
s <- 1 #Start from sample 1
n1000Store <- list() # Create empty list to store weighted means for each age
while (s < 101) {
n1000 <- RL1000[sample(nrow(RL1000), 1000), ] #Select n=1000
Age <- n1000[c(1:1000), 2] #Range of ages 0 to 55
Pop <- n1000[c(1:1000), 4] # Population for SSC22015, region with most people
WMn1000 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn1000
# n100Store contains the weighted means for SSC22015
n1000Store[[s]] <- WMn1000
s <- s + 1 #choose next sample
if (s == 101){
break
}
}
n1Store <- unlist(n1Store)
n10Store <- unlist(n10Store)
n100Store <- unlist(n100Store)
n1000Store <- unlist(n1000Store)
n10Store # This contains the weighted mean for the largest regions SSC22015 for the 100 samples
## [1] 19.969 23.665 27.942 15.497 28.882 27.727 28.208 25.390 26.035 24.693
## [11] 30.294 13.376 20.021 24.298 23.714 31.887 22.664 39.681 24.177 28.669
## [21] 31.147 35.004 27.862 20.570 28.257 31.050 32.942 26.303 25.315 26.490
## [31] 23.051 19.684 28.352 21.790 20.599 24.532 33.870 31.387 32.802 22.112
## [41] 31.630 25.959 24.560 29.605 21.485 26.365 26.011 26.624 23.708 24.802
## [51] 29.727 24.053 25.440 23.942 30.122 32.286 23.693 21.204 22.324 24.469
## [61] 28.811 28.394 24.672 14.967 31.445 24.485 18.149 23.903 25.753 25.625
## [71] 22.884 28.033 25.768 24.748 34.513 27.430 29.783 30.153 28.529 27.464
## [81] 28.881 31.217 28.333 27.442 17.651 36.671 26.827 26.175 22.304 20.357
## [91] 28.283 22.355 28.916 24.177 23.312 27.447 19.744 28.054 29.405 21.543
n1Sum <- as.data.frame((as.data.frame(n1Store))) # This is a dataframe of the weighted mean in a column of 100 rows
n10Sum <- as.data.frame((as.data.frame(n10Store))) # This is a dataframe of the weighted mean in a column of 100 rows
n100Sum <- as.data.frame((as.data.frame(n100Store))) # This is a dataframe of the weighted mean in a column of 100 rows
n1000Sum <- as.data.frame((as.data.frame(n1000Store))) # This is a dataframe of the weighted mean in a column of 100 rows
#For n=1
# set sample size
#Compare the results from $n_{sum}$ = 1, 10 and 100.
n_sample <- 100
# set number of times the samples will be summed
n_sum <- 100
n1S <- n1Sum$n1Store
mean1 <- mean(n1S)
sd1 <- sd(n1S)
binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4) # rule of thumb for aesthetics
title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=1)', n_sum, n_sample)
p18 <- ggplot()
p18 <- p18 + geom_histogram(aes(x = n1S), binwidth = binwidth, colour = "white",
fill = "cornflowerblue", size = 0.1)
p18 <- p18 + stat_function(fun = function(x) dnorm(x, mean = mean1, sd = sd1) * n_sample * binwidth,
color = "darkred", size = 1)
p18 <- p18 + labs(x = 'x', y = 'f(x)', title = title_txt)
#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)
p18
#For n=10
# set sample size
#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000
n_sample <- 100
# set number of times the samples will be summed
n_sum <- 100
n10S <- n10Sum$n10Store #(TESTING ONLY)
mean10 <- mean(n10S)
sd10 <- sd(n10S)
binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4) # rule of thumb for aesthetics
title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=10)', n_sum, n_sample)
p19 <- ggplot()
p19 <- p19 + geom_histogram(aes(x = n10S), binwidth = binwidth, colour = "white",
fill = "cornflowerblue", size = 0.1)
p19 <- p19 + stat_function(fun = function(x) dnorm(x, mean = mean10, sd = sd10) * n_sample * binwidth,
color = "darkred", size = 1)
p19 <- p19 + labs(x = 'x', y = 'f(x)', title = title_txt)
#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)
p19
#For n=100
# set sample size
#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000
n_sample <- 100
# set number of times the samples will be summed
n_sum <- 100
n100S <- n100Sum$n100Store
mean100 <- mean(n100S)
sd100 <- sd(n100S)
#binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4) # rule of thumb for aesthetics
binwidth <- 0.005 * sqrt(n_sample) * 1 * (n_sum ^ 0.4) # rule of thumb for aesthetics
title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=100)', n_sum, n_sample)
p20 <- ggplot()
p20 <- p20 + geom_histogram(aes(x = n100S), binwidth = binwidth, colour = "white",
fill = "cornflowerblue", size = 0.1)
p20 <- p20 + stat_function(fun = function(x) dnorm(x, mean = mean100, sd = sd100) * n_sample * binwidth,
color = "darkred", size = 1)
p20 <- p20 + labs(x = 'x', y = 'f(x)', title = title_txt)
#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)
p20
#For n=1000
# set sample size
#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000
n_sample <- 100
# set number of times the samples will be summed
n_sum <- 100
n1000S <- n1000Sum$n1000Store
mean1000 <- mean(n1000S)
sd1000 <- sd(n1000S)
#binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4) # rule of thumb for aesthetics
binwidth <- 0.005 * sqrt(n_sample) * 0.5 * (n_sum ^ 0.1) # rule of thumb for aesthetics
title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=1000)', n_sum, n_sample)
p21 <- ggplot()
p21 <- p21 + geom_histogram(aes(x = n1000S), binwidth = binwidth, colour = "white",
fill = "cornflowerblue", size = 0.1)
p21 <- p21 + stat_function(fun = function(x) dnorm(x, mean = mean1000, sd = sd1000) * n_sample * binwidth,
color = "darkred", size = 1)
p21 <- p21 + labs(x = 'x', y = 'f(x)', title = title_txt)
#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)
p21