write(‘PATH=“\({RTOOLS40_HOME}\\usr\\bin;\){PATH}”’, file = “~/.Renviron”, append = TRUE)

## install.packages(“jsonlite”, type = “source”)

title: “Assessment 1 17122” output: html_document —

library(ggplot2)
library(dplyr)
library(magrittr)
library(data.table)
library(readxl) # Load the readxl package
library(tidyr)
library(stringr)
library(openxlsx)
library(readxl)
library("Hmisc")
library(lattice)
library(latticeExtra)
library(tidyverse)

A1a <- read_excel("pop_dataset.xlsx", sheet = 1)

A1 <- A1a[order(A1a$region), ] #sort original regions into numerical order

#Question 1

Question 1.1 Print out the dimensions of the dataframe

dim (A1)

Question 1.2 Print out the names and type of each of the data frame’s columns.

head(A1)

# 1.3 Print out the number of unique regions in the dataset (500 unique regions, each with 112 observations).

#To create a dataframe with region numbered from 1 to 500

df1 <-aggregate(A1$region, by=list(A1$region), FUN=length) #Aggregate regions into groups

# 1.4 What is the minimum age bin? # 0 year

A1
## # A tibble: 56,000 x 4
##    region     age gender population
##    <chr>    <dbl> <chr>       <dbl>
##  1 SSC20005     0 M               0
##  2 SSC20005     0 F               0
##  3 SSC20005     1 M               0
##  4 SSC20005     1 F               0
##  5 SSC20005     2 M               0
##  6 SSC20005     2 F               0
##  7 SSC20005     3 M               0
##  8 SSC20005     3 F               0
##  9 SSC20005     4 M               0
## 10 SSC20005     4 F               0
## # ... with 55,990 more rows
# 1.5 What is the maximum age bin? # 55 years

# 1.6 What is the bin size for the age field? # 1 year

#Question 2.1 Expected vale of age for whole data sample

# 2.1 Use the expected value for the age to find the mean age for the whole data sample. (Ans: 27.8)

pop_tot <- sum(A1$population) #Total population

pop_tot
## [1] 796015
df2 <-aggregate(A1[,4], by=list(A1$age), FUN=sum) #Population for each age for all regions

str(df2)
## 'data.frame':    56 obs. of  2 variables:
##  $ Group.1   : num  0 1 2 3 4 5 6 7 8 9 ...
##  $ population: num  13179 13810 13645 14189 13833 ...
prob=df2$population/pop_tot #Probability for each age

age=df2$Group.1 #Ages from 0 to 55

ExpV_age <-sum(prob * age) # Sum of prob x age

ExpV_age #2.1 Expected value for the age (mean age) for the whole population
## [1] 27.80027

Question 2.2 Standard Deviation for whole data sample

## 2.2 Provide the standard deviation for the whole data sample.  Ans=Sample Standard Deviation= 15.778,the same as the population Standard Deviation of 15.778 to 3dp.

agesq=df2$Group.1^2
agesq
##  [1]    0    1    4    9   16   25   36   49   64   81  100  121  144  169  196
## [16]  225  256  289  324  361  400  441  484  529  576  625  676  729  784  841
## [31]  900  961 1024 1089 1156 1225 1296 1369 1444 1521 1600 1681 1764 1849 1936
## [46] 2025 2116 2209 2304 2401 2500 2601 2704 2809 2916 3025
ExpV_age1 <-sum(prob * agesq)
ExpV_age1
## [1] 1021.801
ExpV_age2 <- ExpV_age^2
ExpV_age2
## [1] 772.8548
# Sample Variance

SampleVar <-ExpV_age1 - ExpV_age2
SampleVar
## [1] 248.9465
SampleSDAge <- SampleVar^0.5
SampleSDAge
## [1] 15.77804
# Population Variance

N <- 56000 # N = number of observations of Sample

Pop_Var <- N * SampleVar/(N - 1)
Pop_Var
## [1] 248.9509
#Standard Deviation for the age for the whole sample

PopSDAge <- Pop_Var^0.5
PopSDAge
## [1] 15.77818

Question 3 Statistics of mean age for each region

options(digits=5)

Age <-A1[c(1:112), 2] #Range of ages 0 to 55, this the same for all regions

# To calculate the weighted mean of the ages for each region

i <- 1

Region <- 1

format(round(Region, 0), nsmall = 0)
## [1] "1"
WMStore <- list()               # Create empty list to store weighted means for each age

while (i < 56000) {

j <- i + 111

Pop <- A1[c(i:j), 4]             # Population for each region, 112 observations per region

Pop

WM <- weighted.mean(Age, Pop)
WM                               # weighted mean for each region

WMStore[[Region]] <- WM         # WMStore contains the weighted mean for all the 500 regions       
Region <- Region + 1

i <- i+112

if (i ==56000){
break
}
}

WMS <- unlist(WMStore)

WMS # This contains the weighted mean for each of the regions

df_WMS <- as.data.frame((as.data.frame(WMS))) # This is a dataframe of the weighted mean in a column of 500 rows

mean(WMS) # 3.1 Mean=30.608 sd(WMS) # 3.2 SD=7.996 min(WMS) # 3.3 Minimum = 2 quantile(WMS,0.25) # 3.4 First Quartile = 27.426 quantile(WMS,0.50) # 3.5 Median = 29.232 quantile(WMS,0.75) # 3.6 Third Quartile = 33.35 max(WMS) # 3.7 Maximum = 55 IQR(WMS) # 3.8 IQR = 5.924

#3.6 Histogram of the distribution of means

hist(WMS, xlab=“Weighted Mean of Age”, main=“Distribution of means from each region”)

Question 4 Region with smallest population

df3S <-aggregate(A1[,4], by=list(A1$region), FUN=sum) # population for each region

df3Smin <- filter(df3S, df3S$population == min(df3S$population)) # Find all regions with smallest population

df3Smin #Find all regions with smallest population, SSC20099 is one of them with 3 people
##     Group.1 population
## 1  SSC20099          3
## 2  SSC20127          3
## 3  SSC20151          3
## 4  SSC20346          3
## 5  SSC20383          3
## 6  SSC20398          3
## 7  SSC20422          3
## 8  SSC20502          3
## 9  SSC20503          3
## 10 SSC20516          3
## 11 SSC21012          3
## 12 SSC21026          3
## 13 SSC21037          3
## 14 SSC21283          3
## 15 SSC22012          3
## 16 SSC22021          3
## 17 SSC22028          3
## 18 SSC22070          3
## 19 SSC22084          3
## 20 SSC22157          3
## 21 SSC22193          3
## 22 SSC22237          3
## 23 SSC22281          3
## 24 SSC22367          3
## 25 SSC22597          3
## 26 SSC22605          3
## 27 SSC22606          3
## 28 SSC22616          3
## 29 SSC22698          3
## 30 SSC22702          3
## 31 SSC22719          3
## 32 SSC22772          3
## 33 SSC22809          3
## 34 SSC22840          3
## 35 SSC22873          3

Question 5 Region with largest population

#Question 5.1 Distribution of Ages

df4S <-aggregate(A1[,4], by=list(A1$region), FUN=sum) # population for each region

df4Smax <- filter(df4S, df4S$population == max(df4S$population)) # Find region with largest population

df4Smax #Find all regions with largest population, and SSC22015 is the largest with 37948 people
##    Group.1 population
## 1 SSC22015      37948
RL<- A1 %>% filter_at(vars(region), any_vars(. %in% c('SSC22015'))) #Data in the region SSC22015 which has the largest [population]

RLC <-aggregate(RL[,4], by=list(RL$age), FUN=sum) #add male and female population for each age

#Distribution of Ages for region SSC22015

p1 <- ggplot(RLC, aes(x = age, y = population)) +
geom_line()

p1

print(p1 + ggtitle("Distribution of ages for region with most people"))

#Question 5.2 Cumulative Distribution of Ages

p2<- ggplot(RLC, aes(x = age)) +
  stat_ecdf(geom = "step")
  stat_ecdf()
## geom_step: na.rm = FALSE
## stat_ecdf: n = NULL, pad = TRUE, na.rm = FALSE
## position_identity
p3 <- print(p2 + ggtitle("Cumulative Distribution of ages for region with most people"))

p3

# Question 5.3 Cumulative distribution for males and females, maybe not used/finished

RLS <- A1a[order(RL$gender), ] #Population data for largest region in the order of gender 

RLSF <- RLS[1:56, ] #Data for Female

RLSM <- RLS[57:112, ] #Data for Male

#Q5.1 (Alt) #Probably not used

str(RLS)
## tibble [112 x 4] (S3: tbl_df/tbl/data.frame)
##  $ region    : chr [1:112] "SSC21184" "SSC21184" "SSC21184" "SSC21184" ...
##  $ age       : num [1:112] 0 1 2 3 4 5 6 7 8 9 ...
##  $ gender    : chr [1:112] "F" "F" "F" "F" ...
##  $ population: num [1:112] 95 107 120 125 117 147 137 130 157 161 ...
x1 <- RLS$age

x1
##   [1]  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
##  [26] 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
##  [51] 50 51 52 53 54 55  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18
##  [76] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## [101] 44 45 46 47 48 49 50 51 52 53 54 55
#str(x)

y1 <- RLSF$population  # Female
y2 <- RLSM$population # Male

# merge two data frames by age

RLS <- A1a[order(RL$gender), ]
RLSF <- RLS[1:56, ] #Female
RLSM <- RLS[57:112, ] #Male

RLSF2<- RLSF[ , c(2, 4)]  
RLSM2<- RLSM[ , c(2, 4)]  

RLFM <- merge(RLSF2,RLSM2,by="age")
str(RLFM)
## 'data.frame':    56 obs. of  3 variables:
##  $ age         : num  0 1 2 3 4 5 6 7 8 9 ...
##  $ population.x: num  95 107 120 125 117 147 137 130 157 161 ...
##  $ population.y: num  114 88 122 123 114 140 131 127 139 149 ...
colnames(RLFM)[2] <- "F"
colnames(RLFM)[3] <- "M"

#May need more development to finish Q5.3 using this approach

#p1A <- ggplot(RLFM, aes(x = age, y = F)) +
  
#geom_line()

#p1A
#Question 5.3 (Alternative 1) not using ggplot and check to see if I can change colour and add title

set.seed(42)

RLSF <- RLS[1:56, ] #Data for Female
RLSM <- RLS[57:112, ] #Data for Male

Female <- RLSF$population #Population for female
Male <- RLSM$population #Population for male

gender <- data.frame(Female, Male)

ecdfplot(~ Female + Male, data=gender, auto.key=list(space='right')) 

#Question 5.3 (Alternative 2) Use ggplot 

set.seed(1234)

RLSF <- RLS[1:56, ] #Female
RLSM <- RLS[57:112, ] #Male

Female <- RLSF$population
Male <- RLSM$population

# create a random data frame 

gender <- data.frame(x = c(Female, Male), group = gl(2, 56))

#gender1<- as.numeric(gender$group)

gender$group <- as.numeric(gender$group)

gender$group[1:56] <- "Female"

gender$group[57:112] <- "Male"

p15 <- ggplot(gender, aes(x = x, col = group)) + stat_ecdf() 

p16<- print(p15 + ggtitle("Cumulative Distribution of females and males population") + labs(x = "Population per Region",
         y = "Probability"))

p16

# stat_ecdf() function is used to plot ECDF plot
#Question 6.1 Ratio of old to young vs population scatter plot

options(digits=5)

# To calculate the population young and old, ratio old/young for each region

i <- 1
m <- 81

Region <- 1

format(round(Region, 0), nsmall = 0)
## [1] "1"
PopTotStore <- list()             # Create empty list to store total population for each region
RatioStore <- list()              # Create empty list to store ratio for each region

while (i < 56000) {

j <- i + 79
n <- m + 31

Popyoung <- A1[c(i:j), 4] 
# Population for young, 80 observations per region
Popold <- A1[c(m:n), 4]           # Population for olf, 32 observations per region
PopyoungS <- sum(Popyoung)
PopoldS <- sum(Popold)

#print(Popold)

PopTotStore [[Region]] <- PopyoungS + PopoldS

#print(PopTotStore)

RatioStore [[Region]] <- PopoldS/PopyoungS

#print(RatioStore)

Region <- Region + 1

i <- i + 112
m <- m + 112

if (i == 56000) {
  
  break
  
if (m == 56000) {
  
  break
}  
}
}
PopTotStore <- unlist(PopTotStore) #This contains the total Population for each of the regions
RatioStore <- unlist(RatioStore) #This contains the ratio for each of the regions

PopTot <- as.data.frame((as.data.frame(PopTotStore))) # This is a dataframe of the total population for each region

PopTot$Region <- 1:nrow(PopTot)
PopTot
##     PopTotStore Region
## 1            33      1
## 2           425      2
## 3           100      3
## 4          1137      4
## 5            51      5
## 6           924      6
## 7           359      7
## 8          5821      8
## 9          4978      9
## 10            3     10
## 11           38     11
## 12           17     12
## 13         2049     13
## 14            3     14
## 15         1826     15
## 16          270     16
## 17            3     17
## 18         2643     18
## 19          178     19
## 20           12     20
## 21          314     21
## 22         6666     22
## 23           24     23
## 24           15     24
## 25         3439     25
## 26         3168     26
## 27           18     27
## 28         9837     28
## 29           10     29
## 30           41     30
## 31            9     31
## 32           60     32
## 33           54     33
## 34           89     34
## 35            8     35
## 36           27     36
## 37          161     37
## 38           52     38
## 39            6     39
## 40          138     40
## 41           58     41
## 42         3391     42
## 43           57     43
## 44         9116     44
## 45         9046     45
## 46           29     46
## 47           21     47
## 48           60     48
## 49         3104     49
## 50           36     50
## 51            3     51
## 52          112     52
## 53           61     53
## 54           38     54
## 55           16     55
## 56         9802     56
## 57        11357     57
## 58           40     58
## 59          104     59
## 60          123     60
## 61            6     61
## 62            3     62
## 63          159     63
## 64          103     64
## 65          211     65
## 66            3     66
## 67           21     67
## 68         2615     68
## 69            6     69
## 70         3555     70
## 71            3     71
## 72           75     72
## 73           52     73
## 74            5     74
## 75          166     75
## 76         3735     76
## 77         1188     77
## 78          120     78
## 79          213     79
## 80        16705     80
## 81           39     81
## 82            3     82
## 83            3     83
## 84           29     84
## 85          190     85
## 86           23     86
## 87            3     87
## 88         3772     88
## 89         1282     89
## 90            7     90
## 91          194     91
## 92         6803     92
## 93         5402     93
## 94        15972     94
## 95            7     95
## 96          239     96
## 97         3363     97
## 98          198     98
## 99         9729     99
## 100        5016    100
## 101        1011    101
## 102        1790    102
## 103        7287    103
## 104         132    104
## 105          54    105
## 106          48    106
## 107         494    107
## 108        2306    108
## 109           9    109
## 110          12    110
## 111       17442    111
## 112          14    112
## 113          58    113
## 114         427    114
## 115          18    115
## 116         148    116
## 117        6461    117
## 118         211    118
## 119        1082    119
## 120          45    120
## 121         117    121
## 122       18540    122
## 123          12    123
## 124         171    124
## 125        2563    125
## 126         219    126
## 127          16    127
## 128         284    128
## 129          50    129
## 130        3981    130
## 131        1367    131
## 132         568    132
## 133        1752    133
## 134          18    134
## 135         847    135
## 136          72    136
## 137        7866    137
## 138         239    138
## 139       17809    139
## 140           6    140
## 141       15608    141
## 142        1434    142
## 143          88    143
## 144           9    144
## 145       19340    145
## 146        1055    146
## 147         862    147
## 148       12350    148
## 149         132    149
## 150          89    150
## 151          79    151
## 152          14    152
## 153          15    153
## 154           9    154
## 155          12    155
## 156         547    156
## 157          69    157
## 158          32    158
## 159          65    159
## 160           3    160
## 161         210    161
## 162           5    162
## 163          54    163
## 164           3    164
## 165         130    165
## 166          37    166
## 167           3    167
## 168       17140    168
## 169        1554    169
## 170           7    170
## 171         147    171
## 172         475    172
## 173         468    173
## 174           4    174
## 175           6    175
## 176          46    176
## 177          76    177
## 178         286    178
## 179          67    179
## 180        6281    180
## 181       20939    181
## 182          61    182
## 183        6550    183
## 184           5    184
## 185         217    185
## 186       19180    186
## 187       11218    187
## 188         616    188
## 189          44    189
## 190        1288    190
## 191          22    191
## 192       13730    192
## 193       14546    193
## 194           6    194
## 195           7    195
## 196           9    196
## 197         663    197
## 198          30    198
## 199           6    199
## 200        1908    200
## 201           6    201
## 202        2575    202
## 203          25    203
## 204         183    204
## 205        2661    205
## 206          84    206
## 207          18    207
## 208         229    208
## 209         304    209
## 210         128    210
## 211          35    211
## 212          41    212
## 213           3    213
## 214         808    214
## 215          24    215
## 216        2386    216
## 217          12    217
## 218        9818    218
## 219          21    219
## 220        2289    220
## 221          41    221
## 222        4978    222
## 223        5033    223
## 224          18    224
## 225        2391    225
## 226          18    226
## 227        1207    227
## 228         211    228
## 229          12    229
## 230          52    230
## 231           9    231
## 232         249    232
## 233          22    233
## 234          18    234
## 235         176    235
## 236         127    236
## 237          12    237
## 238         102    238
## 239          88    239
## 240        1094    240
## 241        2548    241
## 242         310    242
## 243          22    243
## 244         942    244
## 245          50    245
## 246          74    246
## 247          73    247
## 248         698    248
## 249          28    249
## 250         323    250
## 251         988    251
## 252         376    252
## 253         538    253
## 254        1799    254
## 255         135    255
## 256        7282    256
## 257          70    257
## 258        3504    258
## 259          21    259
## 260           8    260
## 261         528    261
## 262          28    262
## 263          63    263
## 264          22    264
## 265         289    265
## 266        1148    266
## 267          36    267
## 268          51    268
## 269          74    269
## 270         750    270
## 271          12    271
## 272         440    272
## 273          81    273
## 274        2960    274
## 275       22979    275
## 276        1213    276
## 277          10    277
## 278          23    278
## 279          15    279
## 280          12    280
## 281         129    281
## 282           6    282
## 283          77    283
## 284          26    284
## 285       10770    285
## 286          28    286
## 287        4310    287
## 288          30    288
## 289          15    289
## 290       16669    290
## 291         794    291
## 292         489    292
## 293          34    293
## 294        2545    294
## 295        1289    295
## 296          21    296
## 297          36    297
## 298         148    298
## 299         144    299
## 300          81    300
## 301          55    301
## 302           6    302
## 303         265    303
## 304         245    304
## 305          40    305
## 306         526    306
## 307          69    307
## 308          61    308
## 309          40    309
## 310          59    310
## 311          15    311
## 312         201    312
## 313          61    313
## 314         283    314
## 315          38    315
## 316           6    316
## 317          19    317
## 318          23    318
## 319         779    319
## 320         191    320
## 321          47    321
## 322         790    322
## 323          45    323
## 324          12    324
## 325         307    325
## 326         216    326
## 327         292    327
## 328          37    328
## 329        1166    329
## 330         154    330
## 331          13    331
## 332          19    332
## 333           4    333
## 334          17    334
## 335          72    335
## 336        2823    336
## 337         162    337
## 338          79    338
## 339          67    339
## 340          69    340
## 341        6048    341
## 342          36    342
## 343         120    343
## 344          20    344
## 345           3    345
## 346       37948    346
## 347         267    347
## 348           3    348
## 349           3    349
## 350       13253    350
## 351        7776    351
## 352          71    352
## 353          15    353
## 354          98    354
## 355         208    355
## 356         109    356
## 357           9    357
## 358         204    358
## 359           3    359
## 360        1879    360
## 361          60    361
## 362        1226    362
## 363         130    363
## 364           3    364
## 365          15    365
## 366         380    366
## 367          33    367
## 368          62    368
## 369          99    369
## 370       10596    370
## 371        1536    371
## 372          88    372
## 373         337    373
## 374         130    374
## 375         159    375
## 376        1953    376
## 377           3    377
## 378         163    378
## 379          13    379
## 380           6    380
## 381          10    381
## 382          65    382
## 383          42    383
## 384           3    384
## 385          19    385
## 386        5056    386
## 387          86    387
## 388         373    388
## 389          33    389
## 390        4177    390
## 391           3    391
## 392        1330    392
## 393        4060    393
## 394          12    394
## 395         833    395
## 396          12    396
## 397          21    397
## 398           3    398
## 399          93    399
## 400         260    400
## 401         762    401
## 402         290    402
## 403        1933    403
## 404        1229    404
## 405          28    405
## 406          22    406
## 407          42    407
## 408       10937    408
## 409         183    409
## 410         199    410
## 411           6    411
## 412         108    412
## 413           9    413
## 414          36    414
## 415          34    415
## 416           3    416
## 417          78    417
## 418          17    418
## 419         351    419
## 420          56    420
## 421          73    421
## 422          34    422
## 423          12    423
## 424         152    424
## 425        1301    425
## 426           6    426
## 427          24    427
## 428        1412    428
## 429          16    429
## 430         714    430
## 431          57    431
## 432          30    432
## 433          16    433
## 434          24    434
## 435         190    435
## 436           7    436
## 437         574    437
## 438         202    438
## 439          66    439
## 440          32    440
## 441        8174    441
## 442       17928    442
## 443          35    443
## 444       19274    444
## 445         277    445
## 446           3    446
## 447         299    447
## 448           3    448
## 449           3    449
## 450          10    450
## 451           3    451
## 452          87    452
## 453         831    453
## 454          86    454
## 455          71    455
## 456         268    456
## 457        2257    457
## 458          76    458
## 459           3    459
## 460          27    460
## 461           3    461
## 462           9    462
## 463          24    463
## 464        3838    464
## 465          12    465
## 466        4306    466
## 467           3    467
## 468         158    468
## 469          18    469
## 470        1082    470
## 471       10893    471
## 472        4153    472
## 473       12544    473
## 474        2342    474
## 475        4074    475
## 476          21    476
## 477           3    477
## 478          12    478
## 479        1159    479
## 480          42    480
## 481          57    481
## 482           3    482
## 483         205    483
## 484           3    484
## 485          32    485
## 486          69    486
## 487          15    487
## 488        1079    488
## 489           6    489
## 490           6    490
## 491         144    491
## 492          25    492
## 493           3    493
## 494        3269    494
## 495          16    495
## 496         353    496
## 497         191    497
## 498          23    498
## 499          24    499
## 500          15    500
Ratio <- as.data.frame((as.data.frame(RatioStore))) # This is a dataframe of the ratio old/young for each region

Ratio$Region <- 1:nrow(Ratio)

PopRatio <- merge(Ratio, PopTot, by="Region")

names(PopRatio) [2] <- "Ratio"
names(PopRatio)[3] <- "PopRegion"

#To delete rows where ratio is inf or ratio < 1000

PopRatio1<- PopRatio[PopRatio$Ratio <= 20, ] 

p17 <- ggplot(PopRatio1, aes(x=PopRegion, y=Ratio)) + 
  geom_point()+
  geom_smooth(method=lm, color="red")+
  labs(title="Ratio of old to young vs Population in Region",
       x="Population in Region")+
  theme_classic()  

p17

# Question 6.2 
#The scatter plot in  6.1 indicates the following trends:
  
#1 When the population is low, the ratio of old to young is high.  This suggests that there are more old people than young people when the population of a region is low.
#2 When the population is high, the ratio is low.  This shows that in the more populous regions, 
# there are more young people than old people.  
#3 This is consistent with a trend that older people would move to a small country town where the #cost of housing is cheaper, and they will have a greater spending power with their limited funds #and many of them have likely retired.
#4 The younger couples and people will live in a more populous region for cheaper housing, jobs, #schooling, health facility and other conveniences to support their life styles.
#5 There are many more in between the two extremes, and the trend will depend on a combination of #factors.  e.g stages of their live, wealth leveks, empty nesters, availability of jobs etc.
#Question 7.1 Ratio of female to male vs population scatter plot

options(digits=5)

# To calculate the population female and male, ratio female/male for each region

PopGenS <- 0             # Create empty list to store total population for each region
RatioGenS <- 0   

p <- 1
a <- 56

Region <- 1

format(round(Region, 0), nsmall = 0)
## [1] "1"
PopGenS <- list()             # Create empty list to store total population for each region
RatioGenS <- list()              # Create empty list to store ratio for each region


while (p < 56000) {

q <- p + 111
b <- a +55

PopSort <- A1[c(p:q), ] #Data from row i to j

#RLG <-PopSort[order(PopSort$gender),]

RLG <-PopSort[order(PopSort$gender),] #Sort in the order of gender for each region

PopF <- RLG [c(1:56), 4] # Population for female, 56 observations per region

PopM <- RLG [c(57:112), 4] # Population for male, 56 observations per region

PopFS <- sum(PopF)
PopMS <- sum(PopM)
PopTS <- PopFS+PopMS

PopGenS [[Region]] <- PopTS

RatioGenS [[Region]] <- PopFS/PopMS

#print(RatioGenS)

Region <- Region + 1

p <- p + 112
a <- a + 112

if (p == 56000) {
  
  break
  
if (a == 56000) {
  
  break
}  
}
}
PopGenS <- unlist(PopGenS) #This contains the total Population for each of the regions
RatioGenS <- unlist(RatioGenS) #This contains the ratio for each of the regions

PopGenST <- as.data.frame((as.data.frame(PopGenS))) # This is a dataframe of the total population for each region

PopGenST$Region <- 1:nrow(PopGenST)
PopGenST
##     PopGenS Region
## 1        33      1
## 2       425      2
## 3       100      3
## 4      1137      4
## 5        51      5
## 6       924      6
## 7       359      7
## 8      5821      8
## 9      4978      9
## 10        3     10
## 11       38     11
## 12       17     12
## 13     2049     13
## 14        3     14
## 15     1826     15
## 16      270     16
## 17        3     17
## 18     2643     18
## 19      178     19
## 20       12     20
## 21      314     21
## 22     6666     22
## 23       24     23
## 24       15     24
## 25     3439     25
## 26     3168     26
## 27       18     27
## 28     9837     28
## 29       10     29
## 30       41     30
## 31        9     31
## 32       60     32
## 33       54     33
## 34       89     34
## 35        8     35
## 36       27     36
## 37      161     37
## 38       52     38
## 39        6     39
## 40      138     40
## 41       58     41
## 42     3391     42
## 43       57     43
## 44     9116     44
## 45     9046     45
## 46       29     46
## 47       21     47
## 48       60     48
## 49     3104     49
## 50       36     50
## 51        3     51
## 52      112     52
## 53       61     53
## 54       38     54
## 55       16     55
## 56     9802     56
## 57    11357     57
## 58       40     58
## 59      104     59
## 60      123     60
## 61        6     61
## 62        3     62
## 63      159     63
## 64      103     64
## 65      211     65
## 66        3     66
## 67       21     67
## 68     2615     68
## 69        6     69
## 70     3555     70
## 71        3     71
## 72       75     72
## 73       52     73
## 74        5     74
## 75      166     75
## 76     3735     76
## 77     1188     77
## 78      120     78
## 79      213     79
## 80    16705     80
## 81       39     81
## 82        3     82
## 83        3     83
## 84       29     84
## 85      190     85
## 86       23     86
## 87        3     87
## 88     3772     88
## 89     1282     89
## 90        7     90
## 91      194     91
## 92     6803     92
## 93     5402     93
## 94    15972     94
## 95        7     95
## 96      239     96
## 97     3363     97
## 98      198     98
## 99     9729     99
## 100    5016    100
## 101    1011    101
## 102    1790    102
## 103    7287    103
## 104     132    104
## 105      54    105
## 106      48    106
## 107     494    107
## 108    2306    108
## 109       9    109
## 110      12    110
## 111   17442    111
## 112      14    112
## 113      58    113
## 114     427    114
## 115      18    115
## 116     148    116
## 117    6461    117
## 118     211    118
## 119    1082    119
## 120      45    120
## 121     117    121
## 122   18540    122
## 123      12    123
## 124     171    124
## 125    2563    125
## 126     219    126
## 127      16    127
## 128     284    128
## 129      50    129
## 130    3981    130
## 131    1367    131
## 132     568    132
## 133    1752    133
## 134      18    134
## 135     847    135
## 136      72    136
## 137    7866    137
## 138     239    138
## 139   17809    139
## 140       6    140
## 141   15608    141
## 142    1434    142
## 143      88    143
## 144       9    144
## 145   19340    145
## 146    1055    146
## 147     862    147
## 148   12350    148
## 149     132    149
## 150      89    150
## 151      79    151
## 152      14    152
## 153      15    153
## 154       9    154
## 155      12    155
## 156     547    156
## 157      69    157
## 158      32    158
## 159      65    159
## 160       3    160
## 161     210    161
## 162       5    162
## 163      54    163
## 164       3    164
## 165     130    165
## 166      37    166
## 167       3    167
## 168   17140    168
## 169    1554    169
## 170       7    170
## 171     147    171
## 172     475    172
## 173     468    173
## 174       4    174
## 175       6    175
## 176      46    176
## 177      76    177
## 178     286    178
## 179      67    179
## 180    6281    180
## 181   20939    181
## 182      61    182
## 183    6550    183
## 184       5    184
## 185     217    185
## 186   19180    186
## 187   11218    187
## 188     616    188
## 189      44    189
## 190    1288    190
## 191      22    191
## 192   13730    192
## 193   14546    193
## 194       6    194
## 195       7    195
## 196       9    196
## 197     663    197
## 198      30    198
## 199       6    199
## 200    1908    200
## 201       6    201
## 202    2575    202
## 203      25    203
## 204     183    204
## 205    2661    205
## 206      84    206
## 207      18    207
## 208     229    208
## 209     304    209
## 210     128    210
## 211      35    211
## 212      41    212
## 213       3    213
## 214     808    214
## 215      24    215
## 216    2386    216
## 217      12    217
## 218    9818    218
## 219      21    219
## 220    2289    220
## 221      41    221
## 222    4978    222
## 223    5033    223
## 224      18    224
## 225    2391    225
## 226      18    226
## 227    1207    227
## 228     211    228
## 229      12    229
## 230      52    230
## 231       9    231
## 232     249    232
## 233      22    233
## 234      18    234
## 235     176    235
## 236     127    236
## 237      12    237
## 238     102    238
## 239      88    239
## 240    1094    240
## 241    2548    241
## 242     310    242
## 243      22    243
## 244     942    244
## 245      50    245
## 246      74    246
## 247      73    247
## 248     698    248
## 249      28    249
## 250     323    250
## 251     988    251
## 252     376    252
## 253     538    253
## 254    1799    254
## 255     135    255
## 256    7282    256
## 257      70    257
## 258    3504    258
## 259      21    259
## 260       8    260
## 261     528    261
## 262      28    262
## 263      63    263
## 264      22    264
## 265     289    265
## 266    1148    266
## 267      36    267
## 268      51    268
## 269      74    269
## 270     750    270
## 271      12    271
## 272     440    272
## 273      81    273
## 274    2960    274
## 275   22979    275
## 276    1213    276
## 277      10    277
## 278      23    278
## 279      15    279
## 280      12    280
## 281     129    281
## 282       6    282
## 283      77    283
## 284      26    284
## 285   10770    285
## 286      28    286
## 287    4310    287
## 288      30    288
## 289      15    289
## 290   16669    290
## 291     794    291
## 292     489    292
## 293      34    293
## 294    2545    294
## 295    1289    295
## 296      21    296
## 297      36    297
## 298     148    298
## 299     144    299
## 300      81    300
## 301      55    301
## 302       6    302
## 303     265    303
## 304     245    304
## 305      40    305
## 306     526    306
## 307      69    307
## 308      61    308
## 309      40    309
## 310      59    310
## 311      15    311
## 312     201    312
## 313      61    313
## 314     283    314
## 315      38    315
## 316       6    316
## 317      19    317
## 318      23    318
## 319     779    319
## 320     191    320
## 321      47    321
## 322     790    322
## 323      45    323
## 324      12    324
## 325     307    325
## 326     216    326
## 327     292    327
## 328      37    328
## 329    1166    329
## 330     154    330
## 331      13    331
## 332      19    332
## 333       4    333
## 334      17    334
## 335      72    335
## 336    2823    336
## 337     162    337
## 338      79    338
## 339      67    339
## 340      69    340
## 341    6048    341
## 342      36    342
## 343     120    343
## 344      20    344
## 345       3    345
## 346   37948    346
## 347     267    347
## 348       3    348
## 349       3    349
## 350   13253    350
## 351    7776    351
## 352      71    352
## 353      15    353
## 354      98    354
## 355     208    355
## 356     109    356
## 357       9    357
## 358     204    358
## 359       3    359
## 360    1879    360
## 361      60    361
## 362    1226    362
## 363     130    363
## 364       3    364
## 365      15    365
## 366     380    366
## 367      33    367
## 368      62    368
## 369      99    369
## 370   10596    370
## 371    1536    371
## 372      88    372
## 373     337    373
## 374     130    374
## 375     159    375
## 376    1953    376
## 377       3    377
## 378     163    378
## 379      13    379
## 380       6    380
## 381      10    381
## 382      65    382
## 383      42    383
## 384       3    384
## 385      19    385
## 386    5056    386
## 387      86    387
## 388     373    388
## 389      33    389
## 390    4177    390
## 391       3    391
## 392    1330    392
## 393    4060    393
## 394      12    394
## 395     833    395
## 396      12    396
## 397      21    397
## 398       3    398
## 399      93    399
## 400     260    400
## 401     762    401
## 402     290    402
## 403    1933    403
## 404    1229    404
## 405      28    405
## 406      22    406
## 407      42    407
## 408   10937    408
## 409     183    409
## 410     199    410
## 411       6    411
## 412     108    412
## 413       9    413
## 414      36    414
## 415      34    415
## 416       3    416
## 417      78    417
## 418      17    418
## 419     351    419
## 420      56    420
## 421      73    421
## 422      34    422
## 423      12    423
## 424     152    424
## 425    1301    425
## 426       6    426
## 427      24    427
## 428    1412    428
## 429      16    429
## 430     714    430
## 431      57    431
## 432      30    432
## 433      16    433
## 434      24    434
## 435     190    435
## 436       7    436
## 437     574    437
## 438     202    438
## 439      66    439
## 440      32    440
## 441    8174    441
## 442   17928    442
## 443      35    443
## 444   19274    444
## 445     277    445
## 446       3    446
## 447     299    447
## 448       3    448
## 449       3    449
## 450      10    450
## 451       3    451
## 452      87    452
## 453     831    453
## 454      86    454
## 455      71    455
## 456     268    456
## 457    2257    457
## 458      76    458
## 459       3    459
## 460      27    460
## 461       3    461
## 462       9    462
## 463      24    463
## 464    3838    464
## 465      12    465
## 466    4306    466
## 467       3    467
## 468     158    468
## 469      18    469
## 470    1082    470
## 471   10893    471
## 472    4153    472
## 473   12544    473
## 474    2342    474
## 475    4074    475
## 476      21    476
## 477       3    477
## 478      12    478
## 479    1159    479
## 480      42    480
## 481      57    481
## 482       3    482
## 483     205    483
## 484       3    484
## 485      32    485
## 486      69    486
## 487      15    487
## 488    1079    488
## 489       6    489
## 490       6    490
## 491     144    491
## 492      25    492
## 493       3    493
## 494    3269    494
## 495      16    495
## 496     353    496
## 497     191    497
## 498      23    498
## 499      24    499
## 500      15    500
RatioGenST <- as.data.frame((as.data.frame(RatioGenS))) # This is a dataframe of the ratio (F/M) population for each region

RatioGenST$Region <- 1:nrow(RatioGenST)
RatioGenST
##     RatioGenS Region
## 1     1.53846      1
## 2     0.95853      2
## 3     0.66667      3
## 4     0.89816      4
## 5     0.70000      5
## 6     0.94118      6
## 7     0.89947      7
## 8     0.99145      8
## 9     1.00564      9
## 10    0.00000     10
## 11    0.90000     11
## 12    1.12500     12
## 13    0.99902     13
## 14        Inf     14
## 15    1.08210     15
## 16    0.83673     16
## 17        Inf     17
## 18    1.01142     18
## 19    1.57971     19
## 20    3.00000     20
## 21    1.25899     21
## 22    0.98157     22
## 23    0.33333     23
## 24    1.50000     24
## 25    0.96402     25
## 26    1.00633     26
## 27    1.00000     27
## 28    1.01042     28
## 29    0.66667     29
## 30    1.56250     30
## 31    2.00000     31
## 32    1.30769     32
## 33    0.80000     33
## 34    0.89362     34
## 35    1.00000     35
## 36    1.25000     36
## 37    1.30000     37
## 38    1.08000     38
## 39        Inf     39
## 40    1.46429     40
## 41    0.75758     41
## 42    1.01845     42
## 43    0.78125     43
## 44    0.99300     44
## 45    1.00399     45
## 46    1.41667     46
## 47    1.33333     47
## 48    0.93548     48
## 49    1.05020     49
## 50    1.40000     50
## 51        Inf     51
## 52    0.83607     52
## 53    0.56410     53
## 54    0.52000     54
## 55    0.77778     55
## 56    1.08953     56
## 57    1.05817     57
## 58    1.50000     58
## 59    0.52941     59
## 60    0.98387     60
## 61    1.00000     61
## 62    0.00000     62
## 63    1.06494     63
## 64    1.19149     64
## 65    0.86726     65
## 66        Inf     66
## 67    0.75000     67
## 68    1.00383     68
## 69        Inf     69
## 70    1.07531     70
## 71    0.00000     71
## 72    0.33929     72
## 73    0.85714     73
## 74        Inf     74
## 75    1.21333     75
## 76    0.91342     76
## 77    1.01015     77
## 78    0.87500     78
## 79    1.19588     79
## 80    1.17004     80
## 81    0.85714     81
## 82        Inf     82
## 83        Inf     83
## 84    0.52632     84
## 85    1.46753     85
## 86    0.43750     86
## 87    0.00000     87
## 88    1.05223     88
## 89    1.22184     89
## 90    0.00000     90
## 91    0.97959     91
## 92    0.98976     92
## 93    1.07053     93
## 94    1.02819     94
## 95    1.33333     95
## 96    0.79699     96
## 97    1.06699     97
## 98    0.63636     98
## 99    0.89575     99
## 100   1.05405    100
## 101   1.01394    101
## 102   1.08382    102
## 103   1.03434    103
## 104   1.20000    104
## 105   0.92857    105
## 106   1.66667    106
## 107   0.83643    107
## 108   0.96758    108
## 109   0.50000    109
## 110   1.00000    110
## 111   0.99748    111
## 112   3.66667    112
## 113   0.93333    113
## 114   0.99533    114
## 115   1.00000    115
## 116   0.70115    116
## 117   1.05764    117
## 118   0.91818    118
## 119   0.99631    119
## 120   0.50000    120
## 121   0.85714    121
## 122   1.04885    122
## 123   3.00000    123
## 124   0.64423    124
## 125   1.06860    125
## 126   0.80992    126
## 127   4.33333    127
## 128   1.02857    128
## 129   1.17391    129
## 130   1.01264    130
## 131   0.97258    131
## 132   0.87459    132
## 133   1.10072    133
## 134   1.00000    134
## 135   0.98826    135
## 136   1.25000    136
## 137   1.06837    137
## 138   0.89683    138
## 139   0.98341    139
## 140   1.00000    140
## 141   1.04991    141
## 142   0.90691    142
## 143   1.04651    143
## 144   2.00000    144
## 145   0.99670    145
## 146   0.95733    146
## 147   0.91131    147
## 148   1.02891    148
## 149   1.09524    149
## 150   0.58929    150
## 151   0.75556    151
## 152   1.33333    152
## 153   1.50000    153
## 154   0.50000    154
## 155   0.00000    155
## 156   1.01103    156
## 157   0.53333    157
## 158   0.77778    158
## 159   0.71053    159
## 160       Inf    160
## 161   1.00000    161
## 162       Inf    162
## 163   1.07692    163
## 164       Inf    164
## 165   0.88406    165
## 166   0.68182    166
## 167       Inf    167
## 168   0.92844    168
## 169   1.00775    169
## 170   1.33333    170
## 171   1.26154    171
## 172   1.12054    172
## 173   0.91803    173
## 174   0.00000    174
## 175       Inf    175
## 176   0.91667    176
## 177   0.72727    177
## 178   1.04286    178
## 179   0.91429    179
## 180   1.03929    180
## 181   0.95345    181
## 182   1.17857    182
## 183   1.04880    183
## 184   0.00000    184
## 185   1.33333    185
## 186   1.01980    186
## 187   1.03778    187
## 188   0.98071    188
## 189   1.44444    189
## 190   0.91382    190
## 191   0.69231    191
## 192   0.97753    192
## 193   1.03412    193
## 194   1.00000    194
## 195   1.33333    195
## 196   0.50000    196
## 197   0.96736    197
## 198   1.00000    198
## 199   1.00000    199
## 200   1.02763    200
## 201   1.00000    201
## 202   1.02119    202
## 203   0.92308    203
## 204   0.94681    204
## 205   0.99925    205
## 206   0.64706    206
## 207   0.38462    207
## 208   0.95726    208
## 209   1.11111    209
## 210   1.28571    210
## 211   0.52174    211
## 212   1.73333    212
## 213   0.00000    213
## 214   1.00496    214
## 215   0.60000    215
## 216   0.95574    216
## 217   3.00000    217
## 218   1.01436    218
## 219   0.90909    219
## 220   1.00966    220
## 221   2.15385    221
## 222   0.97932    222
## 223   1.02535    223
## 224   5.00000    224
## 225   1.00419    225
## 226   0.50000    226
## 227   0.85407    227
## 228   1.26882    228
## 229   0.33333    229
## 230   1.47619    230
## 231   0.00000    231
## 232   0.96063    232
## 233   0.83333    233
## 234   5.00000    234
## 235   0.66038    235
## 236   1.04839    236
## 237   1.00000    237
## 238   1.08163    238
## 239   0.44262    239
## 240   1.12427    240
## 241   1.09539    241
## 242   0.77143    242
## 243   2.66667    243
## 244   1.04338    244
## 245   1.50000    245
## 246   1.00000    246
## 247   1.80769    247
## 248   1.06509    248
## 249   0.75000    249
## 250   1.21233    250
## 251   1.09322    251
## 252   1.02151    252
## 253   0.97794    253
## 254   1.03507    254
## 255   1.17742    255
## 256   0.96175    256
## 257   1.00000    257
## 258   1.05152    258
## 259   1.33333    259
## 260   0.60000    260
## 261   1.03077    261
## 262   0.75000    262
## 263   0.90909    263
## 264   0.46667    264
## 265   0.97945    265
## 266   1.01404    266
## 267   1.40000    267
## 268   1.21739    268
## 269   0.64444    269
## 270   1.23881    270
## 271   0.33333    271
## 272   0.75299    272
## 273   0.88372    273
## 274   1.05841    274
## 275   1.04004    275
## 276   0.98203    276
## 277   0.66667    277
## 278   2.83333    278
## 279   0.25000    279
## 280   0.33333    280
## 281   0.98462    281
## 282       Inf    282
## 283   0.75000    283
## 284   1.60000    284
## 285   1.04558    285
## 286   0.86667    286
## 287   0.98252    287
## 288   1.50000    288
## 289   1.50000    289
## 290   0.99366    290
## 291   0.91787    291
## 292   1.10776    292
## 293   1.00000    293
## 294   1.01824    294
## 295   0.98920    295
## 296   2.50000    296
## 297   0.80000    297
## 298   1.00000    298
## 299   1.05714    299
## 300   0.65306    300
## 301   1.20000    301
## 302       Inf    302
## 303   1.05426    303
## 304   1.09402    304
## 305   0.81818    305
## 306   1.16461    306
## 307   1.02941    307
## 308   2.05000    308
## 309   1.00000    309
## 310   1.03448    310
## 311   0.66667    311
## 312   1.01000    312
## 313   2.58824    313
## 314   0.99296    314
## 315   2.16667    315
## 316   1.00000    316
## 317   5.33333    317
## 318   2.28571    318
## 319   0.88620    319
## 320   0.94898    320
## 321   0.42424    321
## 322   1.06806    322
## 323   2.46154    323
## 324   1.00000    324
## 325   1.22464    325
## 326   0.91150    326
## 327   0.95973    327
## 328   0.85000    328
## 329   1.06007    329
## 330   0.79070    330
## 331   3.33333    331
## 332   0.18750    332
## 333   0.00000    333
## 334   0.21429    334
## 335   0.63636    335
## 336   1.02948    336
## 337   0.88372    337
## 338   1.39394    338
## 339   0.86111    339
## 340   3.60000    340
## 341   0.99407    341
## 342   1.00000    342
## 343   1.03390    343
## 344   0.17647    344
## 345       Inf    345
## 346   1.03529    346
## 347   1.22500    347
## 348   0.00000    348
## 349       Inf    349
## 350   0.99144    350
## 351   0.98671    351
## 352   0.97222    352
## 353   0.87500    353
## 354   0.71930    354
## 355   0.89091    355
## 356   0.91228    356
## 357   0.50000    357
## 358   1.01980    358
## 359   0.00000    359
## 360   0.98836    360
## 361   0.71429    361
## 362   1.03317    362
## 363   1.06349    363
## 364   0.00000    364
## 365   0.25000    365
## 366   0.75115    366
## 367   0.73684    367
## 368   0.82353    368
## 369   0.86792    369
## 370   1.03378    370
## 371   1.21965    371
## 372   0.87234    372
## 373   0.94798    373
## 374   0.83099    374
## 375   1.09211    375
## 376   0.96875    376
## 377   0.00000    377
## 378   0.89535    378
## 379   3.33333    379
## 380   1.00000    380
## 381   1.50000    381
## 382   1.40741    382
## 383   0.90909    383
## 384       Inf    384
## 385   2.16667    385
## 386   1.02889    386
## 387   1.38889    387
## 388   1.00538    388
## 389   1.06250    389
## 390   1.04254    390
## 391       Inf    391
## 392   1.01821    392
## 393   1.01189    393
## 394   1.00000    394
## 395   0.88462    395
## 396   1.00000    396
## 397   2.50000    397
## 398       Inf    398
## 399   1.58333    399
## 400   0.96970    400
## 401   0.83614    401
## 402   1.23077    402
## 403   1.06738    403
## 404   1.07251    404
## 405   0.86667    405
## 406   0.69231    406
## 407   0.68000    407
## 408   1.00348    408
## 409   1.31646    409
## 410   0.85981    410
## 411   0.00000    411
## 412   1.76923    412
## 413       Inf    413
## 414   2.00000    414
## 415   1.00000    415
## 416       Inf    416
## 417   1.22857    417
## 418   3.25000    418
## 419   1.23567    419
## 420   0.93103    420
## 421   1.21212    421
## 422   0.78947    422
## 423   1.00000    423
## 424   0.92405    424
## 425   1.17195    425
## 426   1.00000    426
## 427   0.33333    427
## 428   0.92633    428
## 429   1.28571    429
## 430   1.12500    430
## 431   0.58333    431
## 432   0.42857    432
## 433   0.23077    433
## 434   0.60000    434
## 435   0.97917    435
## 436   0.00000    436
## 437   1.05000    437
## 438   1.06122    438
## 439   1.53846    439
## 440   2.55556    440
## 441   1.15332    441
## 442   1.03727    442
## 443   0.84211    443
## 444   0.86927    444
## 445   0.78710    445
## 446   0.00000    446
## 447   0.94156    447
## 448   0.00000    448
## 449   0.00000    449
## 450   0.00000    450
## 451   0.00000    451
## 452   0.89130    452
## 453   1.00725    453
## 454   1.04762    454
## 455   0.86842    455
## 456   1.25210    456
## 457   1.00266    457
## 458   1.05405    458
## 459       Inf    459
## 460   0.68750    460
## 461   0.00000    461
## 462   0.00000    462
## 463   1.66667    463
## 464   0.96619    464
## 465   3.00000    465
## 466   1.07218    466
## 467       Inf    467
## 468   0.62887    468
## 469   0.50000    469
## 470   0.90493    470
## 471   1.03379    471
## 472   1.03379    472
## 473   1.01445    473
## 474   1.04720    474
## 475   0.96622    475
## 476   0.40000    476
## 477       Inf    477
## 478   1.00000    478
## 479   0.98799    479
## 480   0.50000    480
## 481   2.16667    481
## 482   0.00000    482
## 483   1.32955    483
## 484   0.00000    484
## 485   1.66667    485
## 486   0.56818    486
## 487   0.66667    487
## 488   1.02820    488
## 489   1.00000    489
## 490   0.00000    490
## 491   0.75610    491
## 492   0.56250    492
## 493   0.00000    493
## 494   0.88632    494
## 495   0.77778    495
## 496   1.01714    496
## 497   0.91000    497
## 498   0.64286    498
## 499   3.00000    499
## 500   0.66667    500
PopRatioGen <- merge(RatioGenST, PopGenST, by="Region")

names(PopRatioGen) [2] <- "Ratio_FM"
names(PopRatioGen)[3] <- "PopRegion"

#To delete rows where ratio is inf or ratio < 1000

PopRatioGen<- PopRatioGen[PopRatioGen$Ratio_FM <= 1000, ] 

max(PopRatioGen$Ratio_FM) #max ratio is 5.33
## [1] 5.3333
min(PopRatioGen$Ratio_FM) #min ratio is 0
## [1] 0
p17 <- ggplot(PopRatioGen, aes(x=PopRegion, y=Ratio_FM)) + 
  geom_point()+
  geom_smooth(method=lm, color="red")+
  labs(title="Ratio of female to male vs Population in Region",
       x="Population in Region")+
  theme_classic()  

p17

# Question 7.2 

#The scatter plot in  7.1 indicates the following trends:

#1. In regions where the population is low, the ratio of female/male ranges from very low to high of 5.33. 
#2  One possible reason for this trend is related to Question 6.2, where retired people tended to move to small country towns.  Female generally live longer than male, and more males have died in these low population regions.     
#3  Another possible reason for the high ratio could be that the male of the family who are still fit and healthy to work, will go to more populous regions to find work and send money home.  This way the female members could stay put in a region where cost of living is lower, and rely on the income earned by the male members of the family. e.g. This is quite typical in countries like china, where the male members will go from rural villages to the big city to earn an income to support his family in the poor villages.
#4 The red regression line shows that the ratio trends around 1, representing the more common balance between female and male in most regions.   
#Question 8.1 Females 18 to 21 have been chosen as the primary customers for the hypothetical product.

#Question 8.2

F18_21 <- filter(A1, A1$gender == "F") #Select females only from the dataset

F18_21A <- filter(F18_21, age > 17) #Select females over 17

F18_21B <- filter(F18_21A, age < 22) #Select females between 18 and 21

F18_21C <- aggregate(F18_21B$population, by=list(F18_21B$region), FUN=sum)

F18_21D <- F18_21C[order(F18_21C$x, decreasing=TRUE), ]  # The two regions with the largest population of females between 18 and 21 are SSC22015 (1113) and SSC20492 (2566)

# Females between 18 to 21 are young, impulsive and very active participants in the social media.  By targeting the two regions with the highest number of young female in this age group, the multiplying effect will be the greatest when the new product is launched, and increasing the chance of achieving the greatest sales volume in the regions, as well as influencing the sales in other regions through the social media.
# Question 9.1 when n=1

# Selecting n = 1 from the region with most people

s <- 1 #Start from sample 1 

n1Store <- list() # Create empty list to store weighted means for each age

while (s < 101) {
  
n1 <- RL[sample(nrow(RL), 1), ]  #Select n=1

Age <- n1[c(1), 2] #Range of ages 0 to 55

Pop <- n1[c(1), 4]   # Population for SSC22015, region with most people

WMn1 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn1                  

# n1Store contains the weighted means for SSC22015       

n1Store[[s]] <- WMn1  

s <- s + 1 #choose next sample

if (s == 101){
break
}
}
# Question 9.1 when n=10

# Selecting n = 10 from the region with most people

s <- 1 #Start from sample 1 

n10Store <- list() # Create empty list to store weighted means for each age

while (s < 101) {
  
n10 <- RL[sample(nrow(RL), 10), ]  #Select n=10

Age <- n10[c(1:10), 2] #Range of ages 0 to 55

Pop <- n10[c(1:10), 4]   # Population for SSC22015, region with most people

WMn10 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn10                  

# n10Store contains the weighted means for SSC22015       

n10Store[[s]] <- WMn10  

s <- s + 1 #choose next sample

if (s == 101){
break
}
}
# Question 9.1 when n=100

# Selecting n = 100 from the region with most people

s <- 1 #Start from sample 1 

n100Store <- list() # Create empty list to store weighted means for each age

while (s < 101) {
  
n100 <- RL[sample(nrow(RL), 100), ]  #Select n=100

Age <- n100[c(1:100), 2] #Range of ages 0 to 55

Pop <- n100[c(1:100), 4]   # Population for SSC22015, region with most people

WMn100 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn100                  

# n100Store contains the weighted means for SSC22015       

n100Store[[s]] <- WMn100 

s <- s + 1 #choose next sample

if (s == 101){
break
}
}
# Question 9.1 when n=1000

# Selecting n = 1000 from the region with most people

RL1000<- RL[rep(seq_len(nrow(RL)), 9), ] #Repeat the original dataset nine times to get more than 1000 rows.

s <- 1 #Start from sample 1 

n1000Store <- list() # Create empty list to store weighted means for each age

while (s < 101) {
  
n1000 <- RL1000[sample(nrow(RL1000), 1000), ]  #Select n=1000

Age <- n1000[c(1:1000), 2] #Range of ages 0 to 55

Pop <- n1000[c(1:1000), 4]   # Population for SSC22015, region with most people

WMn1000 <- weighted.mean(Age, Pop) # weighted means for SSC22015
WMn1000                  

# n100Store contains the weighted means for SSC22015       

n1000Store[[s]] <- WMn1000 

s <- s + 1 #choose next sample

if (s == 101){
break
}
}
n1Store <- unlist(n1Store)
n10Store <- unlist(n10Store)
n100Store <- unlist(n100Store)
n1000Store <- unlist(n1000Store)

n10Store # This contains the weighted mean for the largest regions SSC22015 for the 100 samples
##   [1] 19.969 23.665 27.942 15.497 28.882 27.727 28.208 25.390 26.035 24.693
##  [11] 30.294 13.376 20.021 24.298 23.714 31.887 22.664 39.681 24.177 28.669
##  [21] 31.147 35.004 27.862 20.570 28.257 31.050 32.942 26.303 25.315 26.490
##  [31] 23.051 19.684 28.352 21.790 20.599 24.532 33.870 31.387 32.802 22.112
##  [41] 31.630 25.959 24.560 29.605 21.485 26.365 26.011 26.624 23.708 24.802
##  [51] 29.727 24.053 25.440 23.942 30.122 32.286 23.693 21.204 22.324 24.469
##  [61] 28.811 28.394 24.672 14.967 31.445 24.485 18.149 23.903 25.753 25.625
##  [71] 22.884 28.033 25.768 24.748 34.513 27.430 29.783 30.153 28.529 27.464
##  [81] 28.881 31.217 28.333 27.442 17.651 36.671 26.827 26.175 22.304 20.357
##  [91] 28.283 22.355 28.916 24.177 23.312 27.447 19.744 28.054 29.405 21.543
n1Sum <- as.data.frame((as.data.frame(n1Store))) # This is a dataframe of the weighted mean in a column of 100 rows

n10Sum <- as.data.frame((as.data.frame(n10Store))) # This is a dataframe of the weighted mean in a column of 100 rows

n100Sum <- as.data.frame((as.data.frame(n100Store))) # This is a dataframe of the weighted mean in a column of 100 rows

n1000Sum <- as.data.frame((as.data.frame(n1000Store))) # This is a dataframe of the weighted mean in a column of 100 rows
#For n=1

# set sample size

#Compare the results from $n_{sum}$ = 1, 10 and 100.

n_sample <- 100

# set number of times the samples will be summed
n_sum <- 100

n1S <- n1Sum$n1Store

mean1 <- mean(n1S)

sd1 <- sd(n1S)

binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4)  # rule of thumb for aesthetics

title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=1)', n_sum, n_sample)

p18 <- ggplot()
p18 <- p18 + geom_histogram(aes(x = n1S), binwidth = binwidth, colour = "white", 
                          fill = "cornflowerblue", size = 0.1)
p18 <- p18 + stat_function(fun = function(x) dnorm(x, mean = mean1, sd = sd1) * n_sample * binwidth, 
                         color = "darkred", size = 1)
p18 <- p18 + labs(x = 'x', y = 'f(x)', title = title_txt)

#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)

p18

#For n=10

# set sample size

#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000

n_sample <- 100

# set number of times the samples will be summed
n_sum <- 100

n10S <- n10Sum$n10Store  #(TESTING ONLY)

mean10 <- mean(n10S)

sd10 <- sd(n10S)

binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4)  # rule of thumb for aesthetics

title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=10)', n_sum, n_sample)

p19 <- ggplot()
p19 <- p19 + geom_histogram(aes(x = n10S), binwidth = binwidth, colour = "white", 
                          fill = "cornflowerblue", size = 0.1)
p19 <- p19 + stat_function(fun = function(x) dnorm(x, mean = mean10, sd = sd10) * n_sample * binwidth, 
                         color = "darkred", size = 1)
p19 <- p19 + labs(x = 'x', y = 'f(x)', title = title_txt)

#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)

p19

#For n=100

# set sample size

#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000

n_sample <- 100

# set number of times the samples will be summed
n_sum <- 100

n100S <- n100Sum$n100Store

mean100 <- mean(n100S)

sd100 <- sd(n100S)

#binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4)  # rule of thumb for aesthetics

binwidth <- 0.005 * sqrt(n_sample) * 1 * (n_sum ^ 0.4)  # rule of thumb for aesthetics

title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=100)', n_sum, n_sample)

p20 <- ggplot()
p20 <- p20 + geom_histogram(aes(x = n100S), binwidth = binwidth, colour = "white", 
                          fill = "cornflowerblue", size = 0.1)
p20 <- p20 + stat_function(fun = function(x) dnorm(x, mean = mean100, sd = sd100) * n_sample * binwidth, 
                         color = "darkred", size = 1)
p20 <- p20 + labs(x = 'x', y = 'f(x)', title = title_txt)

#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)

p20

#For n=1000

# set sample size

#Compare the results from $n_{sum}$ = 1, 10, 100 and 1000

n_sample <- 100

# set number of times the samples will be summed
n_sum <- 100

n1000S <- n1000Sum$n1000Store

mean1000 <- mean(n1000S)

sd1000 <- sd(n1000S)

#binwidth <- 0.005 * sqrt(n_sample) * 4 * (n_sum ^ 0.4)  # rule of thumb for aesthetics

binwidth <- 0.005 * sqrt(n_sample) * 0.5 * (n_sum ^ 0.1)  # rule of thumb for aesthetics

title_txt <- sprintf('CLT test after summing %d samples of %d observations (n=1000)', n_sum, n_sample)

p21 <- ggplot()
p21 <- p21 + geom_histogram(aes(x = n1000S), binwidth = binwidth, colour = "white", 
                          fill = "cornflowerblue", size = 0.1)
p21 <- p21 + stat_function(fun = function(x) dnorm(x, mean = mean1000, sd = sd1000) * n_sample * binwidth, 
                         color = "darkred", size = 1)
p21 <- p21 + labs(x = 'x', y = 'f(x)', title = title_txt)

#ggsave(here("outputs", "clt_example1.png"), width = 6, height = 5)

p21