# Load the Professional survey data ----
# The working directory is switched to the course folder so that the CSV can
# be read by its bare file name.
setwd(dir = "C:/Users/plu5638/Desktop/Business Analytics/Module 4.")
eai <- read.csv(file = "professional.csv")
# Number of rows and columns in the loaded data frame
dim(x = eai)
## [1] 410 8
# Preview of the first rows
head(x = eai)
## Age Gender RealEstatePurchases ValueOfInvestments_USD NumberOfTransactions
## 1 38 Female No 12200 4
## 2 30 Male No 12400 4
## 3 41 Female No 26800 5
## 4 28 Female Yes 19600 6
## 5 31 Female Yes 15100 5
## 6 32 Male No 39700 3
## BroadbandAccess HouseholdIncome_USD HaveChildren
## 1 Yes 75200 Yes
## 2 Yes 70300 Yes
## 3 Yes 48200 No
## 4 No 95300 No
## 5 No 73300 Yes
## 6 Yes 123400 Yes
# Distribution of Age over the full data set. The call expression is left
# untouched because hist() builds its default title and axis label from it.
hist(eai$Age, col = "green")
# Age parameters of the full data set, treated here as the population
# Mean age
mean(eai[["Age"]])
## [1] 30.1122
# Standard deviation of age
sd(eai[["Age"]])
## [1] 4.024023
#Population proportion of subscribers who have made Real Estate Purchases
#The RealEstatePurchases column is tabulated (the previous version tabulated HouseholdIncome_USD, which says nothing about this proportion)
table(eai$RealEstatePurchases)
#(printed counts not reproduced here: re-run to display the No/Yes counts for all 410 rows)
#Share of "Yes" answers, computed from the data instead of a hard-coded 1500/2500 that did not come from this 410-row data set
p_pop<-mean(eai$RealEstatePurchases == "Yes")
p_pop
#Draw one random sample of 30 rows (sample() draws without replacement by default)
set.seed(123) #fixed seed so the same 30 rows are drawn on every run
eai_sample1<-eai[sample(seq_len(nrow(eai)),30),] #selects 30 random rows and assigns them to the eai_sample1 data frame; seq_len() is the safe form of 1:nrow(eai)
dim(eai_sample1) # returns the dimension of the eai_sample1 dataset. It has 30 rows and 8 columns.
## [1] 30 8
print(eai_sample1)
## Age Gender RealEstatePurchases ValueOfInvestments_USD NumberOfTransactions
## 179 29 Female Yes 18800 5
## 14 30 Male No 47900 5
## 195 30 Male Yes 67200 7
## 306 21 Male Yes 35300 6
## 118 29 Male No 24600 4
## 299 28 Male Yes 42600 4
## 229 31 Female Yes 13900 5
## 244 25 Male Yes 24500 6
## 409 25 Female Yes 30300 6
## 374 38 Female No 16200 1
## 153 31 Male No 11100 4
## 90 33 Female Yes 27200 3
## 91 36 Male Yes 43000 2
## 256 32 Male No 23700 1
## 197 32 Male No 33100 12
## 398 29 Male No 13800 5
## 348 30 Female No 83300 13
## 137 33 Male No 16100 5
## 355 27 Female No 16600 1
## 328 32 Female Yes 27700 3
## 26 30 Male No 39800 9
## 7 32 Male Yes 21900 2
## 393 39 Male No 25100 14
## 254 26 Female No 23600 2
## 211 34 Female Yes 10000 4
## 78 25 Male No 18800 5
## 81 35 Male Yes 54700 3
## 43 29 Male No 19900 6
## 359 29 Male Yes 33800 15
## 373 34 Male Yes 20000 7
## BroadbandAccess HouseholdIncome_USD HaveChildren
## 179 Yes 53400 Yes
## 14 Yes 68200 Yes
## 195 No 65300 Yes
## 306 Yes 81500 No
## 118 Yes 82100 No
## 299 No 68400 No
## 229 Yes 98800 Yes
## 244 Yes 201700 Yes
## 409 No 35100 No
## 374 Yes 65500 Yes
## 153 Yes 78700 No
## 90 Yes 39800 No
## 91 No 83500 Yes
## 256 Yes 55000 No
## 197 No 64900 No
## 398 No 96000 No
## 348 Yes 59300 Yes
## 137 No 55800 Yes
## 355 No 69800 Yes
## 328 Yes 61800 No
## 26 Yes 108200 Yes
## 7 Yes 73900 Yes
## 393 No 87900 Yes
## 254 No 67700 Yes
## 211 No 25100 Yes
## 78 Yes 73900 Yes
## 81 Yes 101600 No
## 43 No 75700 No
## 359 Yes 79300 No
## 373 Yes 36500 No
# Distribution of Age within the 30-row sample. The call expression is left
# untouched because hist() builds its default title and axis label from it.
hist(eai_sample1$Age, col = "orange")
# Age statistics of the sample
# Sample mean, i.e. the point estimate of the population mean
mean(eai_sample1[["Age"]])
## [1] 30.46667
# Sample standard deviation, i.e. the point estimate of the population
# standard deviation
sd(eai_sample1[["Age"]])
## [1] 3.971739
#Sample proportion of subscribers who have purchased Real Estate
#The RealEstatePurchases column is tabulated (the previous version tabulated HouseholdIncome_USD and hard-coded 19/30)
table(eai_sample1$RealEstatePurchases)
##
## No Yes
## 15 15
#Share of "Yes" answers among the 30 sampled rows, computed from the data
p_samp<-mean(eai_sample1$RealEstatePurchases == "Yes")
p_samp
## [1] 0.5
# Method 1: interval built by hand as x_bar +/- t * s / sqrt(n)
# (formula 6.7 on page 245 of the Camm textbook)
n <- 30 # sample size
x_bar <- mean(eai_sample1[["Age"]])
s <- sd(eai_sample1[["Age"]])
x_bar
## [1] 30.46667
# t quantile leaving an area of alpha / 2 = 0.025 in the upper tail, with
# n - 1 = 29 degrees of freedom (a t table gives the same value)
t_val <- qt(p = 1 - 0.05 / 2, df = n - 1)
t_val
## [1] 2.04523
# Half-width of the interval
margin_of_error <- t_val * (s / sqrt(n))
margin_of_error
## [1] 1.483072
# End points of the interval
UpperLimit <- x_bar + margin_of_error
LowerLimit <- x_bar - margin_of_error
LowerLimit
## [1] 28.98359
UpperLimit
## [1] 31.94974
#Method 2: Using t.test function of R
#One-sample t-test on the sampled ages; only the reported 95 percent confidence interval is used here, the test against a mean of 0 is incidental
t.test(eai_sample1$Age, conf.level = 0.95) #look for the "95 percent confidence interval" values in the output
##
## One Sample t-test
##
## data: eai_sample1$Age
## t = 42.015, df = 29, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 28.98359 31.94974
## sample estimates:
## mean of x
## 30.46667
Reporting: The 95% confidence interval for the mean age is (28.98359, 31.94974).
Interpretation: We can conclude with 95% confidence that the mean age of the population is between 28.98359 and 31.94974. If repeated samples were taken and a 95% confidence interval computed for each sample, about 95% of those intervals would contain the population mean.
Here we can check whether the 95% confidence interval estimated from the sample of 30 people contains the population mean. Recall that the population mean age calculated earlier is 30.1122 (the sample mean is 30.46667). Our 95% confidence interval runs from 28.98359 to 31.94974, so it does contain the population mean.
#Method 1 (household income): same formula as for age, x_bar +/- t * s / sqrt(n)
#Dedicated *_hi names are used throughout so the age results computed earlier (s, n, x_bar, t_val, margin_of_error, LowerLimit, UpperLimit) keep their values
s_hi<-sd(eai_sample1$HouseholdIncome_USD)
n_hi<-30 #sample size
x_bar_hi<-mean (eai_sample1$HouseholdIncome_USD)
x_bar_hi
## [1] 73813.33
t_val_hi<-qt((1-(0.05/2)), n_hi-1) #t value providing an area of alpha/2 = 0.025 in the upper tail of the t distribution with 30-1=29 degrees of freedom
t_val_hi
## [1] 2.04523
margin_of_error_hi<-t_val_hi*(s_hi/sqrt(n_hi))
margin_of_error_hi #about 11625.5, i.e. half the width of the interval
LowerLimit_hi<-x_bar_hi-margin_of_error_hi
UpperLimit_hi<-x_bar_hi+margin_of_error_hi
LowerLimit_hi
## [1] 62187.8
UpperLimit_hi
## [1] 85438.87
#Method 2: Using t.test function of R
#One-sample t-test on the sampled household incomes; only the reported 95 percent confidence interval is used here, the test against a mean of 0 is incidental
t.test(eai_sample1$HouseholdIncome_USD, conf.level = 0.95) #look for the "95 percent confidence interval" values in the output
##
## One Sample t-test
##
## data: eai_sample1$HouseholdIncome_USD
## t = 12.986, df = 29, p-value = 1.3e-13
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 62187.80 85438.87
## sample estimates:
## mean of x
## 73813.33
Reporting: The 95% confidence interval for the mean household income is (62187.80, 85438.87).
Interpretation: We can conclude with 95% confidence that the mean household income of the population is between 62187.80 and 85438.87. If repeated samples were taken and a 95% confidence interval computed for each sample, about 95% of those intervals would contain the population mean household income.
Here we can check whether the 95% confidence interval estimated from the sample of 30 people contains the population mean household income. The sample mean household income is 73813.33, and our 95% confidence interval runs from 62187.80 to 85438.87. The population mean household income has not been calculated above, so it must be computed from the full data set before this check can be completed.
** Errors: Develop 95% confidence intervals for the proportion of subscribers who have broadband access at home and for the proportion of subscribers who have children. (Hint: refer to Section E of the Sampling_ConfidenceInterval handout.) You will calculate two 95% confidence intervals: one for the proportion of subscribers who have broadband access at home and another for the proportion of subscribers who have children.
**Analysis: d. Would Young Professional be a good advertising outlet for online brokers? Justify your conclusion with statistical data.
Yes, the mean age is estimated with 95% confidence to be between 28.98359 and 31.94974, with a sample mean of 30.46667.
Analysis: e. Would this magazine be a good place to advertise for companies selling educational software and computer games for young children? Unable to determine without data.
# Second draw: 385 rows chosen at random under a new fixed seed
set.seed(seed = 155)
eai_subset <- eai[sample(x = seq_len(nrow(eai)), size = 385), ]
# Number of rows and columns in the drawn subset
dim(x = eai_subset)
## [1] 385 8
# Full listing of the drawn rows
print(x = eai_subset)
## Age Gender RealEstatePurchases ValueOfInvestments_USD NumberOfTransactions
## 270 32 Female Yes 21400 8
## 170 38 Female Yes 36300 9
## 57 30 Male No 23100 7
## 106 22 Female No 30400 11
## 49 29 Female No 41900 3
## 52 37 Female No 10000 4
## 244 25 Male Yes 24500 6
## 322 29 Female No 34500 9
## 354 29 Male No 37200 5
## 178 31 Male Yes 0 4
## 256 32 Male No 23700 1
## 300 34 Female Yes 18100 4
## 104 25 Female No 52800 5
## 331 28 Female Yes 37900 7
## 85 30 Female Yes 24000 3
## 243 28 Female Yes 28000 4
## 242 31 Male Yes 44200 1
## 165 27 Female Yes 63500 6
## 267 27 Female No 29200 3
## 231 28 Female No 70400 8
## 53 28 Female No 27200 5
## 102 33 Male No 39500 3
## 79 35 Male No 24400 3
## 239 33 Female No 47200 6
## 123 31 Female No 60900 6
## 134 27 Male No 0 2
## 94 20 Male No 13900 14
## 378 27 Male No 21300 1
## 302 31 Male Yes 30500 9
## 192 35 Female No 25600 4
## 247 32 Male No 32100 6
## 70 28 Male Yes 11700 8
## 402 32 Female Yes 64100 4
## 234 26 Female No 19900 5
## 215 28 Female Yes 26400 6
## 276 34 Male Yes 40200 13
## 51 33 Female No 14000 6
## 355 27 Female No 16600 1
## 160 22 Female No 23300 9
## 81 35 Male Yes 54700 3
## 296 36 Male No 17500 6
## 32 33 Female Yes 36100 7
## 403 30 Male Yes 19300 6
## 301 31 Male No 11200 7
## 23 41 Male Yes 0 10
## 382 26 Female Yes 20700 4
## 330 29 Male No 17500 3
## 63 36 Female Yes 37300 11
## 390 30 Male No 28100 5
## 91 36 Male Yes 43000 2
## 348 30 Female No 83300 13
## 269 31 Female No 16800 5
## 27 29 Female No 17900 2
## 149 34 Female No 32400 9
## 344 30 Male No 25500 6
## 116 29 Male No 30800 3
## 187 35 Male No 15100 6
## 250 35 Male Yes 36000 8
## 144 31 Female No 25900 5
## 260 33 Female No 24600 2
## 227 25 Male Yes 21500 5
## 26 30 Male No 39800 9
## 288 33 Female Yes 38200 5
## 64 24 Male Yes 21100 9
## 333 32 Male Yes 22100 9
## 377 33 Female Yes 18500 2
## 10 34 Female Yes 18400 11
## 131 31 Male Yes 20800 10
## 310 32 Male No 30800 7
## 73 32 Male No 17700 8
## 229 31 Female Yes 13900 5
## 190 30 Female Yes 28700 10
## 96 31 Female Yes 27100 3
## 341 28 Male No 34200 4
## 168 32 Male Yes 15000 5
## 35 28 Male Yes 8600 6
## 357 25 Male Yes 23400 4
## 182 22 Female No 11900 4
## 22 28 Male No 32600 7
## 369 35 Male Yes 65200 2
## 362 33 Male No 18100 18
## 21 30 Female No 34100 6
## 343 26 Female Yes 24500 7
## 195 30 Male Yes 67200 7
## 232 26 Female No 12400 4
## 90 33 Female Yes 27200 3
## 17 33 Male Yes 31000 12
## 138 34 Male No 15800 7
## 335 27 Male Yes 19500 6
## 199 31 Female Yes 18400 5
## 408 24 Male Yes 36000 4
## 309 29 Male Yes 24700 2
## 120 35 Female Yes 26300 16
## 113 23 Female Yes 49800 3
## 167 27 Female No 49700 4
## 172 19 Male No 13500 10
## 391 24 Male No 23600 6
## 89 32 Male No 29800 7
## 389 30 Male No 23900 4
## 201 23 Male No 46300 3
## 207 32 Female Yes 41800 6
## 213 33 Female Yes 32400 7
## 92 32 Female Yes 15500 7
## 65 25 Male No 40900 3
## 13 28 Female No 17300 7
## 286 32 Female No 20600 4
## 15 30 Female No 28200 3
## 385 38 Male Yes 21900 6
## 128 38 Female No 37900 12
## 209 28 Male Yes 15500 8
## 206 25 Male No 17800 2
## 273 30 Male No 9100 5
## 287 34 Female No 63400 17
## 84 34 Female No 16000 9
## 406 32 Female No 27400 7
## 163 37 Male Yes 56300 5
## 72 34 Male No 11900 6
## 340 30 Female Yes 25800 8
## 212 34 Female No 53300 6
## 379 30 Male No 19400 3
## 69 26 Male No 23900 2
## 20 23 Female No 21300 7
## 253 32 Female Yes 33300 13
## 410 28 Female Yes 30500 11
## 151 30 Male No 39300 7
## 363 29 Male Yes 34300 2
## 298 24 Male No 29200 3
## 132 34 Male No 31100 7
## 395 31 Female Yes 36200 6
## 375 34 Male No 27200 9
## 143 25 Female No 27100 2
## 98 24 Female Yes 29500 4
## 4 28 Female Yes 19600 6
## 174 25 Female No 26800 7
## 180 31 Female No 40900 5
## 404 26 Female Yes 27400 4
## 245 29 Male No 18100 9
## 353 36 Male No 25000 2
## 248 32 Male Yes 44500 6
## 274 28 Male No 19200 8
## 101 29 Male Yes 16400 5
## 342 23 Female No 25000 7
## 137 33 Male No 16100 5
## 386 31 Female Yes 61200 7
## 358 32 Male Yes 21500 3
## 68 26 Male Yes 20000 6
## 202 34 Male Yes 32700 5
## 350 24 Male No 29600 2
## 400 27 Male No 30300 4
## 317 29 Male Yes 34500 8
## 268 35 Male Yes 16600 2
## 77 28 Female No 66900 5
## 71 26 Female No 19200 5
## 126 31 Male Yes 0 5
## 392 34 Male No 70800 10
## 210 33 Male No 22900 4
## 76 23 Male Yes 25300 4
## 321 29 Male No 59300 11
## 336 29 Male Yes 23300 4
## 30 30 Male No 54000 7
## 409 25 Female Yes 30300 6
## 329 27 Male No 14600 6
## 5 31 Female Yes 15100 5
## 278 33 Male Yes 27200 3
## 237 37 Female No 14000 9
## 332 33 Male Yes 13800 9
## 305 29 Male No 38400 2
## 352 34 Female Yes 12100 6
## 258 27 Male Yes 19900 10
## 361 27 Male Yes 13100 11
## 339 24 Male No 15300 6
## 95 30 Male No 14200 6
## 162 34 Male Yes 33200 2
## 319 28 Male No 21900 7
## 142 32 Male Yes 18500 7
## 173 33 Male Yes 37500 4
## 193 34 Female Yes 30700 0
## 140 37 Female No 13000 7
## 9 26 Male Yes 16100 4
## 325 29 Female No 18100 7
## 271 34 Male No 12500 7
## 158 19 Male Yes 54600 4
## 216 23 Male Yes 30400 5
## 279 31 Female Yes 20100 4
## 303 30 Male No 39300 9
## 225 30 Female No 29800 3
## 150 35 Female Yes 10300 4
## 141 27 Male Yes 68100 5
## 314 36 Male Yes 15000 2
## 356 30 Male No 58000 2
## 24 29 Male Yes 20800 10
## 233 26 Male Yes 27800 9
## 156 31 Female No 16700 6
## 157 22 Female Yes 19400 6
## 175 27 Male No 30300 5
## 284 30 Female No 10200 11
## 179 29 Female Yes 18800 5
## 401 31 Female Yes 37100 6
## 277 31 Male Yes 55800 5
## 221 30 Male No 34700 12
## 308 27 Male Yes 24300 6
## 282 29 Male No 39500 5
## 236 32 Female Yes 75000 6
## 289 24 Male No 50400 7
## 312 29 Female No 9100 2
## 280 29 Male No 17300 3
## 109 32 Male Yes 69500 7
## 118 29 Male No 24600 4
## 306 21 Male Yes 35300 6
## 105 38 Male No 36500 4
## 147 28 Female No 67900 5
## 281 25 Male Yes 42100 7
## 115 38 Male No 15200 6
## 87 42 Male No 14100 5
## 399 31 Male Yes 47000 10
## 318 22 Male Yes 26300 7
## 316 25 Female Yes 31300 7
## 191 28 Male Yes 50900 6
## 80 31 Male No 45500 9
## 110 33 Female No 33000 5
## 366 23 Female Yes 16900 4
## 12 35 Female Yes 15500 6
## 337 30 Male No 29000 5
## 161 26 Male No 29700 8
## 257 25 Female Yes 35700 4
## 112 24 Male Yes 15200 8
## 255 23 Female No 49400 7
## 78 25 Male No 18800 5
## 125 33 Female No 31700 6
## 1 38 Female No 12200 4
## 304 38 Female No 23100 7
## 34 28 Male No 21400 5
## 119 36 Male Yes 24700 9
## 146 31 Female No 59200 4
## 196 37 Male No 62200 11
## 46 32 Female No 14200 5
## 93 35 Male Yes 8700 6
## 347 33 Female No 21100 9
## 219 29 Male No 11000 5
## 139 31 Male No 27500 4
## 223 22 Male No 20700 5
## 198 21 Male No 26000 4
## 45 29 Female Yes 32400 6
## 145 38 Male No 18400 2
## 122 23 Male No 20300 6
## 111 32 Female Yes 32100 3
## 261 33 Male Yes 18000 1
## 252 33 Male Yes 19300 6
## 396 32 Male Yes 20600 12
## 200 31 Male Yes 53000 12
## 383 29 Male No 17100 4
## 311 32 Male Yes 17400 3
## 184 28 Female No 20400 8
## 360 27 Male No 18400 7
## 197 32 Male No 33100 12
## 50 31 Male No 12000 3
## 186 33 Female No 33900 3
## 226 34 Female Yes 95200 21
## 155 31 Male No 22100 6
## 230 29 Female No 28900 7
## 214 26 Male No 85600 2
## 293 31 Male No 39500 5
## 388 31 Male Yes 23600 5
## 152 32 Female No 13800 6
## 124 28 Male No 15800 11
## 97 28 Male Yes 22300 3
## 235 29 Female No 28000 7
## 373 34 Male Yes 20000 7
## 294 29 Male No 23400 5
## 371 29 Female No 24800 12
## 338 34 Female No 53200 5
## 3 41 Female No 26800 5
## 48 23 Male Yes 32000 5
## 365 32 Male Yes 19200 6
## 217 31 Male Yes 24000 8
## 41 27 Male No 48700 6
## 88 37 Female Yes 17700 11
## 54 27 Female Yes 34500 4
## 56 31 Female No 53400 3
## 8 26 Female Yes 41900 2
## 240 36 Female No 57200 5
## 324 30 Male No 7000 3
## 67 31 Male No 33600 8
## 83 29 Male No 16200 3
## 103 24 Male Yes 20900 6
## 153 31 Male No 11100 4
## 59 33 Male No 10900 10
## 117 28 Male No 21200 3
## 11 33 Female No 33800 3
## 55 33 Female No 26000 6
## 61 24 Female No 24600 16
## 384 36 Male No 21000 5
## 183 29 Male Yes 28100 10
## 159 31 Female Yes 41500 2
## 307 32 Female No 15800 3
## 2 30 Male No 12400 4
## 320 29 Female Yes 27100 7
## 387 31 Female No 10200 8
## 263 33 Female No 27600 4
## 398 29 Male No 13800 5
## 42 30 Female Yes 17100 4
## 6 32 Male No 39700 3
## 292 26 Male No 18100 4
## 359 29 Male Yes 33800 15
## 346 24 Female No 30600 11
## 349 25 Female Yes 50700 5
## 313 35 Male Yes 23300 2
## 100 23 Female Yes 26400 9
## 218 28 Female No 17500 0
## 133 29 Male No 31200 5
## 185 26 Male No 14900 7
## 169 24 Female Yes 8200 7
## 62 35 Female No 28800 6
## 262 29 Male No 0 6
## 241 30 Male Yes 42900 7
## 37 31 Male No 24800 4
## 75 29 Female Yes 27500 4
## 204 28 Female No 19200 2
## 25 33 Female Yes 23100 6
## 135 26 Male Yes 38900 7
## 265 42 Male No 30300 4
## 164 36 Female No 25900 5
## 246 31 Female No 20900 7
## 136 33 Male Yes 29500 4
## 220 30 Female No 9400 5
## 107 32 Female No 39800 7
## 188 27 Male No 133400 5
## 290 30 Female No 9500 5
## 28 33 Male No 33300 2
## 129 28 Male Yes 22400 7
## 328 32 Female Yes 27700 3
## 275 32 Female Yes 15900 5
## 407 26 Female Yes 14400 3
## 82 32 Female No 32200 4
## 58 23 Male No 24900 5
## 154 29 Female No 28600 2
## 208 30 Male Yes 7100 7
## 222 31 Female Yes 44200 3
## 393 39 Male No 25100 14
## 171 33 Female No 50200 7
## 203 31 Male No 50900 4
## 345 26 Male No 19400 14
## 177 29 Male Yes 14400 9
## 33 28 Female Yes 44300 5
## 394 27 Male No 22300 7
## 211 34 Female Yes 10000 4
## 40 28 Male No 27000 4
## 351 35 Male Yes 29700 6
## 181 32 Male Yes 23300 7
## 66 26 Female No 24700 7
## 108 28 Female Yes 18100 9
## 38 33 Female No 26600 5
## 264 33 Female No 48800 4
## 397 37 Male Yes 38300 4
## 86 29 Male No 28800 3
## 283 25 Female No 21400 2
## 295 31 Male No 34100 6
## 299 28 Male Yes 42600 4
## 327 38 Male Yes 11800 4
## 39 32 Male No 33100 8
## 36 35 Male No 23200 3
## 249 38 Male Yes 20100 4
## 166 32 Male Yes 18700 7
## 367 26 Male No 22100 5
## 272 24 Female Yes 49300 13
## 44 28 Male No 13200 9
## 254 26 Female No 23600 2
## 266 28 Female Yes 57700 6
## 16 30 Male Yes 19400 6
## 60 31 Female No 52500 2
## 380 32 Female Yes 17000 5
## 326 30 Female No 15500 5
## 189 33 Female Yes 18300 7
## 43 29 Male No 19900 6
## 114 23 Male No 28100 6
## 238 32 Male No 20900 4
## 205 30 Male Yes 18300 3
## 370 31 Female Yes 11200 5
## 334 35 Female Yes 24200 6
## 18 28 Male No 21300 6
## 368 34 Female Yes 27000 3
## 364 30 Male Yes 23500 8
## 259 22 Male No 30600 9
## 29 30 Female Yes 21800 8
## 374 38 Female No 16200 1
## BroadbandAccess HouseholdIncome_USD HaveChildren
## 270 Yes 62100 Yes
## 170 No 60900 No
## 57 Yes 104700 Yes
## 106 Yes 202400 Yes
## 49 No 50200 No
## 52 Yes 51900 No
## 244 Yes 201700 Yes
## 322 Yes 61300 Yes
## 354 Yes 38500 No
## 178 Yes 60300 No
## 256 Yes 55000 No
## 300 No 68800 No
## 104 Yes 46500 No
## 331 Yes 153300 No
## 85 No 46100 Yes
## 243 Yes 88400 Yes
## 242 Yes 56000 Yes
## 165 Yes 44800 Yes
## 267 Yes 46900 Yes
## 231 No 30000 Yes
## 53 Yes 119100 Yes
## 102 Yes 52800 No
## 79 Yes 83600 No
## 239 Yes 51700 Yes
## 123 Yes 67900 Yes
## 134 No 40200 No
## 94 Yes 57700 No
## 378 Yes 39800 Yes
## 302 Yes 200500 No
## 192 Yes 72700 Yes
## 247 No 57300 Yes
## 70 No 73600 Yes
## 402 Yes 62300 Yes
## 234 Yes 124000 Yes
## 215 No 65500 No
## 276 Yes 73400 No
## 51 No 44500 Yes
## 355 No 69800 Yes
## 160 No 42100 Yes
## 81 Yes 101600 No
## 296 No 62500 No
## 32 Yes 90700 Yes
## 403 No 52700 No
## 301 Yes 108000 No
## 23 Yes 35100 No
## 382 No 71300 Yes
## 330 Yes 54700 Yes
## 63 Yes 103000 Yes
## 390 Yes 45800 No
## 91 No 83500 Yes
## 348 Yes 59300 Yes
## 269 No 91200 No
## 27 No 61100 No
## 149 Yes 35000 No
## 344 No 62400 No
## 116 Yes 85900 Yes
## 187 No 69800 No
## 250 No 93900 No
## 144 Yes 69900 Yes
## 260 No 51600 No
## 227 Yes 120600 Yes
## 26 Yes 108200 Yes
## 288 Yes 61600 No
## 64 Yes 97900 No
## 333 Yes 39500 Yes
## 377 Yes 70700 Yes
## 10 Yes 60100 No
## 131 No 65700 No
## 310 No 70200 No
## 73 Yes 70000 Yes
## 229 Yes 98800 Yes
## 190 Yes 76500 No
## 96 No 43100 No
## 341 Yes 49400 No
## 168 Yes 114900 Yes
## 35 Yes 33800 Yes
## 357 Yes 78900 No
## 182 Yes 27700 Yes
## 22 Yes 68300 No
## 369 No 32400 Yes
## 362 Yes 110900 No
## 21 Yes 92600 No
## 343 No 81800 No
## 195 No 65300 Yes
## 232 Yes 93200 Yes
## 90 Yes 39800 No
## 17 No 82300 No
## 138 Yes 64900 Yes
## 335 Yes 93200 Yes
## 199 No 87900 Yes
## 408 Yes 103500 No
## 309 Yes 67600 No
## 120 No 86200 No
## 113 Yes 65900 No
## 167 Yes 112700 No
## 172 Yes 47200 Yes
## 391 No 51600 Yes
## 89 Yes 125000 No
## 389 No 72300 Yes
## 201 Yes 40700 Yes
## 207 Yes 73500 No
## 213 Yes 70200 Yes
## 92 Yes 38700 Yes
## 65 No 123100 No
## 13 Yes 73600 Yes
## 286 Yes 58300 Yes
## 15 No 61900 Yes
## 385 Yes 130000 Yes
## 128 No 85600 Yes
## 209 Yes 73900 No
## 206 Yes 65500 No
## 273 Yes 47100 Yes
## 287 Yes 47800 Yes
## 84 No 95700 No
## 406 Yes 83500 No
## 163 No 55700 No
## 72 No 74200 No
## 340 No 24300 Yes
## 212 Yes 137900 Yes
## 379 Yes 60600 Yes
## 69 No 33700 Yes
## 20 Yes 31200 Yes
## 253 Yes 77900 No
## 410 Yes 36800 Yes
## 151 Yes 70800 No
## 363 No 109400 Yes
## 298 Yes 102700 No
## 132 Yes 80500 Yes
## 395 Yes 56100 Yes
## 375 Yes 110800 Yes
## 143 Yes 45100 Yes
## 98 Yes 39600 Yes
## 4 No 95300 No
## 174 No 52000 No
## 180 No 46700 Yes
## 404 Yes 59000 No
## 245 Yes 71600 Yes
## 353 No 54200 No
## 248 No 91900 Yes
## 274 No 159200 Yes
## 101 Yes 48100 Yes
## 342 Yes 41200 No
## 137 No 55800 Yes
## 386 Yes 60300 Yes
## 358 No 87200 No
## 68 No 66500 Yes
## 202 No 36700 Yes
## 350 No 88900 Yes
## 400 No 76400 No
## 317 No 33400 No
## 268 Yes 57400 No
## 77 No 45600 No
## 71 Yes 71300 Yes
## 126 Yes 77500 No
## 392 Yes 48200 No
## 210 Yes 120800 Yes
## 76 No 53300 No
## 321 Yes 118800 Yes
## 336 Yes 161400 No
## 30 Yes 61200 No
## 409 No 35100 No
## 329 No 49600 Yes
## 5 No 73300 Yes
## 278 Yes 81100 Yes
## 237 Yes 88400 Yes
## 332 Yes 94400 Yes
## 305 Yes 72600 No
## 352 No 58600 No
## 258 No 148000 Yes
## 361 Yes 70900 No
## 339 Yes 94100 Yes
## 95 Yes 16200 No
## 162 Yes 63300 No
## 319 Yes 76400 Yes
## 142 Yes 46500 Yes
## 173 Yes 70900 Yes
## 193 Yes 140100 No
## 140 No 39100 Yes
## 9 Yes 93100 No
## 325 No 82000 No
## 271 Yes 38800 Yes
## 158 Yes 43400 Yes
## 216 Yes 86200 No
## 279 No 68400 Yes
## 303 No 123000 No
## 225 Yes 41200 Yes
## 150 Yes 114000 No
## 141 No 58200 No
## 314 Yes 30300 Yes
## 356 Yes 66200 No
## 24 Yes 85700 No
## 233 Yes 84100 No
## 156 Yes 67300 No
## 157 Yes 49800 No
## 175 Yes 166500 Yes
## 284 Yes 57200 Yes
## 179 Yes 53400 Yes
## 401 Yes 77100 Yes
## 277 Yes 58900 No
## 221 No 53700 No
## 308 Yes 101500 No
## 282 Yes 59600 No
## 236 Yes 45500 No
## 289 Yes 64000 Yes
## 312 Yes 70800 No
## 280 No 55700 Yes
## 109 No 43900 Yes
## 118 Yes 82100 No
## 306 Yes 81500 No
## 105 Yes 60400 No
## 147 No 62600 Yes
## 281 No 45400 No
## 115 Yes 77400 Yes
## 87 Yes 65400 No
## 399 Yes 105500 No
## 318 Yes 51700 No
## 316 No 103500 No
## 191 Yes 43700 No
## 80 Yes 124700 No
## 110 Yes 52200 Yes
## 366 Yes 149300 Yes
## 12 No 43500 No
## 337 No 56100 No
## 161 Yes 42200 Yes
## 257 No 100700 Yes
## 112 No 77400 Yes
## 255 Yes 63300 Yes
## 78 Yes 73900 Yes
## 125 Yes 83600 No
## 1 Yes 75200 Yes
## 304 Yes 64300 Yes
## 34 No 50500 Yes
## 119 Yes 64400 Yes
## 146 Yes 40500 No
## 196 Yes 161100 Yes
## 46 Yes 41700 Yes
## 93 No 102400 No
## 347 No 94100 Yes
## 219 Yes 82700 Yes
## 139 Yes 41100 Yes
## 223 No 94800 Yes
## 198 No 59800 Yes
## 45 No 42100 Yes
## 145 No 63700 No
## 122 Yes 68300 No
## 111 No 64400 Yes
## 261 Yes 48400 No
## 252 No 128800 Yes
## 396 Yes 93600 Yes
## 200 Yes 174900 Yes
## 383 Yes 110800 Yes
## 311 No 64800 No
## 184 No 56600 No
## 360 No 51800 No
## 197 No 64900 No
## 50 No 61700 No
## 186 Yes 74500 Yes
## 226 No 70400 Yes
## 155 Yes 157200 Yes
## 230 Yes 53300 Yes
## 214 Yes 90900 No
## 293 Yes 51100 No
## 388 No 100700 No
## 152 Yes 60300 Yes
## 124 Yes 57300 Yes
## 97 No 43700 No
## 235 Yes 73400 Yes
## 373 Yes 36500 No
## 294 Yes 90100 No
## 371 No 104300 Yes
## 338 No 95000 Yes
## 3 Yes 48200 No
## 48 Yes 65700 No
## 365 No 49700 Yes
## 217 Yes 45900 No
## 41 No 110600 No
## 88 Yes 149300 Yes
## 54 Yes 49200 No
## 56 Yes 35000 No
## 8 Yes 54300 Yes
## 240 No 78700 Yes
## 324 Yes 67400 Yes
## 67 Yes 54800 Yes
## 83 Yes 69700 No
## 103 Yes 54800 No
## 153 Yes 78700 No
## 59 Yes 74000 No
## 117 No 148600 No
## 11 No 48600 No
## 55 Yes 39000 Yes
## 61 Yes 51400 Yes
## 384 Yes 120300 Yes
## 183 No 48900 Yes
## 159 Yes 71100 Yes
## 307 No 57000 Yes
## 2 Yes 70300 Yes
## 320 Yes 47400 Yes
## 387 No 70300 No
## 263 Yes 153000 No
## 398 No 96000 No
## 42 No 60300 Yes
## 6 Yes 123400 Yes
## 292 Yes 100600 No
## 359 Yes 79300 No
## 346 Yes 100100 Yes
## 349 Yes 89500 Yes
## 313 Yes 50300 Yes
## 100 Yes 33500 No
## 218 No 76000 No
## 133 Yes 88400 Yes
## 185 No 32500 No
## 169 No 112700 Yes
## 62 No 62100 Yes
## 262 Yes 58500 Yes
## 241 Yes 30700 Yes
## 37 No 92600 Yes
## 75 Yes 72500 Yes
## 204 Yes 168500 No
## 25 Yes 140300 Yes
## 135 Yes 84000 Yes
## 265 No 46400 Yes
## 164 Yes 107900 No
## 246 No 59000 Yes
## 136 Yes 34400 Yes
## 220 No 57400 Yes
## 107 No 71300 No
## 188 Yes 48100 Yes
## 290 No 87400 Yes
## 28 No 33900 Yes
## 129 Yes 59900 Yes
## 328 Yes 61800 No
## 275 Yes 52700 Yes
## 407 Yes 28200 Yes
## 82 No 205900 Yes
## 58 No 49300 No
## 154 No 179700 No
## 208 Yes 59100 Yes
## 222 No 29900 Yes
## 393 No 87900 Yes
## 171 Yes 137800 No
## 203 Yes 167800 No
## 345 No 117800 Yes
## 177 No 41800 No
## 33 Yes 95200 No
## 394 No 89600 No
## 211 No 25100 Yes
## 40 No 60500 Yes
## 351 Yes 53200 No
## 181 Yes 59600 Yes
## 66 Yes 322500 No
## 108 No 62800 No
## 38 Yes 66200 No
## 264 No 86800 Yes
## 397 No 100300 Yes
## 86 Yes 118600 No
## 283 Yes 47700 Yes
## 295 Yes 50800 Yes
## 299 No 68400 No
## 327 No 98800 Yes
## 39 No 45700 No
## 36 No 147400 No
## 249 Yes 68300 Yes
## 166 Yes 94700 No
## 367 Yes 68900 No
## 272 No 90200 Yes
## 44 No 70100 Yes
## 254 No 67700 Yes
## 266 Yes 36900 No
## 16 No 57600 No
## 60 Yes 57100 Yes
## 380 No 48200 Yes
## 326 Yes 60700 No
## 189 Yes 95900 Yes
## 43 No 75700 No
## 114 No 54100 Yes
## 238 Yes 74900 No
## 205 Yes 112100 Yes
## 370 Yes 82700 Yes
## 334 Yes 72400 Yes
## 18 No 64600 Yes
## 368 Yes 50100 No
## 364 No 43800 No
## 259 No 59600 No
## 29 Yes 54400 Yes
## 374 Yes 65500 Yes
# Load the home price data and inspect its column types
home_prices <- read.csv(file = "C:/Users/plu5638/Desktop/Business Analytics/Module 4/home_prices_WT.csv")
str(object = home_prices)
## 'data.frame': 117 obs. of 8 variables:
## $ PRICE : num 1350 1550 1748 1750 1925 ...
## $ SQFT : int 1142 1480 1400 1505 900 2563 1428 1375 1080 1464 ...
## $ YEAR : int 1959 1965 1969 1969 1971 1971 1974 1976 1979 1981 ...
## $ BATHS : num 1.5 1.5 1.5 1.5 1 2 1.5 1.5 1 1.5 ...
## $ FEATS : int 0 4 1 2 3 2 2 1 3 2 ...
## $ NBR : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 1 1 1 1 ...
## $ CORNER: Factor w/ 2 levels "NO","YES": 1 1 2 2 1 1 1 1 1 1 ...
## $ TAX : num 558 1275 1202 1478 978 ...
# a) Per-column summary: quartiles and mean for the numeric columns, level
# counts for the factor columns
summary(object = home_prices)
## PRICE SQFT YEAR BATHS FEATS
## Min. :1350 Min. : 837 Min. :1959 Min. :1.000 Min. :0.00
## 1st Qu.:1975 1st Qu.:1280 1st Qu.:1991 1st Qu.:1.000 1st Qu.:3.00
## Median :2400 Median :1549 Median :1999 Median :1.500 Median :4.00
## Mean :2649 Mean :1645 Mean :1996 Mean :1.585 Mean :3.53
## 3rd Qu.:3000 3rd Qu.:1894 3rd Qu.:2008 3rd Qu.:2.000 3rd Qu.:4.00
## Max. :5375 Max. :2931 Max. :2013 Max. :3.000 Max. :8.00
## NBR CORNER TAX
## NO :39 NO :95 Min. : 557.5
## YES:78 YES:22 1st Qu.:1477.5
## Median :1807.5
## Mean :1989.8
## 3rd Qu.:2307.5
## Max. :4412.5
#b) Let's look at a box plot to see if we can make a decision visually
#PRICE is plotted separately for each NBR level so the two groups can be compared side by side
#NOTE(review): the previous call box-plotted every column of home_prices (factors included) under unrelated labels; PRICE by NBR is assumed to be the intended comparison - confirm against the assignment text
boxplot(PRICE ~ NBR, data=home_prices, main="PRICE by NBR", xlab="NBR", ylab="PRICE")
#Prices of the homes with NBR == "YES". PRICE is the price column; the data frame has no column called home_prices, so $home_prices returned NULL.
home_prices_subset_yes<-subset(home_prices, subset=(NBR=="YES"))$PRICE
#Prices of the homes with NBR == "NO"
home_prices_subset_no<-subset(home_prices, subset=(NBR=="NO"))$PRICE