Task 1

After the scoring file was reviewed, a star schema was created from its contents.


Task 2

The scoring file is read into RStudio.

# Load the scoring data from the data folder and preview its first rows
mydata <- read.csv("data/scoring.csv")
head(mydata)
# Pull the Income column out of the data frame; the outer parentheses
# make R print the vector right after assigning it
(income <- mydata$Income)
   [1] 129 131 200 182 107 214 125  80 107  80 125
  [12] 121 199 170  50 131 330 200 130 137 107 324
  [23] 112 140 143 130 180 251  85 150 122 198 150
  [34] 170 119 208 115  99 120  90 137 230 142  71
  [45] 120 233 289 128 150 145  90 301 200 150 100
  [56] 100 100 155 715 245 150  70 190 152 126 181
  [67] 185 170 176 238 115 200 411  93 108 500  45
  [78] 250 100  70 150  70 263 200  78 120 125  50
  [89] 146  70 105 413 500 350 200 138  80 208 137
 [100]  58 130 123 180 140 315 164 325 135 109 185
 [111] 217 300  77 253 101 200 124 143 250 135 115
 [122] 160 214 390 500  95 200  85 214 140  60 180
 [133] 300 200 242 155 100 105 166 120 115 350 214
 [144] 442 101 122 250  90 160 300  83 200  60 205
 [155] 133 179  69 195 112 210 155 394 149 120 400
 [166] 165 125  74  86 185 165 110 138 300 147 348
 [177] 112 350 110  75 230  85 210 125 149 201 105
 [188] 183 113 126 300 160 160  89  95 125 120 359
 [199]  80  67 148 298 318 185  39 194  80 147 156
 [210] 178 130  63  88 140 115  83 144 200 172 200
 [221] 318 177 133 150 208 145 157 190  86  90 100
 [232] 214 100 117 110 168 150 166 283 149 250 120
 [243] 236 277  55 200 235  80 200 140 125 185 190
 [254] 150 102 170 315 130 156 177 341 240 142 333
 [265] 125 170 220 230 157 340 120  91 150  88 120
 [276]  70 106 162  77 128 189 300  92 380 500 130
 [287] 141 220 146 250 132 150 127  90 166  69 182
 [298]  77  50 131  62  60  86 143 120 283 138  90
 [309]  99 160 330 100 210 100  97  65 227 140 115
 [320] 150  90 275 176 110 140 500  66 273 145  67
 [331] 232  80 130 200 154 187 135 160 133 230 154
 [342]  50 189 202  20  40  50 125 100 115 160  70
 [353] 220  99 107 160 150 232 120 225 100  80 208
 [364] 105  98 532 140  90 155 120 159 122 156  90
 [375] 130  45 118 152 135 125 208  81 160 300 135
 [386] 121 145 165 428 233 245 232 200  42  87 200
 [397] 250 350 105 100 160 132 140 160 148 113  97
 [408] 107 206 275 176 175 223  70  87  42  60 300
 [419] 144 217 180  69 110  50  86 172 109  95 203
 [430] 114 186 155 195 246 113 290 103 125 199 133
 [441] 210 124  78 200 100  95 183  43 120 198 100
 [452] 150 133 110 202 140 123 275 200 180  55 105
 [463] 146 100 136  90 144 116 128 105 125  68 400
 [474] 251 225 140  34 320 124  87 161  90  92 110
 [485]  87  93 110  81  98 155 167 190 110  76 170
 [496] 214  70  55  80  81 139 206  74 120 300 224
 [507]  80 166 110 161  80 150 170 100 225 166 200
 [518] 110 110 160 204 135 158 290 100 115 149 143
 [529] 150 199 142  80  93 113  92 205 210 256 260
 [540] 106 160 148 220  63 106  81 100 260 243  88
 [551] 186 105  80 154  35  63 297 161 200 140 125
 [562]  78  57 300  66 202 175 150 158  92 123 125
 [573] 113 167 148 156 123 143 180  92 150  92 139
 [584] 250 315 121 180 129 214  99 110  85 138  93
 [595] 100 160 215  90 464 167  86 125 274 135 300
 [606] 300 128  90 115 107 300 230 122  43 250 190
 [617] 100 246  85 173 150  86 183 100  94 150 140
 [628] 107 117 230 470 102 210  42 130  90 120 117
 [639] 160  90  50 100 182 135  67 167 137 140  85
 [650] 324  77 100  42 251 180 400 158 300  90 146
 [661] 255 250  50 150 232 190 178 254 110  70 100
 [672] 137 258 242  85 179 125 500 140  70 150 245
 [683] 193 110  70 115  85 400 130 120 250 108 225
 [694] 300 250 150 156 319 100  51 145 168  54 100
 [705] 115 209 180  63 130 170 157  79  64  99  35
 [716] 150  65 100 160 215 112 126 150 136 123  80
 [727]  72 110 111  60 184 104 290 173 125  60 160
 [738] 126 148 170 230 166  67 190  80  60  91  96
 [749] 180 130  67 100 830 125  30 237 152  95 139
 [760] 145 300 135  80 100 275 250 125 100 130 131
 [771] 350 538 276 107 105 156 110 260  34  81  76
 [782]  98 174 132 105 300 117 119 140  70 265 180
 [793]  62 189  75  70 188 150 124 113  98 184 195
 [804] 159 959 170  28 268 155 240 500 135 225 100
 [815] 190 200 154 210 145 210 102  91  75 150 120
 [826]  36 129 125 156  72 166  75 125 250 128 155
 [837] 459 360 100  80 250 105 300 137 110 127  56
 [848]  75  52 100 170 150 188 175  64 110 186 140
 [859] 200 135 130 200  33  69  90 175 197 125 170
 [870] 247 250 130 207  65 180 250 129 159 116  88
 [881] 120 100 100 113  80 150  52  71  99  63 166
 [892] 108 134 138 110 160 176  70  60 130 176 130
 [903] 108 144 150 183 108 200 180 130 140 141 127
 [914] 117 130 240 425 800  80  95  71 135  47 250
 [925] 150 118 130 125 171 265 100  69 293  87 110
 [936]  75  40 140 122 161  70  65  66  73 178 140
 [947]  50  78  60 350 140 125 121 155 133 117 125
 [958] 136  25 176 430 125  79  92 176  85 188 300
 [969] 100 120  60 125 464 176  90 200 415 318 163
 [980]  72 156  49 110 140  92  74 145 104 400  95
 [991] 150 160 177  85 380 107 134  81 191 100
 [ reached getOption("max.print") -- omitted 3446 entries ]
# Pull the Expenses column out of the data frame; the outer parentheses
# make R print the vector right after assigning it
(expenses <- mydata$Expenses)
   [1]  73  48  90  63  46  75  75  35  90  90  60
  [12]  60  75  75  35  75  35  65  45  35  46  45
  [23] 105  74  45  60  75  75  75  45  45  75  75
  [34]  35  45  45  75  70  45  44  75  35  45  75
  [45]  35  35  75  48  60  60  75 105  75  60  85
  [56]  35  45  60  90  70  90  35  75  75  75  75
  [67]  60  45  93  60  60  45  75  75  86  90  45
  [78]  70  45  60  60  60  35  75  35  47  60  40
  [89]  60  35  35 101  95 140  75  45  35 120  89
 [100]  45  73  75  78  35  75  76  90  45  35  79
 [111]  45  90  35  60  75  35  75  45  35  75  57
 [122]  75  66 114  45  35  75  35  90  75  45  75
 [133]  60  65  75  60  45  60  45  47  60  90  75
 [144] 119  35 105  60  35  45  60  35  45  60  35
 [155]  75  78  35  45  75  90  75  35  60  57  60
 [166]  45  75  42  75  45  77  75  74  75  35  35
 [177]  60  75  45  45 113  75  75  75  75  75  60
 [188]  78  46  45 120  75  90  45  60  75  87 126
 [199]  45  35  90  45  75  75  60  45  35  75 105
 [210]  75  75  35  35  45  45  60  60  45  90 105
 [221]  60  60  60  35  35  75  60  45  35  35 100
 [232]  56  45  57  45  62  45  45  90  90  90  60
 [243]  35  45  35  35  45  45  45  45  73  94 105
 [254]  60  60  77  60  88  45  93  60 105  60  75
 [265]  35  45  75  60  60  60  60  44  60  35  35
 [276]  75  56  35  45  75  35  60  60  60  75  60
 [287]  45  75  60  75  60  90  73  60  90  45  45
 [298]  45  45  35  35  75  90  59  87  35  60  90
 [309]  85  75  60  60  96  45  45  45  45  45  87
 [320]  60  35  90  35  71  75  45  75  60  75  60
 [331]  75  90  35  45  75  90  60  45  60 173  60
 [342]  60  60  60  90  60 110  45  70  60  75  60
 [353]  35  45  35  75  75  45  45  75  35  75  35
 [364]  90  60 113  60  75  76  45  90  45  75  35
 [375]  35  90  72  60  49  58  66  35  75  75  60
 [386]  60  35  35  75  68  90  60  75  60  35  45
 [397]  75  70  60  60  75  35  49  45  75  45  45
 [408]  60  66  45  45  63  45  67  84  90  35  35
 [419]  75  75  60  82  86  35  35  60  71  60  80
 [430]  45  54  51  60  75  86  60  75  45  95  45
 [441]  60  35  45  55  45  85  63  35  75  45  45
 [452]  50  60  45  60  60  45  45  75  35  35  45
 [463]  75  75  49  60  60  60  75  60  90  45  75
 [474]  60  58  35  45 107  45  54  35  90  75  60
 [485]  60  54  45  43  45  60  60  60  75  35  62
 [496]  35  35  35  45  75  35  66  35  35  90  75
 [507]  68  90  60  45  45  75  52  45  68  77  45
 [518]  45  46  45  35  45  45  75  60  60  45  75
 [529]  60  35  59  75  69  45  45  75  35  60 101
 [540]  45  60  60  75  75  75  75  35  75  60  44
 [551]  75  75  75  60  64  66  45  91  35  45  90
 [562]  35  35 135  60  75  90  45  45  45  45  88
 [573]  45  45  75  75  60 120  93  35  90  60  45
 [584]  75  75  35  35 105  90  45  75  75  90  60
 [595]  75  76  45  35  90  35  84  60 105  75  60
 [606]  75 103  90  35  56  75 105  45  35  90  90
 [617]  45  60  69  77  75  90  45  55  60  60  74
 [628]  56  60  60  82  60  75  35  48  45  60  60
 [639]  75  69  45  75  75  74  45  45  60  59  35
 [650]  45  60  45  39  60  35  60  91  75  44  90
 [661]  75  35  45  60  45  35  63  75  56  75  85
 [672]  60  90  60  90  45  58  90  60  52  60  90
 [683]  75  35  52  87  35  85  35 102 130  75  75
 [694] 105  75  35  75 107  85  40  45  52  65  85
 [705]  60  35  60  35  60  60  35  35  90  45  35
 [716]  75  45  35  91  60  75  45  90  35  45  90
 [727]  45  60  56  35  75  45  75  35  60  35  75
 [738]  73  60  75  68  60  35  35  60  66  35  35
 [749]  75  60  45  75  60  45  60  69  60  60  74
 [760]  45  45  60  35  60  45  90  75  90 118  75
 [771]  60  60  75  35  45  45  90  60  45  68  75
 [782]  60 135  60  35  75  45  60  90  35  35  78
 [793]  60  35  43  35  75  35  45  45  75  35  90
 [804]  61 135  92  35 102  60  84  90  75  35  45
 [815]  60  60 105  45  60  90  35  45 105  75  35
 [826]  64  90  45  75  60  75  35  90  70  45  60
 [837]  60  75  85  53  90  45 105  60  90  35  35
 [848]  90  35  35  62  35  75  75  45  35  45  75
 [859]  45  90  45  60  60  75  45  60  90  90  45
 [870] 100  45  75  75  97  90  60  45  90  35  75
 [881]  72  70  60  35 105  60  75  35  55  35  75
 [892]  45  58  74  60  45  60  35  51  45  35  90
 [903]  35  60  60 108  56  90  78  58  74  75  60
 [914]  60  88  75  35  75  35  45  35  45  35  90
 [925]  35  60  60  45  77  45  45  82  75  35  45
 [936]  75  75  45  60  45  35  75  60  42  78  35
 [947]  35  45  75  60  59  75 102  35  35  75  35
 [958]  75  75  93  45  88  35  69  45  75  90  75
 [969]  35  57  35 103  90  35  35  60  35  45  35
 [980]  97  45  35  75  45  60  35  60  75  75  45
 [991]  60  35  60  60 113  60  45  35 124  75
 [ reached getOption("max.print") -- omitted 3446 entries ]
# Arithmetic mean of the income vector (assigned and printed in one step)
(meanIncome <- mean(income))
[1] 140.6298
# Arithmetic mean of the expenses vector (assigned and printed in one step)
(meanExpenses <- mean(expenses))
[1] 55.60144
# Standard deviation of the income vector, as computed by sd()
# (assigned and printed in one step)
(sdIncome <- sd(income))
[1] 80.1779
# Standard deviation of the expenses vector, as computed by sd()
# (assigned and printed in one step)
(sdExpenses <- sd(expenses))
[1] 19.52084

With the means and standard deviations computed, the signal-to-noise ratio (SNR = mean / standard deviation) can be calculated for each variable.

# Signal-to-noise ratio of Income: its mean divided by its standard deviation
# (assigned and printed in one step)
(snrIncome <- meanIncome / sdIncome)
[1] 1.753972
# Signal-to-noise ratio of Expenses: its mean divided by its OWN standard
# deviation (sdExpenses). Dividing by sdIncome would mix the two variables
# and understate the ratio (55.60144 / 19.52084 is about 2.848).
snrExpenses <- meanExpenses / sdExpenses
snrExpenses
[1] 2.848312

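Dividing each variable's mean by its own standard deviation gives an SNR of about 1.75 for Income (140.63 / 80.18) and about 2.85 for Expenses (55.60 / 19.52). Income therefore has the lower SNR and is the noisier variable, while Expenses has the higher SNR and carries more signal relative to its noise.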

Task 3

These charts show income by age cohort and by marital status, highlighting which groups earn the highest incomes.

LS0tCnRpdGxlOiAiQnVzaW5lc3MgQW5hbHl0aWNzIExhYiBXb3Jrc2hlZXQgMDEiCmF1dGhvcjogIkthcmluYSBSb2NoYSIKZGF0ZTogIlN1bW1lciAyMDE3IgpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazogZGVmYXVsdAogIGh0bWxfZG9jdW1lbnQ6IGRlZmF1bHQKICBwZGZfZG9jdW1lbnQ6IGRlZmF1bHQKc3VidGl0bGU6IENNRSBHcm91cCBGb3VuZGF0aW9uIEJ1c2luZXNzIEFuYWx5dGljcyBMYWIKLS0tCgoKIyMjIFRhc2sgMQpBZnRlciBnb2luZyBvdmVyIHRoZSBzY29yaW5nIGZpbGUsIGEgc3RhciBzY2hlbWEgd2FzIGNyZWF0ZWQgd2l0aCB0aGUgaW5mb3JtYXRpb24uIAoKCiFbXShpbWdzL2ltZzA3LnBuZykKCi0tLS0tLS0tLS0tLS0KCiMjIyBUYXNrIDIKIFRoZSBzY29yaW5nIGZpbGUgaXMgcmVhZCBpbnRvIFIgc3R1ZGlvLiAKYGBge3J9Cm15ZGF0YSA9IHJlYWQuY3N2KGZpbGU9ImRhdGEvc2NvcmluZy5jc3YiKQpoZWFkKG15ZGF0YSkKYGBgCgoKCmBgYHtyfQojIFRoZSBpbmNvbWUgY29sdW1uIGlzIGV4dHJhY3RlZCBhbmQgY2FsbGVkIGZyb20gdGhlIGRhdGEgc2V0CmluY29tZSA9IG15ZGF0YSRJbmNvbWUKaW5jb21lCmBgYAoKYGBge3J9CiMgVGhlIGV4cGVuc2VzIGNvbHVtbiBpcyBleHRyYWN0ZWQgYW5kIGNhbGxlZCBmcm9tIHRoZSBkYXRhIHNldApleHBlbnNlcz0gbXlkYXRhJEV4cGVuc2VzCmV4cGVuc2VzCmBgYAoKYGBge3J9CiNUaGUgbWVhbiBmdW5jdGlvbiB3YXMgdXNlZCB0byBkZXRlcm1pbmUgdGhlIGF2ZXJhZ2UgaW5jb21lCm1lYW5JbmNvbWUgPSBtZWFuKGluY29tZSkKbWVhbkluY29tZQpgYGAKCmBgYHtyfQojVGhlIG1lYW4gZnVuY3Rpb24gd2FzIHVzZWQgdG8gZGV0ZXJtaW5lIHRoZSBhdmVyYWdlIEV4cGVuc2VzCm1lYW5FeHBlbnNlcyA9IG1lYW4oZXhwZW5zZXMpCm1lYW5FeHBlbnNlcwpgYGAKCmBgYHtyfQojU3RhbmRhcmQgRGV2aWF0aW9uIG9mIEluY29tZQpzZEluY29tZSA9IHNkKGluY29tZSkKc2RJbmNvbWUKYGBgCgpgYGB7cn0KI1N0YW5kYXJkIERldmlhdGlvbiBvZiBFeHBlbnNlcwpzZEV4cGVuc2VzID0gc2QoZXhwZW5zZXMpCnNkRXhwZW5zZXMKYGBgCkFmdGVyIHRoZXNlIGNhbGN1bGF0aW9ucywgdGhlIFNOUiAoc2lnbmFsIHRvIG5vaXNlIHJhdGlvKSBjYW4gYmUgY2FsY3VsYXRlZAoKYGBge3J9CiNTTlIgUmF0aW8gZm9yIEluY29tZQpzbnJJbmNvbWUgPSBtZWFuSW5jb21lL3NkSW5jb21lCnNuckluY29tZQpgYGAKCmBgYHtyfQojU05SIFJhdGlvIGZvciBFeHBlbnNlcwpzbnJFeHBlbnNlcyA9IG1lYW5FeHBlbnNlcy9zZEluY29tZQpzbnJFeHBlbnNlcwpgYGAKClRoZSB2YXJpYWJsZSB3aXRoIHRoZSBtb3N0IG5vaXNlIGlzIEV4cGVuc2VzIHNpbmNlIHRoZSBTTlIgaXMgbGVzcyB0aGVuIDE6MS4gSW5jb21lIGhhcyB0aGUgaGlnaGVyIFNOUiB0aGVyZWZvcmUgbW9yZSBzaWduYWwgbG9jYXRlZCBpbiB0aGF0IGNvbHVtbi4KLS0tLS0tLS0tLS0tCgojIyMgVGFzayAzCgohW10o
aW1ncy9zcy5wbmcpCgohW10oaW1ncy9yci5wbmcpCgpUaGVzZSBjaGFydHMgZGVtb25zdHJhdGUgdGhlIGhpZ2hlc3QgbGV2ZWwgZWFybmluZyBpbmNvbWUgYnkgYWdlIGNvaG9ydCBhbmQgbWFyaXRhbCBzdGF0dXMgaW4gcmVsYXRpb24gdG8gaW5jb21lLiAKCg==