600 employees participated in a company-wide experiment to test whether an educational program would be effective at increasing employee satisfaction. Half of the employees were assigned to the control group, while the other half were assigned to the experimental group. Only the experimental group received the educational intervention. Both groups were given an employee satisfaction scale at time one to measure their initial levels of satisfaction. The same scale was then administered halfway through the program and at the end of the program. The goal of the experiment was to assess whether satisfaction increased across the measurements during the program, as compared to the control group.
a) Gender (1 = male, 2 = female)
b) Group (1 = control group, 2 = experimental group)
c) 3 satisfaction scores, ranging from 2-100 points. Decimals are possible! The control group was measured at the same three time points, but did not take part in the educational program.
i) Before the program
ii) Halfway through the program
iii) After the program
# Load the raw experiment file (comma-separated, first row holds the column
# names) and preview its first rows.
data <- read.csv(file = "06_data.csv", header = TRUE, sep = ",")
head(x = data)
a) Include output and indicate how the data are not accurate.
b) Include output to show how you fixed the accuracy errors, and describe what you did.
# Turn the numeric codes into labelled factors. Only codes 1 and 2 are
# listed as levels, so any other code ends up as NA.
data$Gender <- factor(
  data$Gender,
  levels = c(1, 2),
  labels = c("Male", "Female")
)
data$Group <- factor(
  data$Group,
  levels = c(1, 2),
  labels = c("control group", "experimental group")
)
# Frequency counts for each categorical variable.
table(data[["Gender"]])
##
## Male Female
## 194 198
table(data[["Group"]])
##
## control group experimental group
## 193 199
# Overall summary of every column (ranges for the scores, NA counts).
summary(object = data)
## Gender Group Begin Middle
## Male :194 control group :193 Min. : 61.15 Min. :37.35
## Female:198 experimental group:199 1st Qu.: 94.72 1st Qu.:59.88
## NA's : 8 NA's : 8 Median :102.26 Median :64.12
## Mean :102.17 Mean :63.86
## 3rd Qu.:110.04 3rd Qu.:68.18
## Max. :148.25 Max. :83.79
## NA's :8 NA's :8
## After
## Min. : 48.15
## 1st Qu.: 89.99
## Median : 97.42
## Mean : 95.83
## 3rd Qu.:103.73
## Max. :120.41
## NA's :8
# List every distinct Begin score with its frequency to inspect the range.
table(data[["Begin"]])
##
## 61.1517414867813 67.1514791279436 69.7662539510953 70.8681168565285
## 1 1 1 1
## 73.7383331667716 74.0293420910071 74.9458543252283 76.6337798845039
## 1 1 1 1
## 77.2043506334251 77.3777351361481 78.3015472751624 79.1411829395332
## 1 1 1 1
## 79.99833683478 80.7310947311745 81.8138414011776 82.2418620558794
## 1 1 1 1
## 82.2945247458954 82.3318817922586 82.7831441611202 82.8159490741293
## 1 1 1 1
## 82.8809252669846 82.9568760980098 82.959466159303 83.395748532771
## 1 1 1 1
## 83.4152478374041 83.5229741070761 83.703069584031 84.2586695030709
## 1 1 1 1
## 84.3582837925009 84.4203931987448 84.4245107459309 84.5051290948509
## 1 1 1 1
## 84.744271869474 84.8877478411169 85.1126247213872 85.2576577109047
## 1 1 1 1
## 85.4883734796657 85.4888941627571 85.6819394783246 85.970113798223
## 1 1 1 1
## 86.1581536771132 86.3843822689459 86.997184381672 87.0485837847803
## 1 1 1 1
## 87.0555259528393 87.1039665110656 87.2511477101741 87.369038597578
## 1 1 1 1
## 87.3841610534768 87.6308300011426 87.715116306441 88.0046968288506
## 1 1 1 1
## 88.1927905026495 88.5618375534794 88.5644032460828 88.5872263491721
## 1 1 1 1
## 88.6778545806295 88.7273259023489 88.8014861163707 88.9551143769654
## 1 1 1 1
## 89.0319926173762 89.0589300256884 89.1296900718606 89.2274822638174
## 1 1 1 1
## 89.3802289185815 89.4144058029989 89.6817805777029 89.7437409676296
## 1 1 1 1
## 89.8004617169741 89.9181943669027 91.0002619030721 91.3991274226209
## 1 1 1 1
## 91.5476918590267 91.6125775584385 91.8115982386781 92.1597439620526
## 1 1 1 1
## 92.4296937880272 92.4653329335099 92.7913876282777 92.9747362598558
## 1 1 1 1
## 93.0244572344004 93.0722320087239 93.2811604379461 93.4021749177394
## 1 1 1 1
## 93.5042548773129 93.5226888315596 93.6072268322768 93.6607814759116
## 1 1 1 1
## 93.7893211966588 93.9531369362382 94.1724262981647 94.2243374300984
## 1 1 1 1
## 94.323523121338 94.3727945738873 94.4148806469266 94.4678524038531
## 1 1 1 1
## 94.6149944061209 94.6154078455809 94.7554051183167 94.849398387548
## 1 1 1 1
## 94.9000841856689 94.9689343540523 95.0416419219692 95.0728318213363
## 1 1 1 1
## 95.148849769359 95.3216536170304 95.325973487824 95.3321829953352
## 1 1 1 1
## 95.3595921829213 95.4015575421504 95.5943334291164 95.6342077695822
## 1 1 1 1
## 95.6955369716614 95.7177619878273 95.904009067595 95.9875551483062
## 1 1 1 1
## 96.060307991104 96.0749495653137 96.1071410866353 96.217038751933
## 1 1 1 1
## 96.2672501843385 96.4433027334257 96.4664953171689 96.6689912873199
## 1 1 1 1
## 96.8015459718017 96.9017976639136 96.9457480103538 96.96296017109
## 1 1 1 1
## 97.0854110358953 97.1522648034001 97.2346836106306 97.2963951397144
## 1 1 1 1
## 97.3126070092487 97.3183045968958 97.506193036272 97.6064486437099
## 1 1 1 1
## 97.6444211512845 97.8892847570626 98.0275670220123 98.1326606300911
## 1 1 1 1
## 98.1937580390728 98.250253290392 98.2984833860391 98.323052115374
## 1 1 1 1
## 98.3710097883983 98.3860177335765 98.5628921042367 98.602248014107
## 1 1 1 1
## 98.6039434976668 98.8146762281773 99.0942946662488 99.1358736986511
## 1 1 1 1
## 99.1536496645049 99.1668240281839 99.2940824596958 99.3109264011663
## 1 1 1 1
## 99.3293997243332 99.7712053625981 99.7870105555304 99.8474589683344
## 1 1 1 1
## 99.8884308086895 99.9142859896424 100.050976564544 100.0645731551
## 1 1 1 1
## 100.148894490988 100.202747160804 100.258221480307 100.527128811274
## 1 1 1 1
## 100.550923820647 100.607317213962 100.614992421078 100.781333720429
## 1 1 1 1
## 100.795122112223 100.90424497157 100.997464552552 101.009523488187
## 1 1 1 1
## 101.054592695228 101.134610684113 101.19411357389 101.270871400634
## 1 1 1 1
## 101.272153766804 101.34180331435 101.385238778553 101.435471914819
## 1 1 1 1
## 101.473031105661 101.51173384279 101.665302585825 101.668649610141
## 1 1 1 1
## 101.69828754013 101.774069921933 101.854223829106 102.111663514243
## 1 1 1 1
## 102.121747369968 102.135594320337 102.188016535033 102.244941127834
## 1 1 1 1
## 102.269038820775 102.289077862157 102.313615612262 102.423970315447
## 1 1 1 1
## 102.540881248478 102.555850160034 102.618740064695 102.643622695241
## 1 1 1 1
## 102.673388324613 102.678735026957 102.725788295741 102.776217491649
## 1 1 1 1
## 102.81551870494 102.918168364947 103.033008907644 103.043769586729
## 1 1 1 1
## 103.134178783674 103.194713783384 103.216193108809 103.277053348862
## 1 1 1 1
## 103.35842826365 103.36184757239 103.366009616274 103.415767266628
## 1 1 1 1
## 103.591068456706 103.600327800718 103.611298977318 103.644342144799
## 1 1 1 1
## 103.689934976546 103.692150526362 103.855670926804 103.874820971359
## 1 1 1 1
## 103.987296941854 104.058591513831 104.075649769253 104.17758089062
## 1 1 1 1
## 104.209657123751 104.294314166976 104.300075760382 104.303322799019
## 1 1 1 1
## 104.328502670096 104.392880261306 104.430282579858 104.43724201016
## 1 1 1 1
## 104.629786461933 104.794091515599 104.803900274246 104.807181143794
## 1 1 1 1
## 104.94855655703 104.97420749423 105.104262566806 105.2794842266
## 1 1 1 1
## 105.373741022187 105.434703294943 105.499891500561 105.640999770856
## 1 1 1 1
## 105.74865863924 105.953044961298 106.110934682977 106.29715829145
## 1 1 1 1
## 106.539930485261 106.723568938361 106.775999795229 106.893117396896
## 1 1 1 1
## 106.990272637393 107.017515827981 107.084049034877 107.171892711892
## 1 1 1 1
## 107.198560983716 107.232517416962 107.247655811922 107.337751805094
## 1 1 1 1
## 107.350976006512 107.392524890768 107.528536905669 107.574117238909
## 1 1 1 1
## 107.839674531273 107.849321742236 107.872254475764 107.954886121401
## 1 1 1 1
## 108.098761530938 108.155358960971 108.166786700923 108.455028360843
## 1 1 1 1
## 108.546885985181 108.606066861127 108.677855619456 108.721963518785
## 1 1 1 1
## 108.895645538298 109.151944094518 109.17449173075 109.250441048304
## 1 1 1 1
## 109.255130412479 109.640010992244 109.822837872877 109.848131405327
## 1 1 1 1
## 109.970668019337 110.013461540157 110.135144205357 110.321695222065
## 1 1 1 1
## 110.339971213066 110.404538847187 110.435728996122 110.445501975315
## 1 1 1 1
## 110.582242326076 110.687139929009 110.693431583436 110.829453597433
## 1 1 1 1
## 110.889818749295 110.938146232535 111.086394860064 111.153463694991
## 1 1 1 1
## 111.241385655184 111.241885059382 111.684505470591 111.71572843611
## 1 1 1 1
## 111.880724879106 112.000558795847 112.084849379929 112.340356573126
## 1 1 1 1
## 112.373026813576 112.427365956688 112.517027005586 112.521372972644
## 1 1 1 1
## 112.565447200567 112.661450445193 112.921880441453 113.092891853084
## 1 1 1 1
## 113.128870606978 113.556446808088 113.7206378045 113.832320590169
## 1 1 1 1
## 113.924733973802 113.987827529826 114.213757987575 114.238880786021
## 1 1 1 1
## 114.258753233226 114.431628088837 114.646619586684 114.65940063447
## 1 1 1 1
## 114.868936616942 114.94373326149 115.104523672167 115.374284365944
## 1 1 1 1
## 115.547535906953 115.583503225968 115.701125649237 115.717448793685
## 1 1 1 1
## 115.719061145305 115.83086038906 115.934085138489 116.062304533836
## 1 1 1 1
## 116.064734896053 116.27292970231 116.466778630241 116.681062427326
## 1 1 1 1
## 116.817723677839 117.048822193089 117.112091981034 117.126750146877
## 1 1 1 1
## 117.249652296197 118.011845575996 118.21194520091 118.405507479662
## 1 1 1 1
## 118.426997091646 118.61803020352 119.030871604403 119.402171439409
## 1 1 1 1
## 119.799965060639 120.062438982909 120.46290922411 120.746648197476
## 1 1 1 1
## 120.89301849009 121.380109400536 121.765558574353 121.997422126502
## 1 1 1 1
## 122.583096828975 123.570141948037 123.893096872146 124.583146628441
## 1 1 1 1
## 124.739123895559 125.017371292953 125.223084577078 125.235019967648
## 1 1 1 1
## 126.187950055265 126.390507576106 127.852986784811 132.086804738862
## 1 1 1 1
## 133.015153096455 133.483583840114 135.07943876267 135.571451337937
## 1 1 1 1
## 137.298227451143 141.622135518881 142.305120934051 148.248630921813
## 1 1 1 1
# Accuracy screen for Begin: the scale runs from 2 to 100 inclusive, so only
# values strictly below 2 or strictly above 100 are entry errors. A score of
# exactly 2 is valid and must be kept. which() skips entries that are
# already NA.
data$Begin[which(data$Begin < 2 | data$Begin > 100)] <- NA
# Re-tabulate to confirm no out-of-range Begin scores remain.
table(data$Begin)
##
## 61.1517414867813 67.1514791279436 69.7662539510953 70.8681168565285
## 1 1 1 1
## 73.7383331667716 74.0293420910071 74.9458543252283 76.6337798845039
## 1 1 1 1
## 77.2043506334251 77.3777351361481 78.3015472751624 79.1411829395332
## 1 1 1 1
## 79.99833683478 80.7310947311745 81.8138414011776 82.2418620558794
## 1 1 1 1
## 82.2945247458954 82.3318817922586 82.7831441611202 82.8159490741293
## 1 1 1 1
## 82.8809252669846 82.9568760980098 82.959466159303 83.395748532771
## 1 1 1 1
## 83.4152478374041 83.5229741070761 83.703069584031 84.2586695030709
## 1 1 1 1
## 84.3582837925009 84.4203931987448 84.4245107459309 84.5051290948509
## 1 1 1 1
## 84.744271869474 84.8877478411169 85.1126247213872 85.2576577109047
## 1 1 1 1
## 85.4883734796657 85.4888941627571 85.6819394783246 85.970113798223
## 1 1 1 1
## 86.1581536771132 86.3843822689459 86.997184381672 87.0485837847803
## 1 1 1 1
## 87.0555259528393 87.1039665110656 87.2511477101741 87.369038597578
## 1 1 1 1
## 87.3841610534768 87.6308300011426 87.715116306441 88.0046968288506
## 1 1 1 1
## 88.1927905026495 88.5618375534794 88.5644032460828 88.5872263491721
## 1 1 1 1
## 88.6778545806295 88.7273259023489 88.8014861163707 88.9551143769654
## 1 1 1 1
## 89.0319926173762 89.0589300256884 89.1296900718606 89.2274822638174
## 1 1 1 1
## 89.3802289185815 89.4144058029989 89.6817805777029 89.7437409676296
## 1 1 1 1
## 89.8004617169741 89.9181943669027 91.0002619030721 91.3991274226209
## 1 1 1 1
## 91.5476918590267 91.6125775584385 91.8115982386781 92.1597439620526
## 1 1 1 1
## 92.4296937880272 92.4653329335099 92.7913876282777 92.9747362598558
## 1 1 1 1
## 93.0244572344004 93.0722320087239 93.2811604379461 93.4021749177394
## 1 1 1 1
## 93.5042548773129 93.5226888315596 93.6072268322768 93.6607814759116
## 1 1 1 1
## 93.7893211966588 93.9531369362382 94.1724262981647 94.2243374300984
## 1 1 1 1
## 94.323523121338 94.3727945738873 94.4148806469266 94.4678524038531
## 1 1 1 1
## 94.6149944061209 94.6154078455809 94.7554051183167 94.849398387548
## 1 1 1 1
## 94.9000841856689 94.9689343540523 95.0416419219692 95.0728318213363
## 1 1 1 1
## 95.148849769359 95.3216536170304 95.325973487824 95.3321829953352
## 1 1 1 1
## 95.3595921829213 95.4015575421504 95.5943334291164 95.6342077695822
## 1 1 1 1
## 95.6955369716614 95.7177619878273 95.904009067595 95.9875551483062
## 1 1 1 1
## 96.060307991104 96.0749495653137 96.1071410866353 96.217038751933
## 1 1 1 1
## 96.2672501843385 96.4433027334257 96.4664953171689 96.6689912873199
## 1 1 1 1
## 96.8015459718017 96.9017976639136 96.9457480103538 96.96296017109
## 1 1 1 1
## 97.0854110358953 97.1522648034001 97.2346836106306 97.2963951397144
## 1 1 1 1
## 97.3126070092487 97.3183045968958 97.506193036272 97.6064486437099
## 1 1 1 1
## 97.6444211512845 97.8892847570626 98.0275670220123 98.1326606300911
## 1 1 1 1
## 98.1937580390728 98.250253290392 98.2984833860391 98.323052115374
## 1 1 1 1
## 98.3710097883983 98.3860177335765 98.5628921042367 98.602248014107
## 1 1 1 1
## 98.6039434976668 98.8146762281773 99.0942946662488 99.1358736986511
## 1 1 1 1
## 99.1536496645049 99.1668240281839 99.2940824596958 99.3109264011663
## 1 1 1 1
## 99.3293997243332 99.7712053625981 99.7870105555304 99.8474589683344
## 1 1 1 1
## 99.8884308086895 99.9142859896424
## 1 1
# Accuracy screen for columns 4:5 (Middle and After). They use the same
# 2-100 scale as Begin, so values below 2 are blanked as well as values
# above 100. Entries that are already NA are excluded from the mask.
out_of_range <- !is.na(data[, 4:5]) & (data[, 4:5] < 2 | data[, 4:5] > 100)
data[, 4:5][out_of_range] <- NA
# Confirm every score column now falls inside the valid range.
summary(data)
## Gender Group Begin Middle
## Male :194 control group :193 Min. :61.15 Min. :37.35
## Female:198 experimental group:199 1st Qu.:86.21 1st Qu.:59.88
## NA's : 8 NA's : 8 Median :93.05 Median :64.12
## Mean :90.77 Mean :63.86
## 3rd Qu.:96.40 3rd Qu.:68.18
## Max. :99.91 Max. :83.79
## NA's :238 NA's :8
## After
## Min. :48.15
## 1st Qu.:85.81
## Median :92.35
## Mean :89.69
## 3rd Qu.:96.94
## Max. :99.99
## NA's :153
a) Include output that shows you have missing data.
b) Include output and a description that shows what you did with the missing data.
i) Replace (impute) the missing values for every participant who is missing 20% or less of their data by row.
ii) You can leave out the other participants (i.e. you do not have to create allrows).
# Count the missing values in each column.
vapply(data, function(column) sum(is.na(column)), integer(1))
## Gender Group Begin Middle After
## 8 8 238 8 153
# Percentage (0-100) of the elements of `x` that are NA.
percentmiss <- function(x) {
  n_missing <- sum(is.na(x))
  n_missing / length(x) * 100
}
# Percent of missing values for each participant (one value per row).
apply(data, MARGIN = 1, FUN = percentmiss)
## [1] 40 0 0 20 40 20 40 40 20 20 20 40 20 20 0 20 20 20 0 20 40 20 0 40 40
## [26] 40 20 20 0 40 0 40 60 0 20 20 40 20 40 20 20 20 40 40 20 40 40 20 20 20
## [51] 40 20 40 0 20 40 20 20 0 40 20 20 40 40 20 0 40 40 40 40 20 40 0 20 20
## [76] 20 20 40 40 20 40 20 20 0 0 0 20 20 20 40 20 20 20 40 0 20 20 20 0 20
## [101] 40 20 0 60 20 20 40 40 0 20 20 20 0 20 0 20 20 40 40 20 40 20 40 40 0
## [126] 20 40 20 20 20 40 20 0 20 20 20 40 20 20 0 20 20 20 20 40 20 20 0 60 20
## [151] 40 40 20 20 40 40 0 20 20 40 20 20 40 20 20 20 0 40 40 40 60 40 40 40 60
## [176] 20 20 40 40 20 40 0 40 20 40 40 20 20 40 20 40 20 40 0 0 20 40 40 20 20
## [201] 40 20 20 20 20 20 0 20 20 20 60 20 40 20 0 40 20 20 40 20 20 20 0 0 40
## [226] 0 20 20 20 20 20 0 40 20 40 40 20 0 40 20 20 20 20 20 0 0 20 0 20 0
## [251] 40 20 0 20 0 60 20 0 20 20 20 20 0 20 20 40 20 20 20 20 20 20 0 20 20
## [276] 20 0 20 20 0 20 20 20 0 20 40 20 40 20 20 0 20 0 20 20 40 0 0 0 20
## [301] 0 20 40 0 0 20 0 0 0 20 0 20 0 20 40 20 20 40 0 0 0 20 20 0 0
## [326] 40 0 40 0 0 20 0 40 20 20 20 40 0 0 0 40 0 20 0 0 20 20 20 0 0
## [351] 20 0 0 20 40 20 20 0 20 0 40 20 20 20 20 40 20 0 0 20 20 40 0 20 20
## [376] 0 20 20 20 0 20 20 20 20 0 0 20 20 0 0 0 40 20 20 20 20 0 20 0 40
# Keep the per-participant missing percentages and tabulate how many rows
# fall at each level.
missing <- apply(data, MARGIN = 1, FUN = percentmiss)
table(missing)
## missing
## 0 20 40 60
## 95 202 96 7
# Rows with at most 20% missing data are eligible for imputation.
replace <- data[which(missing <= 20), , drop = FALSE]
missing1 <- apply(replace, MARGIN = 1, FUN = percentmiss)
table(missing1)
## missing1
## 0 20
## 95 202
# Rows with more than 20% missing data are set aside.
dont <- data[which(missing > 20), , drop = FALSE]
missing2 <- apply(dont, MARGIN = 1, FUN = percentmiss)
table(missing2)
## missing2
## 40 60
## 96 7
# Percent of missing values within each column.
vapply(data, percentmiss, numeric(1))
## Gender Group Begin Middle After
## 2.00 2.00 59.50 2.00 38.25
# Split the eligible rows by column: everything after the first two columns
# (the satisfaction scores) goes to imputation, while the first two columns
# (Gender, Group) are held back.
replace_col <- replace[, -(1:2)]
dont_col <- replace[, 1:2]
library(mice)
##
## Attaching package: 'mice'
## The following objects are masked from 'package:base':
##
## cbind, rbind
# Multiply impute the missing satisfaction scores with mice's defaults.
# A fixed seed makes the imputed values, and every statistic derived from
# them, repeatable across runs. Output recorded from an earlier unseeded
# run will not match exactly.
temp_no_miss <- mice(replace_col, seed = 2024)
##
## iter imp variable
## 1 1 Begin Middle After
## 1 2 Begin Middle After
## 1 3 Begin Middle After
## 1 4 Begin Middle After
## 1 5 Begin Middle After
## 2 1 Begin Middle After
## 2 2 Begin Middle After
## 2 3 Begin Middle After
## 2 4 Begin Middle After
## 2 5 Begin Middle After
## 3 1 Begin Middle After
## 3 2 Begin Middle After
## 3 3 Begin Middle After
## 3 4 Begin Middle After
## 3 5 Begin Middle After
## 4 1 Begin Middle After
## 4 2 Begin Middle After
## 4 3 Begin Middle After
## 4 4 Begin Middle After
## 4 5 Begin Middle After
## 5 1 Begin Middle After
## 5 2 Begin Middle After
## 5 3 Begin Middle After
## 5 4 Begin Middle After
## 5 5 Begin Middle After
# Extract the first completed (imputed) data set and check its ranges.
no_miss <- complete(temp_no_miss, action = 1)
summary(object = no_miss)
## Begin Middle After
## Min. :61.15 Min. :37.35 Min. :48.15
## 1st Qu.:85.49 1st Qu.:58.99 1st Qu.:85.45
## Median :92.43 Median :63.69 Median :91.19
## Mean :90.18 Mean :63.24 Mean :88.99
## 3rd Qu.:96.07 3rd Qu.:67.74 3rd Qu.:96.91
## Max. :99.91 Max. :80.26 Max. :99.99
a) Include a summary of your mahal scores that are greater than the cutoff.
b) What are the df for your Mahalanobis cutoff?
c) What is the cut off score for your Mahalanobis measure?
d) How many outliers did you have?
e) Delete all outliers.
# Structure of the imputed data: three numeric score columns.
str(object = no_miss)
## 'data.frame': 297 obs. of 3 variables:
## $ Begin : num 69.8 87.3 94.8 97.2 98.6 ...
## $ Middle: num 63.8 56.2 63.5 66.9 57.8 ...
## $ After : num 98.4 93 96.9 74 90.4 ...
# NOTE(review): no_miss has only three columns, so dropping columns 1 and 2
# leaves just After and prints it as a plain vector. The result is not
# stored; the Mahalanobis step uses all of no_miss.
no_miss[, -(1:2)]
## [1] 98.36746 93.02209 96.94363 74.04094 90.38513 97.94671 99.97198 99.11881
## [9] 97.94671 96.35607 96.94363 89.54846 97.44352 93.39801 74.04094 89.54846
## [17] 99.70706 86.86006 96.32636 99.13164 96.04052 95.46501 98.43568 92.69804
## [25] 87.89063 88.92172 99.61101 68.35777 89.54846 86.86006 98.74091 87.61826
## [33] 99.50870 98.83446 98.66710 89.54846 74.04094 90.10766 96.94363 96.49779
## [41] 92.34569 99.28899 96.23488 96.06659 98.49245 97.93273 96.03595 86.86006
## [49] 68.35777 99.61101 95.92079 99.37276 97.93295 99.41324 94.47679 95.98890
## [57] 74.04094 85.60677 95.30728 91.81620 95.13702 99.30856 97.40468 64.27408
## [65] 90.84591 96.17352 90.44968 97.96085 95.70022 98.37393 98.63480 94.06489
## [73] 90.85004 74.04094 96.23390 99.99309 99.77121 89.54846 98.54626 86.82251
## [81] 94.28267 99.89978 99.32554 98.35922 98.66092 92.21549 93.53885 99.92582
## [89] 87.61826 74.04094 85.60677 96.49779 91.81620 93.32995 96.91308 97.85586
## [97] 99.61846 98.61883 99.35213 64.27408 97.65042 89.76847 95.82168 98.89695
## [105] 97.70289 96.94363 95.83229 64.27408 85.60677 97.23965 96.96641 74.04094
## [113] 97.48796 88.92172 95.21089 95.70022 99.30856 98.12412 99.03291 97.34217
## [121] 74.04094 95.25814 99.85900 97.48359 93.77204 88.67651 85.60677 86.53000
## [129] 97.27474 99.85900 99.45954 90.50113 81.80996 91.82338 94.80158 65.73372
## [137] 87.28671 84.06709 61.31248 86.81044 91.98422 85.92321 74.04094 90.36571
## [145] 74.04094 80.53894 66.84285 88.54140 86.20937 91.45683 92.41265 76.72077
## [153] 84.21828 75.22829 85.91221 90.85004 77.79626 96.16559 94.89232 92.58477
## [161] 86.60257 84.15932 87.41775 75.56022 88.54927 90.63650 92.92103 86.67520
## [169] 86.82251 87.49876 85.14916 99.85900 64.27408 92.52786 85.88984 96.93442
## [177] 97.94672 91.81620 85.10037 86.86006 94.13765 94.66428 94.44297 87.90389
## [185] 82.60019 82.48097 94.46291 93.53346 91.00391 88.67651 95.88574 92.60262
## [193] 99.97198 86.41121 85.45350 94.30465 89.54846 87.88539 97.64312 85.73774
## [201] 91.32318 88.06614 94.35340 79.70414 68.35777 71.81539 70.38913 97.12829
## [209] 79.70414 94.47046 93.37862 92.17244 77.39114 91.03409 96.17310 77.71706
## [217] 94.62616 86.00785 65.07271 99.92582 48.15125 88.62860 95.40124 88.92172
## [225] 92.67837 96.93442 82.89749 86.76032 73.44220 79.77082 97.94113 80.21430
## [233] 77.17038 97.79978 91.19258 77.20135 95.61543 72.74867 88.74987 76.41806
## [241] 83.31816 96.71426 84.86503 77.13131 91.08803 59.62783 86.63301 73.82998
## [249] 84.14637 74.04094 85.60677 81.18580 83.49161 94.42237 92.69104 90.06798
## [257] 82.11522 92.08126 85.18093 99.55327 89.63435 96.97343 96.47715 90.14530
## [265] 89.54846 91.99120 97.34217 93.74733 89.20570 82.06130 60.29609 64.91101
## [273] 71.77475 96.98771 82.15855 97.31622 53.69310 85.60677 72.05297 84.63832
## [281] 89.72125 89.17554 82.06130 75.00576 96.94363 87.61826 83.23520 84.39569
## [289] 89.17924 82.07452 88.89509 97.79978 93.40585 89.54846 58.46257 92.51329
## [297] 96.03615
# Mahalanobis distance of each participant from the centroid of the three
# scores, using the column means and the covariance matrix of no_miss.
maha <- mahalanobis(
  no_miss,
  center = colMeans(no_miss, na.rm = TRUE),
  cov = cov(no_miss, use = "pairwise.complete.obs")
)
maha
## [1] 8.09995447 1.49865877 1.00092669 3.53293166 1.85887639 2.12207759
## [7] 6.07392544 2.15677310 1.91018911 2.67972796 1.41427888 0.05780087
## [13] 2.35363546 0.64714769 3.97643013 2.01785554 2.86440368 2.43660239
## [19] 2.05716265 2.09048220 1.26656543 3.01018881 1.24054361 0.72622548
## [25] 1.46680881 1.33977175 4.42600878 4.89497324 0.26526298 0.41432720
## [31] 2.22087116 0.61414223 2.80284584 7.46750998 1.55184253 0.95177636
## [37] 2.99739773 1.78242820 1.03084944 0.59015235 1.12897533 2.10496512
## [43] 0.83112977 1.01245876 2.75119807 1.01202072 2.58555833 0.33516634
## [49] 13.85061110 1.92410702 2.82173390 4.24454812 1.30115140 1.13708670
## [55] 0.36044159 1.38450588 3.29122179 0.74656532 1.77730827 2.68866249
## [61] 1.50963789 2.30444569 2.19783819 6.34252101 0.98761083 2.07844073
## [67] 0.47310309 2.97779316 2.06535359 16.07916965 1.11544939 0.59992808
## [73] 4.67125594 4.42460481 1.74846114 2.91425549 1.34689314 1.49345612
## [79] 2.46412361 0.80686224 8.23495675 1.85236723 1.86270582 4.12022801
## [85] 1.85929077 0.27524340 4.82744315 1.65451998 2.20981999 5.64772209
## [91] 2.50211390 3.06743714 0.40058443 1.27672321 1.62990343 1.06917703
## [97] 1.46749778 1.25909105 4.05114584 6.88704450 1.65939488 2.00721864
## [103] 3.52660205 1.41261612 3.83578832 2.27681209 0.84926010 6.33889077
## [109] 0.46683305 1.52652254 1.56143327 3.12836615 0.91541036 1.06492274
## [115] 1.20648653 2.61580796 2.84554821 3.98160339 2.96495912 1.77146924
## [121] 5.46613273 0.97995785 3.13863563 1.55091429 15.55338596 0.92763514
## [127] 1.28427481 6.82550348 3.03332763 1.86524235 2.05919581 0.80713731
## [133] 3.91494412 1.58384641 0.67705156 8.97202031 1.11615345 3.10493497
## [139] 8.80163789 1.23721223 0.23388289 2.30041608 3.91178427 1.12860641
## [145] 7.82255151 2.05464164 7.25697443 1.10223306 3.52982553 1.26570363
## [151] 0.93506926 3.31484084 0.46191321 2.49886312 1.30943746 5.16405504
## [157] 2.75513253 1.72408182 1.37564109 0.48787428 0.19544696 3.09494053
## [163] 5.33199981 8.15590631 0.04793910 0.67633704 0.84555571 9.14767768
## [169] 0.97618973 2.95590818 0.35758230 3.67579584 9.32207374 2.96217430
## [175] 0.25324691 1.47285536 4.02262848 1.77573021 0.19176338 2.77836040
## [181] 0.90889032 5.97912772 1.33234229 2.25256658 0.58933113 1.85261121
## [187] 0.96946324 2.72910557 0.32337907 1.15408713 0.52790660 1.82666910
## [193] 1.83727918 0.16998295 1.96382752 3.65173914 1.59251886 1.80268203
## [199] 1.90610113 0.78253653 1.16095507 1.22106397 2.94163906 1.40160204
## [205] 5.37440971 4.42628397 6.41691215 1.72194625 3.03230891 1.43746537
## [211] 2.04279616 6.44669976 1.67720049 1.15043136 1.65180264 5.51300796
## [217] 0.64033352 2.24367064 9.58019835 2.61636604 18.81567890 1.30335817
## [223] 2.48472925 2.91187243 2.12961474 5.58984298 1.15115843 1.37242087
## [229] 3.74817992 7.93292210 2.06301260 2.71072629 4.00057920 3.75599495
## [235] 0.46302903 1.81968071 0.95635158 3.54912707 4.14790033 4.51523669
## [241] 1.42722262 2.53845828 0.52704252 1.90664131 3.84022647 9.51205591
## [247] 0.61980333 3.44457598 1.31127490 4.08232348 2.51237941 1.38752241
## [253] 1.58817916 0.87789065 1.59533178 4.18113846 5.85326150 1.83628908
## [259] 0.78309805 3.02956481 2.58059159 1.28811734 2.05401468 1.03725819
## [265] 3.49993507 2.45897382 1.14143241 7.69592442 0.60903075 2.06276198
## [271] 11.14454081 9.38389955 3.24114729 3.55461360 5.11727387 3.96767838
## [277] 14.74505840 5.63963039 3.09021025 10.81851630 1.64368749 1.93708447
## [283] 1.40673969 4.54722614 8.25075844 4.91635191 0.43031031 2.47308707
## [289] 0.72651854 14.82995307 3.37560639 0.94170611 3.36139216 4.93866058
## [295] 25.63369320 1.11621284 5.53076793
# Chi-square critical value at p < .001, with df equal to the number of
# variables that entered the distance.
cutoff <- qchisq(1 - 0.001, df = ncol(no_miss))
print(cutoff)
## [1] 16.26624
# FALSE counts the participants at or beyond the cutoff (the outliers).
summary(maha < cutoff)
## Mode FALSE TRUE
## logical 2 295
# Keep only the participants below the cutoff.
noout_data <- no_miss[which(maha < cutoff), , drop = FALSE]
str(object = noout_data)
## 'data.frame': 295 obs. of 3 variables:
## $ Begin : num 69.8 87.3 94.8 97.2 98.6 ...
## $ Middle: num 63.8 56.2 63.5 66.9 57.8 ...
## $ After : num 98.4 93 96.9 74 90.4 ...
a) Include the symnum bivariate correlation table of your continuous measures.
b) Do you meet the assumption for additivity?
library(corrplot)
## corrplot 0.84 loaded
# Additivity check: bivariate correlations among the three scores, shown as
# numbers, as a correlation plot, and as a symbolic table.
cor(x = noout_data)
## Begin Middle After
## Begin 1.00000000 -0.02269047 0.02234954
## Middle -0.02269047 1.00000000 -0.02079275
## After 0.02234954 -0.02079275 1.00000000
corrplot(corr = cor(x = noout_data))
symnum(x = cor(x = noout_data))
## B M A
## Begin 1
## Middle 1
## After 1
## attr(,"legend")
## [1] 0 ' ' 0.3 '.' 0.6 ',' 0.8 '+' 0.9 '*' 0.95 'B' 1
a) Include a picture that shows how you might assess multivariate linearity.
b) Do you think you've met the assumption for linearity?
# Fake-regression screening: regress an arbitrary chi-square variable on all
# of the scores so the residuals can drive the assumption plots.
# The seed is fixed so the random outcome, and the plots built from it, are
# repeatable. Output recorded from an earlier unseeded run will not match
# exactly.
set.seed(1234)
random <- rchisq(nrow(noout_data), df = 7)
data2 <- lm(random ~ ., data = noout_data)
summary(data2)
##
## Call:
## lm(formula = random ~ ., data = noout_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.323 -2.601 -0.559 2.354 17.229
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.455056 3.821244 2.213 0.0277 *
## Begin -0.005054 0.027406 -0.184 0.8538
## Middle -0.013263 0.032807 -0.404 0.6863
## After -0.002389 0.022302 -0.107 0.9148
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.6 on 291 degrees of freedom
## Multiple R-squared: 0.0007033, Adjusted R-squared: -0.009599
## F-statistic: 0.06827 on 3 and 291 DF, p-value: 0.9768
# Linearity check: studentized residuals from the screening model on a
# normal QQ plot, with the identity line (intercept 0, slope 1) for reference.
standardized <- rstudent(model = data2)
qqnorm(standardized)
abline(a = 0, b = 1)
a) Include a picture that shows how you might assess multivariate normality.
b) Do you think you've met the assumption for normality?
library(moments)
# Normality check: skewness and kurtosis for each score, then a histogram of
# the studentized residuals. TRUE is spelled out because T is an ordinary
# variable that can be reassigned.
skewness(noout_data, na.rm = TRUE)
## Begin Middle After
## -1.1757550 -0.1982868 -1.1899243
kurtosis(noout_data, na.rm = TRUE)
## Begin Middle After
## 4.554567 2.933593 3.999765
hist(standardized, breaks = 15)
a) Include a picture that shows how you might assess multivariate homogeneity.
b) Do you think you've met the assumption for homogeneity?
c) Do you think you've met the assumption for homoscedasticity?
# Homogeneity / homoscedasticity check: standardized fitted values against
# studentized residuals, with reference lines through zero on both axes.
plot(x = scale(data2$fitted.values), y = standardized)
abline(h = 0)
abline(v = 0)