EXERCISE 2 ANSWER KEY
A. Using the data set patients.csv:
- Load the data into the object ds1.
#Kindly change the file path below to the folder where your patients.csv is located. THIS APPLIES TO ALL 'read.csv' and 'write.csv' calls.
ds1 <- read.csv(file=("C:\\Users\\USER\\Desktop\\2019 Predictive Analytics Course in R\\R Scripts\\PA Course 1\\patients.csv"), header=TRUE)
ds1
- Generate appropriate descriptive summary statistics for all the variables in the data set.
#Some Descriptive Statistics:
#Getting the sum of food expense
sum(ds1$foodexpense)
[1] 316658
#Getting the maximum of food expense
max(ds1$foodexpense)
[1] 9659
#Getting the range of age
range(ds1$age)
[1] 18 60
#Getting the correlation of weight and food expense
cor(ds1$weight,ds1$foodexpense)
[1] 0.2392273
#Getting the mean of weight
mean(ds1$weight)
[1] 65.86
#Getting the median of age
median(ds1$age)
[1] 41.5
#Install the package and call the library 'e1071'
library(e1071)
#Skewness (symmetry of distribution) of weight
skewness(ds1$weight)
[1] 0.04235242
#The skewness of weight is 0.04235242. Because the value is positive but very close to 0, the weight distribution is nearly symmetric, with only a very slight skew towards the right.
#Graph the density plot to visualize the distribution
plot(density(ds1$weight))

- Is there a possible pairwise linear relationship among the quantitative variables? Provide graphs and interpret the results.
#Getting the correlation of age and weight
cor(ds1$age,ds1$weight)
[1] -0.2155912
library("corrplot")
corr <- cor(ds1)
corr
patient age weight maritalstatus foodexpense
patient 1.00000000 0.040889543 0.1569736 -0.04097957 0.150323967
age 0.04088954 1.000000000 -0.2155912 -0.19204346 0.006795935
weight 0.15697358 -0.215591218 1.0000000 -0.14157948 0.239227259
maritalstatus -0.04097957 -0.192043456 -0.1415795 1.00000000 -0.103970135
foodexpense 0.15032397 0.006795935 0.2392273 -0.10397013 1.000000000
#Sample Interpretation: age vs weight
# Correlation = -0.2155912. There is a weak negative linear relationship between age and weight.
#Seven different visualization methods can be used : circle, square, ellipse, number, shade, color, pie.
#I used the method: color
corrplot(corr, method = c("color"))

B. Using the experiment data set experiment.csv:
Experiment: A study was conducted to determine the effect of two new feed formulations (1, 2) on the weight of eggs. Three species of ducks (A, B, C) were purposively selected for the study. The following data were generated.
| Species-Feed | Weight | Species-Feed | Weight |
| --- | --- | --- | --- |
| A-1 | 5.6 | B-2 | 7.3 |
| A-1 | 5.8 | B-2 | 7.1 |
| A-2 | 6.1 | C-1 | 6.3 |
| A-2 | 6.3 | C-1 | 6.2 |
| B-1 | 8.1 | C-2 | 6.8 |
| B-1 | 8.2 | C-2 | 6.9 |
- Load the data into the object ds2.
ds2 <- read.csv(file="C:\\Users\\USER\\Desktop\\2019 Predictive Analytics Course in R\\R Scripts\\PA Course 1\\experiment.csv")
ds2
- Generate some descriptive summary statistics for all the variables in the data set.
#Some Descriptive Statistics:
#Getting the sum of weight
sum(ds2$weight)
[1] 80.7
#Getting the maximum of weight
max(ds2$weight)
[1] 8.2
#Getting the range of weight
range(ds2$weight)
[1] 5.6 8.2
#Getting the mean of weight
mean(ds2$weight)
[1] 6.725
#Getting the median of weight
median(ds2$weight)
[1] 6.55
#Install the package and call the library 'e1071'
library(e1071)
#Skewness (symmetry of distribution) of weight
skewness(ds2$weight)
[1] 0.4674504
#The skewness of weight is 0.4674504. It indicates that the weight distribution is skewed towards the right.
#Graph the density plot to visualize the distribution in terms of weight
plot(density(ds2$weight))

- Are the distributions of the weight similar across species?
#Load the 'lattice' package (run install.packages("lattice") first if it is not yet installed)
library(lattice)
#Graph the density plot of weight with one curve per species, so the distributions can be compared.
#'groups' is written out in full: the shorter 'group' only worked through partial argument matching.
#'species' is looked up inside 'data', so the ds2$ prefix is not needed.
densityplot(~weight, groups = species, data = ds2, auto.key = TRUE)

- Are the distributions of the weight similar across feed formulations?
#Load the 'lattice' package (run install.packages("lattice") first if it is not yet installed)
library(lattice)
#Graph the density plot of weight with one curve per feed formulation, so the distributions can be compared.
#'groups' is written out in full: the shorter 'group' only worked through partial argument matching.
#'feed' is looked up inside 'data', so the ds2$ prefix is not needed.
densityplot(~weight, groups = feed, data = ds2, auto.key = TRUE)

LS0tDQp0aXRsZTogIlBBIENvdXJzZSAxIEV4ZXJjaXNlIDIgd2l0aCBBbnN3ZXIgS2V5Ig0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCioqKg0KIyNFWEVSQ0lTRSAyDQojIyNCQVNJQyBTVEFUSVNUSUNTIEFORCBEQVRBIFZJU1VBTElaQVRJT04gSU4gUg0KDQojIyMjSS4gT0JKRUNUSVZFUw0KDQpBdCB0aGUgZW5kIG9mIHRoaXMgZXhlcmNpc2UsIHRoZSBwYXJ0aWNpcGFudCBtdXN0IGJlIGFibGUgdG86ICANCg0KYS4gZ2VuZXJhdGUgYW5kIGludGVycHJldCBkZXNjcmlwdGl2ZSBzdGF0aXN0aWNzIGZvciB2YXJpYWJsZXMgYW5kICANCg0KYi4gZ2VuZXJhdGUgYW5kIGludGVycHJldCBncmFwaHMgYXBwcm9wcmlhdGUgdG8gdGhlIHR5cGUgb2YgZGF0YSBpbiBSLiAgDQoNCiMjIyNJSS4gTUVUSE9EUw0KDQpBLiBVc2luZyB0aGUgZGF0YSBzZXQgKipwYXRpZW50cy5jc3YqKjogIA0KDQoxLiBMb2FkIHRoZSBkYXRhIGludG8gdGhlIG9iamVjdCAqKmRzMSoqLiAgDQoNCjIuIEdlbmVyYXRlIGFwcHJvcHJpYXRlIGRlc2NyaXB0aXZlIHN1bW1hcnkgc3RhdGlzdGljcyBmb3IgYWxsIHRoZSB2YXJpYWJsZXMgaW4gdGhlIGRhdGEgc2V0LiAgDQoNCjMuIElzIHRoZXJlIGEgcG9zc2libGUgcGFpcndpc2UgbGluZWFyIHJlbGF0aW9uc2hpcCBhbW9uZyB0aGUgcXVhbnRpdGF0aXZlIHZhcmlhYmxlcz8gUHJvdmlkZSBncmFwaHMgYW5kIGludGVycHJldCB0aGUgcmVzdWx0cy4gIA0KDQpCLiBVc2luZyB0aGUgZXhwZXJpbWVudCBkYXRhIHNldCAqKmV4cGVyaW1lbnQuY3N2Kio6ICANCg0KKipFeHBlcmltZW50Kio6IEEgc3R1ZHkgd2FzIGNvbmR1Y3RlZCB0byBkZXRlcm1pbmUgdGhlIGVmZmVjdCBvZiAyIG5ldyBmZWVkIGZvcm11bGF0aW9uICgxLCAyKSBvbiB0aGUgd2VpZ2h0IG9mIGVnZ3MuIFRocmVlIHNwZWNpZXMgb2YgZHVja3MgKEEsIEIsIEMpIHdlcmUgcHVycG9zaXZlbHkgc2VsZWN0ZWQgZm9yIHRoZSBzdHVkeS4gVGhlIGZvbGxvd2luZyBkYXRhIHdlcmUgZ2VuZXJhdGVkLg0KDQpTcGVjaWVzLUZlZWR8V2VpZ2h0fFNwZWNpZXMtRmVlZHxXZWlnaHQNCi0tLXwtLS18LS0tfC0tLQ0KQS0xfDUuNnxCLTJ8Ny4zDQpBLTF8NS44fEItMnw3LjENCkEtMnw2LjF8Qy0xfDYuMw0KQS0yfDYuM3xDLTF8Ni4yDQpCLTF8OC4xfEMtMnw2LjgNCkItMXw4LjJ8Qy0yfDYuOQ0KDQoxLiBMb2FkIHRoZSBkYXRhIGludG8gdGhlIG9iamVjdCAqKmRzMioqLiAgDQoyLiBHZW5lcmF0ZSBzb21lIGRlc2NyaXB0aXZlIHN1bW1hcnkgc3RhdGlzdGljcyBmb3IgYWxsIHRoZSB2YXJpYWJsZXMgaW4gdGhlIGRhdGEgc2V0LiAgDQozLiBBcmUgdGhlIGRpc3RyaWJ1dGlvbnMgb2YgdGhlIHdlaWdodCBzaW1pbGFyIGFjcm9zcyBzcGVjaWVzPyAgDQo0LiBBcmUgdGhlIGRpc3RyaWJ1dGlvbnMgb2YgdGhlIHdlaWdodCBzaW1pbGFyIGFjcm9zcyBmZWVkIGZvcm11bGF0aW9ucz8gIA0KDQotLS0NCiMjRVhFUkNJU0UgMiBBTlNXRVIgS0VZDQoNCkEuIFVzaW5n
IHRoZSBkYXRhIHNldCAqKnBhdGllbnRzLmNzdioqOiAgDQoNCjEuIExvYWQgdGhlIGRhdGEgaW50byB0aGUgb2JqZWN0ICoqZHMxKiouICANCmBgYHtyfQ0KI0tpbmRseSBjaGFuZ2UgdGhlIHdvcmtpbmcgZGlyZWN0b3J5IHdoZXJlIHlvdXIgcGF0aWVudHMuY3N2IGlzIGxvY2F0ZWQuIFRISVMgQVBQTElFUyBUTyBBTEwgJ3JlYWQuY3N2JyBhbmQgJ3dyaXRlLmNzdicNCmRzMSA8LSByZWFkLmNzdihmaWxlPSgiQzpcXFVzZXJzXFxVU0VSXFxEZXNrdG9wXFwyMDE5IFByZWRpY3RpdmUgQW5hbHl0aWNzIENvdXJzZSBpbiBSXFxSIFNjcmlwdHNcXFBBIENvdXJzZSAxXFxwYXRpZW50cy5jc3YiKSwgaGVhZGVyPVRSVUUpDQpkczENCmBgYA0KDQoyLiBHZW5lcmF0ZSBhcHByb3ByaWF0ZSBkZXNjcmlwdGl2ZSBzdW1tYXJ5IHN0YXRpc3RpY3MgZm9yIGFsbCB0aGUgdmFyaWFibGVzIGluIHRoZSBkYXRhIHNldC4gIA0KYGBge3J9DQojU29tZSBEZXNjcmlwdGl2ZSBTdGF0aXN0aWNzOg0KI0dldHRpbmcgdGhlIHN1bSBvZiBmb29kIGV4cGVuc2UNCnN1bShkczEkZm9vZGV4cGVuc2UpDQojR2V0dGluZyB0aGUgbWF4aW11bSBvZiBmb29kIGV4cGVuc2UNCm1heChkczEkZm9vZGV4cGVuc2UpDQojR2V0dGluZyB0aGUgcmFuZ2Ugb2YgYWdlDQpyYW5nZShkczEkYWdlKQ0KI0dldHRpbmcgdGhlIGNvcnJlbGF0aW9uIG9mIHdlaWdodCBhbmQgZm9vZCBleHBlbnNlDQpjb3IoZHMxJHdlaWdodCxkczEkZm9vZGV4cGVuc2UpDQojR2V0dGluZyB0aGUgbWVhbiBvZiB3ZWlnaHQNCm1lYW4oZHMxJHdlaWdodCkNCiNHZXR0aW5nIHRoZSBtZWRpYW4gb2YgYWdlDQptZWRpYW4oZHMxJGFnZSkNCiNJbnN0YWxsIHRoZSBwYWNrYWdlIGFuZCBjYWxsIHRoZSBsaWJyYXJ5ICdlMTA3MScNCmxpYnJhcnkoZTEwNzEpDQojU2tld25lc3MgKHN5bW1ldHJ5IG9mIGRpc3RyaWJ1dGlvbikgb2Ygd2VpZ2h0DQpza2V3bmVzcyhkczEkd2VpZ2h0KQ0KI1RoZSBza2V3bmVzcyBvZiB3ZWlnaHQgaXMgMC4wNDIzNTI0Mi4gSXQgaW5kaWNhdGVzIHRoYXQgdGhlIGVydXB0aW9uIGR1cmF0aW9uIGRpc3RyaWJ1dGlvbiBpcyBza2V3ZWQgdG93YXJkcyB0aGUgcmlnaHQuDQpgYGANCmBgYHtyfQ0KI0dyYXBoIHRoZSBkZW5zaXR5IHBsb3QgdG8gdmlzdWFsaXplIHRoZSBkaXN0cmlidXRpb24NCnBsb3QoZGVuc2l0eShkczEkd2VpZ2h0KSkNCmBgYA0KDQozLiBJcyB0aGVyZSBhIHBvc3NpYmxlIHBhaXJ3aXNlIGxpbmVhciByZWxhdGlvbnNoaXAgYW1vbmcgdGhlIHF1YW50aXRhdGl2ZSB2YXJpYWJsZXM/IFByb3ZpZGUgZ3JhcGhzIGFuZCBpbnRlcnByZXQgdGhlIHJlc3VsdHMuICANCmBgYHtyfQ0KI0dldHRpbmcgdGhlIGNvcnJlbGF0aW9uIG9mIGFnZSBhbmQgd2VpZ2h0DQpjb3IoZHMxJGFnZSxkczEkd2VpZ2h0KQ0KDQpsaWJyYXJ5KCJjb3JycGxvdCIpDQpjb3JyIDwtIGNvcihkczEpDQpjb3JyDQojU2FtcGxlIEludGVycHJldGF0aW9uOiBhZ2UgdnMgd2VpZ2h0DQojIENvcnJl
bGF0aW9uID0gLTAuMjE1NTkxMi4gVGhlcmUgaXMgYSB3ZWFrIG5lZ2F0aXZlIGxpbmVhciByZWxhdGlvbnNoaXAgYmV0d2VlbiBhZ2UgYW5kIHdlaWdodC4NCmBgYA0KYGBge3J9DQojU2V2ZW4gZGlmZmVyZW50IHZpc3VhbGl6YXRpb24gbWV0aG9kcyBjYW4gYmUgdXNlZCA6ICJjaXJjbGUiLCAic3F1YXJlIiwgImVsbGlwc2UiLCAibnVtYmVyIiwgInNoYWRlIiwgImNvbG9yIiwgInBpZSIuDQojSSB1c2VkIHRoZSBtZXRob2Q6IGNvbG9yDQpjb3JycGxvdChjb3JyLCBtZXRob2QgPSBjKCJjb2xvciIpKQ0KYGBgDQoNCkIuIFVzaW5nIHRoZSBleHBlcmltZW50IGRhdGEgc2V0ICoqZXhwZXJpbWVudC5jc3YqKjogIA0KDQoqKkV4cGVyaW1lbnQqKjogQSBzdHVkeSB3YXMgY29uZHVjdGVkIHRvIGRldGVybWluZSB0aGUgZWZmZWN0IG9mIDIgbmV3IGZlZWQgZm9ybXVsYXRpb24gKDEsIDIpIG9uIHRoZSB3ZWlnaHQgb2YgZWdncy4gVGhyZWUgc3BlY2llcyBvZiBkdWNrcyAoQSwgQiwgQykgd2VyZSBwdXJwb3NpdmVseSBzZWxlY3RlZCBmb3IgdGhlIHN0dWR5LiBUaGUgZm9sbG93aW5nIGRhdGEgd2VyZSBnZW5lcmF0ZWQuDQoNClNwZWNpZXMtRmVlZHxXZWlnaHR8U3BlY2llcy1GZWVkfFdlaWdodA0KLS0tfC0tLXwtLS18LS0tDQpBLTF8NS42fEItMnw3LjMNCkEtMXw1Ljh8Qi0yfDcuMQ0KQS0yfDYuMXxDLTF8Ni4zDQpBLTJ8Ni4zfEMtMXw2LjINCkItMXw4LjF8Qy0yfDYuOA0KQi0xfDguMnxDLTJ8Ni45DQoNCjEuIExvYWQgdGhlIGRhdGEgaW50byB0aGUgb2JqZWN0ICoqZHMyKiouDQpgYGB7cn0NCmRzMiA8LSByZWFkLmNzdihmaWxlPSJDOlxcVXNlcnNcXFVTRVJcXERlc2t0b3BcXDIwMTkgUHJlZGljdGl2ZSBBbmFseXRpY3MgQ291cnNlIGluIFJcXFIgU2NyaXB0c1xcUEEgQ291cnNlIDFcXGV4cGVyaW1lbnQuY3N2IikNCmRzMg0KYGBgDQoNCjIuIEdlbmVyYXRlIHNvbWUgZGVzY3JpcHRpdmUgc3VtbWFyeSBzdGF0aXN0aWNzIGZvciBhbGwgdGhlIHZhcmlhYmxlcyBpbiB0aGUgZGF0YSBzZXQuICANCmBgYHtyfQ0KI1NvbWUgRGVzY3JpcHRpdmUgU3RhdGlzdGljczoNCiNHZXR0aW5nIHRoZSBzdW0gb2Ygd2VpZ2h0DQpzdW0oZHMyJHdlaWdodCkNCiNHZXR0aW5nIHRoZSBtYXhpbXVtIG9mIHdlaWdodA0KbWF4KGRzMiR3ZWlnaHQpDQojR2V0dGluZyB0aGUgcmFuZ2Ugb2Ygd2VpZ2h0DQpyYW5nZShkczIkd2VpZ2h0KQ0KI0dldHRpbmcgdGhlIG1lYW4gb2Ygd2VpZ2h0DQptZWFuKGRzMiR3ZWlnaHQpDQojR2V0dGluZyB0aGUgbWVkaWFuIG9mIHdlaWdodA0KbWVkaWFuKGRzMiR3ZWlnaHQpDQojSW5zdGFsbCB0aGUgcGFja2FnZSBhbmQgY2FsbCB0aGUgbGlicmFyeSAnZTEwNzEnDQpsaWJyYXJ5KGUxMDcxKQ0KI1NrZXduZXNzIChzeW1tZXRyeSBvZiBkaXN0cmlidXRpb24pIG9mIHdlaWdodA0Kc2tld25lc3MoZHMyJHdlaWdodCkNCiNUaGUgc2tld25lc3Mgb2Ygd2VpZ2h0IGlzIDAuNDY3NDUwNC4gSXQgaW5kaWNhdGVzIHRoYXQg
dGhlIHdlaWdodCBkaXN0cmlidXRpb24gaXMgc2tld2VkIHRvd2FyZHMgdGhlIHJpZ2h0Lg0KDQpgYGANCg0KYGBge3J9DQojR3JhcGggdGhlIGRlbnNpdHkgcGxvdCB0byB2aXN1YWxpemUgdGhlIGRpc3RyaWJ1dGlvbiBpbiB0ZXJtcyBvZiB3ZWlnaHQNCnBsb3QoZGVuc2l0eShkczIkd2VpZ2h0KSkNCmBgYA0KDQozLiBBcmUgdGhlIGRpc3RyaWJ1dGlvbnMgb2YgdGhlIHdlaWdodCBzaW1pbGFyIGFjcm9zcyBzcGVjaWVzPyAgDQpgYGB7cn0NCiNJbnN0YWxsIHRoZSBwYWNrYWdlIGFuZCBjYWxsIHRoZSBsaWJyYXJ5ICdsYXR0aWNlJw0KbGlicmFyeShsYXR0aWNlKQ0KI0dyYXBoIHRoZSBkZW5zaXR5IHBsb3QgdG8gdmlzdWFsaXplIHRoZSBkaXN0cmlidXRpb24gb2YgdGhlIGRpZmZlcmVudCBzcGVjaWVzIGluIHRlcm1zIG9mIHdlaWdodA0KZGVuc2l0eXBsb3QofndlaWdodCwgZ3JvdXAgPSBkczIkc3BlY2llcywgZGF0YSA9IGRzMiwgYXV0by5rZXkgPSBUUlVFKQ0KYGBgDQoNCjQuIEFyZSB0aGUgZGlzdHJpYnV0aW9ucyBvZiB0aGUgd2VpZ2h0IHNpbWlsYXIgYWNyb3NzIGZlZWQgZm9ybXVsYXRpb25zPyAgDQpgYGB7cn0NCiNJbnN0YWxsIHRoZSBwYWNrYWdlIGFuZCBjYWxsIHRoZSBsaWJyYXJ5ICdsYXR0aWNlJw0KbGlicmFyeShsYXR0aWNlKQ0KI0dyYXBoIHRoZSBkZW5zaXR5IHBsb3QgdG8gdmlzdWFsaXplIHRoZSBkaXN0cmlidXRpb24gb2YgdGhlIGRpZmZlcmVudCBmZWVkIGZvcm11bGF0aW9uIGluIHRlcm1zIG9mIHdlaWdodA0KZGVuc2l0eXBsb3QofndlaWdodCwgZ3JvdXAgPSBkczIkZmVlZCwgZGF0YSA9IGRzMiwgYXV0by5rZXkgPSBUUlVFKQ0KYGBgDQoNCg==