# Body-mass index (kg/m^2) for each sex. weightkg is divided by 10 and stature
# by 1000 before forming weight / height^2 -- presumably converting tenths of
# a kilogram to kg and millimetres to metres (TODO confirm units).
ansur_men$bmi <- with(ansur_men, (weightkg / 10) / (stature / 1000)^2)
ansur_women$bmi <- with(ansur_women, (weightkg / 10) / (stature / 1000)^2)
summary(ansur_men$bmi)
Min. 1st Qu. Median Mean 3rd Qu. Max.
15.35 24.87 27.46 27.69 30.29 43.45
summary(ansur_women$bmi)
Min. 1st Qu. Median Mean 3rd Qu. Max.
16.37 23.08 25.27 25.50 27.61 40.78
# Combine the two samples into one data frame. With no `by`, merge() joins on
# all shared column names, and all = TRUE keeps unmatched rows from both
# inputs. The result is stored as `ansur` so the row-count check below has
# something to inspect, instead of the merged table being printed and lost.
ansur <- merge(ansur_men, ansur_women, all = TRUE)
length(names(ansur_women)) == length(names(ansur_men))
[1] TRUE
nrow(ansur) == nrow(ansur_men) + nrow(ansur_women)
[1] TRUE
summary(ansur_men[,1:10])
subjectid abdominalextensiondepthsitting acromialheight acromionradialelength anklecircumference axillaheight balloffootcircumference balloffootlength biacromialbreadth bicepscircumferenceflexed
Min. :10027 Min. :163.0 Min. :1194 Min. :270.0 Min. :156.0 Min. :1106 Min. :186 Min. :156.0 Min. :337.0 Min. :246.0
1st Qu.:14270 1st Qu.:227.0 1st Qu.:1398 1st Qu.:324.0 1st Qu.:219.2 1st Qu.:1289 1st Qu.:243 1st Qu.:194.0 1st Qu.:403.0 1st Qu.:335.0
Median :17237 Median :251.0 Median :1439 Median :335.0 Median :228.0 Median :1328 Median :252 Median :201.0 Median :415.0 Median :357.0
Mean :20003 Mean :254.7 Mean :1441 Mean :335.2 Mean :229.3 Mean :1329 Mean :252 Mean :200.9 Mean :415.7 Mean :358.1
3rd Qu.:27316 3rd Qu.:279.0 3rd Qu.:1481 3rd Qu.:346.0 3rd Qu.:239.0 3rd Qu.:1367 3rd Qu.:261 3rd Qu.:208.0 3rd Qu.:428.0 3rd Qu.:380.0
Max. :29452 Max. :451.0 Max. :1683 Max. :393.0 Max. :293.0 Max. :1553 Max. :306 Max. :245.0 Max. :489.0 Max. :490.0
# Mean of each selected body dimension for men (named numeric vector).
vapply(
  ansur_men[c("stature", "sittingheight", "weightkg", "bmi")],
  mean,
  numeric(1)
)
stature sittingheight weightkg bmi
1756.21460 918.28981 855.23959 27.68631
# Mean of each selected body dimension for women (named numeric vector).
vapply(
  ansur_women[c("stature", "sittingheight", "weightkg", "bmi")],
  mean,
  numeric(1)
)
stature sittingheight weightkg bmi
1628.47331 856.62588 677.58207 25.49597
# Standard deviation of each selected body dimension for men.
vapply(
  ansur_men[c("stature", "sittingheight", "weightkg", "bmi")],
  sd,
  numeric(1)
)
stature sittingheight weightkg bmi
68.550788 35.698183 142.206992 4.039462
# Standard deviation of each selected body dimension for women.
vapply(
  ansur_women[c("stature", "sittingheight", "weightkg", "bmi")],
  sd,
  numeric(1)
)
stature sittingheight weightkg bmi
64.199512 33.054242 109.846458 3.491673
# Quantiles of stature, sittingheight, weightkg and BMI for women at
# 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95 and 0.99.
# The result is a matrix: one row per probability, one column per variable.
vapply(
  ansur_women[c("stature", "sittingheight", "weightkg", "bmi")],
  quantile,
  numeric(9),
  probs = c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
)
stature sittingheight weightkg bmi
1% 1482.55 784.00 466.55 18.80290
5% 1525.00 803.00 513.00 20.25971
10% 1547.00 813.00 546.00 21.28893
25% 1586.25 834.00 601.00 23.07754
50% 1626.00 857.00 668.00 25.26675
75% 1672.00 879.00 745.75 27.60936
90% 1713.00 900.00 824.00 29.91074
95% 1740.00 912.00 870.75 31.65882
99% 1777.60 932.15 974.50 34.81696
#Compute the following quantiles for stature, sittingheight, weightkg, and BMI for women: 0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99
# Women's stature: kernel density with empirical quantiles, overlaid with a
# normal curve fitted from the sample mean and standard deviation.
qlist_stature <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
qvals_stature <- quantile(ansur_women$stature, qlist_stature)
women_stature_mean <- mean(ansur_women$stature)
women_stature_sd <- sd(ansur_women$stature)

par(las = 1, bty = "n")
with(ansur_women, plot(density(stature), main = "women", xlab = "Stature (mm)"))

# Mark every empirical quantile with a vertical line and print its value.
# The seven staggered label heights are recycled over the nine quantiles.
abline(v = qvals_stature)
text(
  qvals_stature,
  labels = vapply(qvals_stature, toString, character(1)),
  y = c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001))
)

# Normal approximation of the same distribution.
curve(
  dnorm(x, mean = women_stature_mean, sd = women_stature_sd),
  col = "darkblue", lwd = 2, add = TRUE, yaxt = "n"
)

# Quantiles of the fitted normal, taken at this section's own probability
# vector so they pair one-to-one with the empirical quantiles above.
qvals_women_norm <- qnorm(
  qlist_stature,
  mean = women_stature_mean,
  sd = women_stature_sd
)
abline(v = qvals_women_norm, col = "blue")
# Labels are the first four characters of each normal quantile. They are
# drawn at the empirical-quantile x positions, 0.0015 above the black labels.
# NOTE(review): confirm the labels should not sit at qvals_women_norm instead.
text(
  qvals_stature,
  labels = substr(vapply(qvals_women_norm, toString, character(1)), 1, 4),
  y = 0.0015 + c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001)),
  col = "blue"
)

# Women's sitting height: kernel density with empirical quantiles, overlaid
# with a normal curve fitted from the sample mean and standard deviation.
qlist_sittingheight <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
qvals_sittingheight <- quantile(ansur_women$sittingheight, qlist_sittingheight)
women_sittingheight_mean <- mean(ansur_women$sittingheight)
women_sittingheight_sd <- sd(ansur_women$sittingheight)

par(las = 1, bty = "n")
with(
  ansur_women,
  plot(density(sittingheight), main = "women", xlab = "sittingheight (mm)")
)

# Mark every empirical quantile with a vertical line and print its value.
# The seven staggered label heights are recycled over the nine quantiles.
abline(v = qvals_sittingheight)
text(
  qvals_sittingheight,
  labels = vapply(qvals_sittingheight, toString, character(1)),
  y = c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001))
)

# Normal approximation of the same distribution.
curve(
  dnorm(x, mean = women_sittingheight_mean, sd = women_sittingheight_sd),
  col = "darkorange", lwd = 2, add = TRUE, yaxt = "n"
)

# Quantiles of the fitted normal, taken at this section's own probability
# vector so they pair one-to-one with the empirical quantiles above.
qvals_women_norm <- qnorm(
  qlist_sittingheight,
  mean = women_sittingheight_mean,
  sd = women_sittingheight_sd
)
abline(v = qvals_women_norm, col = "darkorange")
# Labels are the first four characters of each normal quantile. They are
# drawn at the empirical-quantile x positions, 0.0015 above the black labels.
# NOTE(review): confirm the labels should not sit at qvals_women_norm instead.
text(
  qvals_sittingheight,
  labels = substr(vapply(qvals_women_norm, toString, character(1)), 1, 4),
  y = 0.0015 + c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001)),
  col = "darkorange"
)

# Women's weight: kernel density with empirical quantiles, overlaid with a
# normal curve fitted from the sample mean and standard deviation.
# NOTE(review): the axis label says kg, but the BMI formula in this file
# divides weightkg by 10 first -- confirm the units shown on the axis.
qlist_weightkg <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
qvals_weightkg <- quantile(ansur_women$weightkg, qlist_weightkg)
women_weightkg_mean <- mean(ansur_women$weightkg)
women_weightkg_sd <- sd(ansur_women$weightkg)

par(las = 1, bty = "n")
with(ansur_women, plot(density(weightkg), main = "women", xlab = "weight (kg)"))

# Mark every empirical quantile with a vertical line and print its value.
# The seven staggered label heights are recycled over the nine quantiles.
abline(v = qvals_weightkg)
text(
  qvals_weightkg,
  labels = vapply(qvals_weightkg, toString, character(1)),
  y = c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001))
)

# Normal approximation of the same distribution.
curve(
  dnorm(x, mean = women_weightkg_mean, sd = women_weightkg_sd),
  col = "red", lwd = 2, add = TRUE, yaxt = "n"
)

# Quantiles of the fitted normal, taken at this section's own probability
# vector so they pair one-to-one with the empirical quantiles above.
qvals_women_norm <- qnorm(
  qlist_weightkg,
  mean = women_weightkg_mean,
  sd = women_weightkg_sd
)
abline(v = qvals_women_norm, col = "red")
# Labels are the first four characters of each normal quantile. They are
# drawn at the empirical-quantile x positions, 0.0015 above the black labels.
# NOTE(review): confirm the labels should not sit at qvals_women_norm instead.
text(
  qvals_weightkg,
  labels = substr(vapply(qvals_women_norm, toString, character(1)), 1, 4),
  y = 0.0015 + c(seq(0.001, 0.004, 0.001), seq(0.003, 0.001, -0.001)),
  col = "red"
)

# Women's BMI: kernel density with empirical quantiles, overlaid with a
# normal curve fitted from the sample mean and standard deviation.
qlist_bmi <- c(0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
qvals_bmi <- quantile(ansur_women$bmi, qlist_bmi)
women_bmi_mean <- mean(ansur_women$bmi)
# Store the BMI spread under its own name: the normal curve and qnorm() call
# below read women_bmi_sd, and the stature sd must not be overwritten.
women_bmi_sd <- sd(ansur_women$bmi)

par(las = 1, bty = "n")
with(ansur_women, plot(density(bmi), main = "women", xlab = "bmi (kg/m2)"))

# Mark every empirical quantile with a vertical line and print its value.
# The seven staggered label heights are recycled over the nine quantiles.
abline(v = qvals_bmi)
text(
  qvals_bmi,
  labels = vapply(qvals_bmi, toString, character(1)),
  y = c(seq(0.01, 0.04, 0.01), seq(0.03, 0.01, -0.01))
)

# Normal approximation of the same distribution.
curve(
  dnorm(x, mean = women_bmi_mean, sd = women_bmi_sd),
  col = "red", lwd = 2, add = TRUE, yaxt = "n"
)

# Quantiles of the fitted normal, taken at this section's own probability
# vector so they pair one-to-one with the empirical quantiles above.
qvals_women_norm <- qnorm(qlist_bmi, mean = women_bmi_mean, sd = women_bmi_sd)
abline(v = qvals_women_norm, col = "red")
# Labels are the first four characters of each normal quantile. They are
# drawn at the empirical-quantile x positions, 0.015 above the black labels.
# NOTE(review): confirm the labels should not sit at qvals_women_norm instead.
text(
  qvals_bmi,
  labels = substr(vapply(qvals_women_norm, toString, character(1)), 1, 4),
  y = 0.015 + c(seq(0.01, 0.04, 0.01), seq(0.03, 0.01, -0.01)),
  col = "red"
)

##### Answer for question 2:
# Ranked from closest to furthest from normally distributed, judged by inspecting the plots:
#1- sitting height
#2- stature
#3- BMI
#4- Weight (kg)
#Make a plot of knee height sitting vs buttock-knee length for men.
#Add lines showing the 95th%ile of each variable.DONE
#What percentage of men in this dataset have both knee height and buttock-knee length less than the respective 95th%iles? Hints: If v is a vector of boolean (True/False) values, mean(v) returns the fraction that are true (sum(v) returns the number that are true). The ampersand (&) is “and” in conditionals.
# Scatter of men's sitting knee height against buttock-knee length. The title
# reports the correlation between the two, and the reference lines mark the
# 95th percentile of each variable.
plot(
  ansur_men$kneeheightsitting,
  ansur_men$buttockkneelength,
  pch = 20,
  main = paste(
    "r =",
    with(ansur_men, cor(kneeheightsitting, buttockkneelength))
  )
)
abline(
  h = quantile(ansur_men$buttockkneelength, probs = 0.95),
  v = quantile(ansur_men$kneeheightsitting, probs = 0.95)
)

NA
NA
NA
# TRUE for each man who is below the 95th percentile on BOTH knee height
# (sitting) and buttock-knee length; the mean of the logical vector is the
# fraction of men meeting both criteria.
vec <- with(
  ansur_men,
  kneeheightsitting < quantile(kneeheightsitting, 0.95) &
    buttockkneelength < quantile(buttockkneelength, 0.95)
)
mean(vec)
[1] 0.9272415
# In this case, with a correlation between the variables of 0.84, the percentage accommodated on both variables is only slightly smaller than the univariate accommodation (0.93 vs. 0.95).
#Repeat this analysis with buttock-popliteal length and hip breadth for women, except assess the number of women who have buttock-popliteal length greater than the 5th percentile and hip breadth sitting less than the 95th percentile. Is this the value you expect? Why is the fraction of the population who meet the criteria different for this analysis than for the preceding male analysis? Hints: look at the quadrants of the plots; what are the correlations between the variables?
# Scatter of women's buttock-popliteal length against sitting hip breadth,
# titled with their correlation. The vertical line is the 5th percentile of
# buttock-popliteal length; the horizontal line is the 95th percentile of
# hip breadth.
par(las = 1, bty = "n")
plot(
  ansur_women$buttockpopliteallength,
  ansur_women$hipbreadthsitting,
  pch = 20,
  main = paste(
    "r =",
    with(ansur_women, cor(buttockpopliteallength, hipbreadthsitting))
  )
)
abline(
  v = quantile(ansur_women$buttockpopliteallength, probs = 0.05),
  h = quantile(ansur_women$hipbreadthsitting, probs = 0.95)
)

#Why is the fraction of the population who meet the criteria different for this analysis than for the preceding male analysis? Hints: look at the quadrants of the plots; what are the correlations between the variables?
# Answer: looking at the plot, especially at the corner, we see that the women disaccommodated on each of the two variables are fully separate groups: no woman in the population is disaccommodated on both. Therefore, the correlation is weak.
# Mean of every measurement from column 7 onward, computed per sex.
male_means <- apply(a2m[, 7:ncol(a2m)], 2, mean)
female_means <- apply(a2f[, 7:ncol(a2f)], 2, mean)
# The two vectors differ in length, so a positional subtraction recycles the
# shorter one and pairs unrelated measurements. Subtract by measurement name
# instead, over the measurements that exist for both sexes.
shared_dims <- intersect(names(male_means), names(female_means))
mean_diffs <- male_means[shared_dims] - female_means[shared_dims]
Warning in male_means - female_means :
longer object length is not a multiple of shorter object length
which.max(mean_diffs)
weightkg
86
which.min(mean_diffs)
hipbreadthsitting
46
# Male-minus-female mean difference relative to the sum of the two means.
# Both vectors are indexed by the measurement names they share, so every
# ratio compares the same measurement and no recycling takes place.
shared_dims <- intersect(names(male_means), names(female_means))
difference <- (male_means[shared_dims] - female_means[shared_dims]) /
  (male_means[shared_dims] + female_means[shared_dims])
Warning in male_means + female_means :
longer object length is not a multiple of shorter object length
which.max(difference)
Weightlbs
91
which.min(difference)
hipbreadthsitting
46
#######Answer for question 4:
#if you were to pick a single dimension by which to classify a soldier as male or female, which should you pick? Which would be the worst choice?
## According to the results, the smallest fraction is for hip breadth sitting; therefore it is the best dimension to distinguish between females and males.
# The weight in kg has the largest fraction between men and women; therefore it is not suitable as a distinguishing dimension.
# I am surprised by the results, because I thought weight would be one of the largest differences between men and women.
# Mean and standard deviation of sitting hip breadth, per sex. The female
# statistics are taken from ansur_women (they were previously computed from
# the men's data, which made both sexes identical).
# Variable names are kept exactly as they were so any later use still resolves.
male_hipbreadthsitting_mean <- mean(ansur_men$hipbreadthsitting)
male_hipbreadthlength_sd <- sd(ansur_men$hipbreadthsitting)
female_hipbreadthlsitting_mean <- mean(ansur_women$hipbreadthsitting)
female_hipbreadthlength_sd <- sd(ansur_women$hipbreadthsitting)
