Unfortunately, I forgot where I found this dataset, so I cannot point anyone who wants it to the original source.
library(readr)
# Load the body-fat measurements into a tibble (column types are guessed by readr).
BodyFat <- read_csv(file = "C:/Documents/My Excel/BodyFat.csv")
## Warning: Missing column names filled in: 'X18' [18]
## Parsed with column specification:
## cols(
## IDNO = col_integer(),
## BODYFAT = col_double(),
## DENSITY = col_double(),
## AGE = col_integer(),
## WEIGHT = col_double(),
## HEIGHT = col_double(),
## ADIPOSITY = col_double(),
## NECK = col_double(),
## CHEST = col_double(),
## ABDOMEN = col_double(),
## HIP = col_double(),
## THIGH = col_double(),
## KNEE = col_double(),
## ANKLE = col_double(),
## BICEPS = col_double(),
## FOREARM = col_double(),
## WRIST = col_double(),
## X18 = col_character()
## )
# Drop the unnamed 18th column that read_csv() labelled "X18".
BodyFat$X18 <- NULL
# View() opens a spreadsheet-style viewer, so only call it in an interactive session.
if (interactive()) {
  View(BodyFat)
}
# str() prints the structure itself and returns NULL invisibly; wrapping it in
# print() only adds a stray "NULL" line after the structure listing.
str(BodyFat)
## Classes 'tbl_df', 'tbl' and 'data.frame': 252 obs. of 17 variables:
## $ IDNO : int 1 2 3 4 5 6 7 8 9 10 ...
## $ BODYFAT : num 12.6 6.9 24.6 10.9 27.8 20.6 19 12.8 5.1 12 ...
## $ DENSITY : num 1.07 1.09 1.04 1.08 1.03 ...
## $ AGE : int 23 22 22 26 24 24 26 25 25 23 ...
## $ WEIGHT : num 154 173 154 185 184 ...
## $ HEIGHT : num 67.8 72.2 66.2 72.2 71.2 ...
## $ ADIPOSITY: num 23.7 23.4 24.7 24.9 25.6 26.5 26.2 23.6 24.6 25.8 ...
## $ NECK : num 36.2 38.5 34 37.4 34.4 39 36.4 37.8 38.1 42.1 ...
## $ CHEST : num 93.1 93.6 95.8 101.8 97.3 ...
## $ ABDOMEN : num 85.2 83 87.9 86.4 100 94.4 90.7 88.5 82.5 88.6 ...
## $ HIP : num 94.5 98.7 99.2 101.2 101.9 ...
## $ THIGH : num 59 58.7 59.6 60.1 63.2 66 58.4 60 62.9 63.1 ...
## $ KNEE : num 37.3 37.3 38.9 37.3 42.2 42 38.3 39.4 38.3 41.7 ...
## $ ANKLE : num 21.9 23.4 24 22.8 24 25.6 22.9 23.2 23.8 25 ...
## $ BICEPS : num 32 30.5 28.8 32.4 32.2 35.7 31.9 30.5 35.9 35.6 ...
## $ FOREARM : num 27.4 28.9 25.2 29.4 27.7 30.6 27.8 29 31.1 30 ...
## $ WRIST : num 17.1 18.2 16.6 18.2 17.7 18.8 17.7 18.8 18.2 19.2 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 18
## .. ..$ IDNO : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ BODYFAT : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ DENSITY : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ AGE : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ WEIGHT : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ HEIGHT : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ ADIPOSITY: list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ NECK : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ CHEST : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ ABDOMEN : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ HIP : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ THIGH : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ KNEE : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ ANKLE : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ BICEPS : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ FOREARM : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ WRIST : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ X18 : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
## NULL
# Per-column summary statistics (min, quartiles, mean, max).
# NOTE(review): the minima of BODYFAT (0) and HEIGHT (29.5) are worth checking
# as possible data-entry errors.
summary(object = BodyFat)
## IDNO BODYFAT DENSITY AGE
## Min. : 1.00 Min. : 0.00 Min. :0.995 Min. :22.00
## 1st Qu.: 63.75 1st Qu.:12.80 1st Qu.:1.041 1st Qu.:35.75
## Median :126.50 Median :19.00 Median :1.055 Median :43.00
## Mean :126.50 Mean :18.94 Mean :1.056 Mean :44.88
## 3rd Qu.:189.25 3rd Qu.:24.60 3rd Qu.:1.070 3rd Qu.:54.00
## Max. :252.00 Max. :45.10 Max. :1.109 Max. :81.00
## WEIGHT HEIGHT ADIPOSITY NECK
## Min. :118.5 Min. :29.50 Min. :18.10 Min. :31.10
## 1st Qu.:159.0 1st Qu.:68.25 1st Qu.:23.10 1st Qu.:36.40
## Median :176.5 Median :70.00 Median :25.05 Median :38.00
## Mean :178.9 Mean :70.15 Mean :25.44 Mean :37.99
## 3rd Qu.:197.0 3rd Qu.:72.25 3rd Qu.:27.32 3rd Qu.:39.42
## Max. :363.1 Max. :77.75 Max. :48.90 Max. :51.20
## CHEST ABDOMEN HIP THIGH
## Min. : 79.30 Min. : 69.40 Min. : 85.0 Min. :47.20
## 1st Qu.: 94.35 1st Qu.: 84.58 1st Qu.: 95.5 1st Qu.:56.00
## Median : 99.65 Median : 90.95 Median : 99.3 Median :59.00
## Mean :100.82 Mean : 92.56 Mean : 99.9 Mean :59.41
## 3rd Qu.:105.38 3rd Qu.: 99.33 3rd Qu.:103.5 3rd Qu.:62.35
## Max. :136.20 Max. :148.10 Max. :147.7 Max. :87.30
## KNEE ANKLE BICEPS FOREARM
## Min. :33.00 Min. :19.1 Min. :24.80 Min. :21.00
## 1st Qu.:36.98 1st Qu.:22.0 1st Qu.:30.20 1st Qu.:27.30
## Median :38.50 Median :22.8 Median :32.05 Median :28.70
## Mean :38.59 Mean :23.1 Mean :32.27 Mean :28.66
## 3rd Qu.:39.92 3rd Qu.:24.0 3rd Qu.:34.33 3rd Qu.:30.00
## Max. :49.10 Max. :33.9 Max. :45.00 Max. :34.90
## WRIST
## Min. :15.80
## 1st Qu.:17.60
## Median :18.30
## Mean :18.23
## 3rd Qu.:18.80
## Max. :21.40
# Number of distinct ages in the sample.
length(unique(BodyFat[["AGE"]]))
## [1] 51
# The distinct ages themselves.
unique(BodyFat[["AGE"]])
## [1] 23 22 26 24 25 27 32 30 35 34 28 33 31 29 41 49 40 50 46 45 44 48 39
## [24] 43 47 51 42 54 58 62 61 56 57 55 69 81 66 67 64 70 72 53 38 52 36 37
## [47] 60 63 65 68 74
Looking at the graphs below, there is a weak upward correlation between age and body fat, but the points are widely scattered. The relationship is weak because people of any age can have very different body fat levels depending on their diet and health.
library(ggplot2)
# Age vs. body fat; point colour follows body fat and point size follows age.
# The layer inherits x and y from ggplot(), so only the extra aesthetics are mapped here.
ggplot(BodyFat, aes(x = AGE, y = BODYFAT)) +
  geom_point(aes(color = BODYFAT, size = AGE))
# The same relationship as a plain jittered scatter plot.
ggplot(BodyFat, aes(x = AGE, y = BODYFAT)) +
  geom_jitter() # There is some correlation b/t age and bodyfat
The plots below show no visible correlation between age and weight, which suggests that age has little to do with weight in this sample.
# Age vs. weight; point colour follows weight and point size follows age.
# x and y are inherited from ggplot(). Aesthetic names are case-sensitive, so a
# capital `X` in the layer mapping would be ignored with a warning.
ggplot(BodyFat, aes(x = AGE, y = WEIGHT)) +
  geom_jitter(aes(color = WEIGHT, size = AGE))
# The same relationship as a plain jittered scatter plot.
ggplot(BodyFat, aes(x = AGE, y = WEIGHT)) +
  geom_jitter() # No correlation b/t age and weight
# Density curve of the weight distribution.
ggplot(data = BodyFat) +
  geom_density(mapping = aes(x = WEIGHT))
From the density graph shown above, most of the people weigh between roughly 150 and 200 lbs; the summary statistics put the median weight at 176.5 lbs.
There is a strong correlation between height and weight as shown by these 2 graphs
# Weight vs. height; point colour follows weight and point size follows height.
# x and y are inherited from ggplot(). Aesthetic names are case-sensitive, so a
# capital `X` in the layer mapping would be ignored with a warning.
ggplot(BodyFat, aes(x = WEIGHT, y = HEIGHT)) +
  geom_jitter(aes(color = WEIGHT, size = HEIGHT))
# The same relationship as a plain jittered scatter plot.
ggplot(BodyFat, aes(x = WEIGHT, y = HEIGHT)) +
  geom_jitter()
# The five rows with the highest body fat. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned; head() avoids NA rows if the data
# ever has fewer than five observations.
BodyFat[head(order(BodyFat$BODYFAT, decreasing = TRUE), 5), ]
## # A tibble: 5 x 17
## IDNO BODYFAT DENSITY AGE WEIGHT HEIGHT ADIPOSITY NECK CHEST ABDOMEN
## <int> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 216 45.1 0.995 51 219 64 37.6 41.2 120. 122.
## 2 36 38.2 1.01 49 192. 65 32 38.4 118. 113.
## 3 192 36.5 1.01 42 244. 76 29.8 41.8 115. 114.
## 4 169 34.7 1.02 35 228. 69.5 33.3 40.4 115. 116.
## 5 39 33.8 1.02 46 363. 72.2 48.9 51.2 136. 148.
## # ... with 7 more variables: HIP <dbl>, THIGH <dbl>, KNEE <dbl>,
## # ANKLE <dbl>, BICEPS <dbl>, FOREARM <dbl>, WRIST <dbl>
# The five rows with the lowest body fat. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned; head() avoids NA rows if the data
# ever has fewer than five observations.
BodyFat[head(order(BodyFat$BODYFAT, decreasing = FALSE), 5), ]
## # A tibble: 5 x 17
## IDNO BODYFAT DENSITY AGE WEIGHT HEIGHT ADIPOSITY NECK CHEST ABDOMEN
## <int> <dbl> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 182 0 1.11 40 118. 68 18.1 33.8 79.3 69.4
## 2 172 1.9 1.10 35 126. 65.5 20.6 34 90.8 75
## 3 171 4.1 1.09 35 152. 67.8 23.4 37 92.2 81.9
## 4 26 4.6 1.09 27 159. 71.5 21.9 35.7 89.6 79.7
## 5 29 4.7 1.09 27 133. 64.8 22.4 36.4 93.5 73.9
## # ... with 7 more variables: HIP <dbl>, THIGH <dbl>, KNEE <dbl>,
## # ANKLE <dbl>, BICEPS <dbl>, FOREARM <dbl>, WRIST <dbl>
# Simple linear regression of body fat on age.
BfvsAge <- lm(formula = BODYFAT ~ AGE, data = BodyFat)
# Check whether the R-squared agrees with the age vs. body fat scatter plot.
summary(BfvsAge)
##
## Call:
## lm(formula = BODYFAT ~ AGE, data = BodyFat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.0697 -5.7025 0.2846 4.8301 25.0739
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.95546 1.73576 6.312 1.25e-09 ***
## AGE 0.17786 0.03724 4.776 3.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.435 on 250 degrees of freedom
## Multiple R-squared: 0.08362, Adjusted R-squared: 0.07996
## F-statistic: 22.81 on 1 and 250 DF, p-value: 3.045e-06
# Simple linear regression of body fat on weight.
BfvsWeight <- lm(formula = BODYFAT ~ WEIGHT, data = BodyFat)
summary(BfvsWeight)
##
## Call:
## lm(formula = BODYFAT ~ WEIGHT, data = BodyFat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.434 -4.315 0.079 4.540 19.681
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.99515 2.38906 -4.184 3.97e-05 ***
## WEIGHT 0.16171 0.01318 12.273 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.135 on 250 degrees of freedom
## Multiple R-squared: 0.376, Adjusted R-squared: 0.3735
## F-statistic: 150.6 on 1 and 250 DF, p-value: < 2.2e-16
# Simple linear regression of body fat on height.
BfvsHeight <- lm(formula = BODYFAT ~ HEIGHT, data = BodyFat)
# TODO: explain why the R-squared is so low.
summary(BfvsHeight)
##
## Call:
## lm(formula = BODYFAT ~ HEIGHT, data = BodyFat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -19.3437 -6.1422 0.4013 5.5710 25.0021
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.1654 9.3635 3.435 0.000693 ***
## HEIGHT -0.1886 0.1333 -1.415 0.158453
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.735 on 250 degrees of freedom
## Multiple R-squared: 0.00794, Adjusted R-squared: 0.003972
## F-statistic: 2.001 on 1 and 250 DF, p-value: 0.1585
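I fitted the three regressions shown above, in which body fat (the dependent variable) is regressed on age, weight, and height (the independent variables), one at a time. For body fat vs. age, \(R^2 \approx 8.4\%\): the slope is statistically significant (p ≈ 3e-06), but age explains very little of the variation in body fat. For body fat vs. height, \(R^2 < 1\%\) and the slope is not significant (p ≈ 0.16), so height tells us essentially nothing about body fat. This is plausible because body fat depends on many other factors, such as diet and exercise.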
Now for body fat vs. weight: the \(R^2\) was around 37.6 percent, which is much higher than for age or height. This suggests that some of the patients are heavy because they carry a lot of fat, while others are heavy because they have a lot of muscle. The model is therefore somewhat informative, but far from precise.
# Multiple regression of body fat on every other column except the ID number.
BfvsALL <- lm(
  BODYFAT ~ DENSITY + AGE + WEIGHT + HEIGHT + ADIPOSITY +
    NECK + CHEST + ABDOMEN + HIP + THIGH + KNEE + ANKLE + BICEPS +
    FOREARM + WRIST,
  data = BodyFat
)
summary(BfvsALL)
##
## Call:
## lm(formula = BODYFAT ~ DENSITY + AGE + WEIGHT + HEIGHT + ADIPOSITY +
## NECK + CHEST + ABDOMEN + HIP + THIGH + KNEE + ANKLE + BICEPS +
## FOREARM + WRIST, data = BodyFat)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.7632 -0.3308 -0.0954 0.2078 13.9487
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.190e+02 9.802e+00 42.750 <2e-16 ***
## DENSITY -3.816e+02 7.559e+00 -50.481 <2e-16 ***
## AGE 1.078e-02 8.808e-03 1.224 0.222
## WEIGHT 1.197e-02 1.467e-02 0.816 0.415
## HEIGHT -1.782e-02 3.019e-02 -0.590 0.556
## ADIPOSITY -5.493e-02 8.113e-02 -0.677 0.499
## NECK -2.062e-02 6.427e-02 -0.321 0.749
## CHEST 2.993e-02 2.856e-02 1.048 0.296
## ABDOMEN 2.260e-02 3.016e-02 0.749 0.454
## HIP 1.611e-02 4.023e-02 0.401 0.689
## THIGH 1.354e-03 3.980e-02 0.034 0.973
## KNEE -3.978e-02 6.705e-02 -0.593 0.554
## ANKLE -7.170e-02 6.073e-02 -1.181 0.239
## BICEPS -6.291e-02 4.688e-02 -1.342 0.181
## FOREARM 4.324e-02 5.447e-02 0.794 0.428
## WRIST 3.640e-02 1.480e-01 0.246 0.806
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.166 on 236 degrees of freedom
## Multiple R-squared: 0.9787, Adjusted R-squared: 0.9774
## F-statistic: 723.9 on 15 and 236 DF, p-value: < 2.2e-16
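From this regression, I got an adjusted \(R^2\) of 97.74% (multiple \(R^2\) of 97.87%), so the model fits the data extremely well. However, DENSITY is the only predictor with a statistically significant coefficient; every other measurement has a p-value above 0.18. The high \(R^2\) is therefore driven almost entirely by density, not by all of the independent variables together.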