library(lessR)
##
## lessR 4.3.9 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read text, Excel, SPSS, SAS, or R data file
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
##
## sort_by
library(table1)
##
## Attaching package: 'table1'
## The following object is masked from 'package:lessR':
##
## label
## The following objects are masked from 'package:base':
##
## units, units<-
# Load the obesity data set into `ob`; Read() also prints a per-variable
# summary (type, missing count, unique values) to the console.
ob <- Read(
  "/Users/121493/Dropbox/_Conferences and Workshops/SiS Lectures 1-2025/Data/Obesity data.csv"
)
##
## >>> Suggestions
## Recommended binary format for data files: feather
## Create with Write(d, "your_file", format="feather")
## To read a csv or Excel file of variable labels, var_labels=TRUE
## Each row of the file: Variable Name, Variable Label
## Read into a data frame named l (the letter el)
##
## More details about your data, Enter: details() for d, or details(name)
##
## Data Types
## ------------------------------------------------------------
## character: Non-numeric data values
## integer: Numeric data values, integers only
## double: Numeric data values with decimal digits
## ------------------------------------------------------------
##
## Variable Missing Unique
## Name Type Values Values Values First and last values
## ------------------------------------------------------------------------------------------
## 1 id integer 1217 0 1217 1 2 3 ... 1225 1226 1227
## 2 gender character 1217 0 2 F M F ... F F F
## 3 height integer 1217 0 48 150 165 157 ... 149 144 141
## 4 weight integer 1217 0 55 49 52 57 ... 50 49 45
## 5 bmi double 1217 0 149 21.8 19.1 23.1 ... 22.5 23.6 22.6
## 6 age integer 1217 0 74 53 65 64 ... 57 67 58
## 7 bmc integer 1217 0 797 1312 1309 1230 ... 1409 1266 1228
## 8 bmd double 1217 0 67 0.88 0.84 0.84 ... 0.93 0.9 0.91
## 9 fat integer 1217 0 1182 17802 8381 19221 ... 16777 20094 14567
## 10 lean integer 1217 0 1177 28600 40229 ... 27272 28111
## 11 pcfat double 1217 0 309 37.3 16.8 34 ... 34.4 41.3 33.2
## ------------------------------------------------------------------------------------------
# Preview the first six rows of the freshly loaded data.
head(ob, n = 6L)
## id gender height weight bmi age bmc bmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
# Classify each subject's BMI into a weight-status category.
#
# Categories are left-closed, right-open intervals:
#   Underweight: bmi <  18.5
#   Normal:      18.5 <= bmi < 25
#   Overweight:  25   <= bmi < 30
#   Obese:       bmi >= 30
#
# The earlier version tested `bmi > 25.0` for "Overweight", so subjects with
# bmi exactly 25.0 matched no category and were left as NA; they surfaced as
# "Missing" in the summary tables. Deriving all categories from a single
# vector of break points with cut() makes such gaps impossible.
#
# `right = FALSE` gives [lower, upper) intervals. The result is a factor
# whose levels follow the order of `labels`; an NA bmi stays NA.
ob$status <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25.0, 30.0, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE
)
# Preview the first six rows again to confirm the new `status` column.
head(ob, n = 6L)
## id gender height weight bmi age bmc bmd fat lean pcfat status
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 Normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 Normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 Normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 Normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 Normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 Normal
# Descriptive summary of the whole sample (no stratification).
table1(
  ~ age + height + weight + pcfat + status,
  data = ob
)
| | Overall (N=1217) |
|---|---|
age | |
Mean (SD) | 47.2 (17.3) |
Median [Min, Max] | 48.0 [13.0, 88.0] |
height | |
Mean (SD) | 157 (7.98) |
Median [Min, Max] | 155 [136, 185] |
weight | |
Mean (SD) | 55.1 (9.40) |
Median [Min, Max] | 54.0 [34.0, 95.0] |
pcfat | |
Mean (SD) | 31.6 (7.18) |
Median [Min, Max] | 32.4 [9.20, 48.4] |
status | |
Underweight | 107 (8.8%) |
Normal | 865 (71.1%) |
Overweight | 215 (17.7%) |
Obese | 15 (1.2%) |
Missing | 15 (1.2%) |
# Same descriptive summary, stratified by gender (plus an overall column).
table1(
  ~ age + height + weight + pcfat + status | gender,
  data = ob
)
| | F (N=862) | M (N=355) | Overall (N=1217) |
|---|---|---|---|
age | |||
Mean (SD) | 48.6 (16.4) | 43.7 (18.8) | 47.2 (17.3) |
Median [Min, Max] | 49.0 [14.0, 85.0] | 44.0 [13.0, 88.0] | 48.0 [13.0, 88.0] |
height | |||
Mean (SD) | 153 (5.55) | 165 (6.73) | 157 (7.98) |
Median [Min, Max] | 153 [136, 170] | 165 [146, 185] | 155 [136, 185] |
weight | |||
Mean (SD) | 52.3 (7.72) | 62.0 (9.59) | 55.1 (9.40) |
Median [Min, Max] | 51.0 [34.0, 95.0] | 62.0 [38.0, 95.0] | 54.0 [34.0, 95.0] |
pcfat | |||
Mean (SD) | 34.7 (5.19) | 24.2 (5.76) | 31.6 (7.18) |
Median [Min, Max] | 34.7 [14.6, 48.4] | 24.6 [9.20, 39.0] | 32.4 [9.20, 48.4] |
status | |||
Underweight | 76 (8.8%) | 31 (8.7%) | 107 (8.8%) |
Normal | 626 (72.6%) | 239 (67.3%) | 865 (71.1%) |
Overweight | 139 (16.1%) | 76 (21.4%) | 215 (17.7%) |
Obese | 11 (1.3%) | 4 (1.1%) | 15 (1.2%) |
Missing | 10 (1.2%) | 5 (1.4%) | 15 (1.2%) |
# Distribution of percent body fat: histogram plus summary statistics,
# box-plot outliers and the binned frequency table.
Histogram(pcfat, data = ob)
## >>> Suggestions
## bin_width: set the width of each bin
## bin_start: set the start of the first bin
## bin_end: set the end of the last bin
## Histogram(pcfat, density=TRUE) # smoothed curve + histogram
## Plot(pcfat) # Violin/Box/Scatterplot (VBS) plot
##
## --- pcfat ---
##
## n miss mean sd min mdn max
## 1217 0 31.604786 7.182862 9.200000 32.400000 48.400000
##
##
## --- Outliers --- from the box plot: 10
##
## Small Large
## ----- -----
## 9.2
## 9.7
## 9.8
## 10.3
## 10.3
## 10.7
## 11.0
## 11.4
## 11.7
## 11.9
##
##
## Bin Width: 5
## Number of Bins: 9
##
## Bin Midpnt Count Prop Cumul.c Cumul.p
## -------------------------------------------------
## 5 > 10 7.5 3 0.00 3 0.00
## 10 > 15 12.5 26 0.02 29 0.02
## 15 > 20 17.5 61 0.05 90 0.07
## 20 > 25 22.5 128 0.11 218 0.18
## 25 > 30 27.5 244 0.20 462 0.38
## 30 > 35 32.5 338 0.28 800 0.66
## 35 > 40 37.5 294 0.24 1094 0.90
## 40 > 45 42.5 107 0.09 1201 0.99
## 45 > 50 47.5 16 0.01 1217 1.00
# Frequency of each weight-status category; rows with NA status are
# reported separately as missing values.
BarChart(status, data = ob)
## >>> Suggestions
## BarChart(status, horiz=TRUE) # horizontal bar chart
## BarChart(status, fill="reds") # red bars of varying lightness
## PieChart(status) # doughnut (ring) chart
## Plot(status) # bubble plot
## Plot(status, stat="count") # lollipop plot
##
## --- status ---
##
## Missing Values: 15
##
## Underweight Normal Overweight Obese Total
## Frequencies: 107 865 215 15 1202
## Proportions: 0.089 0.720 0.179 0.012 1.000
##
## Chi-squared test of null hypothesis of equal probabilities
## Chisq = 1480.609, df = 3, p-value = 0.000
# Scatterplot of percent body fat against BMI; the console output also
# reports the Pearson correlation with its test and confidence interval.
Plot(
  bmi, pcfat,
  data = ob,
  xlab = "BMI",
  ylab = "Percent body fat"
)
## >>> Suggestions or enter: style(suggest=FALSE)
## Plot(bmi, pcfat, enhance=TRUE) # many options
## Plot(bmi, pcfat, color="red") # exterior edge color of points
## Plot(bmi, pcfat, fit="lm", fit_se=c(.90,.99)) # fit line, stnd errors
## Plot(bmi, pcfat, out_cut=.10) # label top 10% from center as outliers
##
##
## >>> Pearson's product-moment correlation
##
## Number of paired values with neither missing, n = 1217
## Sample Correlation of bmi and pcfat: r = 0.441
##
## Hypothesis Test of 0 Correlation: t = 17.123, df = 1215, p-value = 0.000
## 95% Confidence Interval for Correlation: 0.394 to 0.485
##