Loading packages

library(lessR)

## 
## lessR 4.3.9                         feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")   Read text, Excel, SPSS, SAS, or R data file
##   d is default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, 
## graphics, testing means and proportions, regression, factor analysis,
## customization, and descriptive statistics from pivot tables
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including time series forecasting
##   Enter: news(package="lessR")
## 
## Interactive data analysis
##   Enter: interact()

## 
## Attaching package: 'lessR'

## The following object is masked from 'package:base':
## 
##     sort_by

library(table1)

## 
## Attaching package: 'table1'

## The following object is masked from 'package:lessR':
## 
##     label

## The following objects are masked from 'package:base':
## 
##     units, units<-

This is an exercise in using R

First, reading data using lessR

# Read the obesity data set (CSV) into the data frame `ob` with lessR::Read().
ob <- Read("/Users/121493/Dropbox/_Conferences and Workshops/SiS Lectures 1-2025/Data/Obesity data.csv")

## 
## >>> Suggestions
## Recommended binary format for data files: feather
##   Create with Write(d, "your_file", format="feather")
## To read a csv or Excel file of variable labelsvar_labels=TRUE
##   Each row of the file:  Variable Name, Variable Label
## Read into a data frame named l  (the letter el)
## 
## More details about your data, Enter:  details()  for d, or  details(name)
## 
## Data Types
## ------------------------------------------------------------
## character: Non-numeric data values
## integer: Numeric data values, integers only
## double: Numeric data values with decimal digits
## ------------------------------------------------------------
## 
##     Variable                  Missing  Unique 
##         Name     Type  Values  Values  Values   First and last values
## ------------------------------------------------------------------------------------------
##  1        id   integer   1217       0    1217   1  2  3 ... 1225  1226  1227
##  2    gender character   1217       0       2   F  M  F ... F  F  F
##  3    height   integer   1217       0      48   150  165  157 ... 149  144  141
##  4    weight   integer   1217       0      55   49  52  57 ... 50  49  45
##  5       bmi    double   1217       0     149   21.8  19.1  23.1 ... 22.5  23.6  22.6
##  6       age   integer   1217       0      74   53  65  64 ... 57  67  58
##  7       bmc   integer   1217       0     797   1312  1309  1230 ... 1409  1266  1228
##  8       bmd    double   1217       0      67   0.88  0.84  0.84 ... 0.93  0.9  0.91
##  9       fat   integer   1217       0    1182   17802  8381  19221 ... 16777  20094  14567
## 10      lean   integer   1217       0    1177   28600  40229 ... 27272  28111
## 11     pcfat    double   1217       0     309   37.3  16.8  34 ... 34.4  41.3  33.2
## ------------------------------------------------------------------------------------------

# Show the first six rows to check that the file was read as expected.
head(ob, n = 6L)

##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2

Next, coding data

# Classify BMI into weight-status categories using half-open intervals
# [lower, upper), so that every boundary value falls into exactly one
# category:
#   Underweight: bmi <  18.5
#   Normal:      18.5 <= bmi < 25.0
#   Overweight:  25.0 <= bmi < 30.0
#   Obese:       bmi >= 30.0
# The previous rule used `bmi > 25.0` for "Overweight", which left rows with
# a BMI of exactly 25.0 unclassified (NA); they were reported as "Missing"
# in the summary tables even though bmi itself has no missing values.
# cut() returns a factor whose levels follow the order of `labels`.
ob$status <- cut(
  ob$bmi,
  breaks = c(-Inf, 18.5, 25.0, 30.0, Inf),
  labels = c("Underweight", "Normal", "Overweight", "Obese"),
  right = FALSE  # closed on the left: 25.0 -> "Overweight", 30.0 -> "Obese"
)

# Show the first six rows again to confirm the new `status` column.
head(ob, n = 6L)

##   id gender height weight  bmi age  bmc  bmd   fat  lean pcfat status
## 1  1      F    150     49 21.8  53 1312 0.88 17802 28600  37.3 Normal
## 2  2      M    165     52 19.1  65 1309 0.84  8381 40229  16.8 Normal
## 3  3      F    157     57 23.1  64 1230 0.84 19221 36057  34.0 Normal
## 4  4      F    156     53 21.8  56 1171 0.80 17472 33094  33.8 Normal
## 5  5      M    160     51 19.9  54 1681 0.98  7336 40621  14.8 Normal
## 6  6      F    153     47 20.1  52 1358 0.91 14904 30068  32.2 Normal

Getting a frequency table

# Descriptive summary of the selected variables for the whole sample.
table1(
  ~ age + height + weight + pcfat + status,
  data = ob
)

	Overall (N=1217)
age
Mean (SD)	47.2 (17.3)
Median [Min, Max]	48.0 [13.0, 88.0]
height
Mean (SD)	157 (7.98)
Median [Min, Max]	155 [136, 185]
weight
Mean (SD)	55.1 (9.40)
Median [Min, Max]	54.0 [34.0, 95.0]
pcfat
Mean (SD)	31.6 (7.18)
Median [Min, Max]	32.4 [9.20, 48.4]
status
Underweight	107 (8.8%)
Normal	865 (71.1%)
Overweight	215 (17.7%)
Obese	15 (1.2%)
Missing	15 (1.2%)

# Same descriptive summary, with one column per gender plus an overall column.
table1(
  ~ age + height + weight + pcfat + status | gender,
  data = ob
)

	F (N=862)	M (N=355)	Overall (N=1217)
age
Mean (SD)	48.6 (16.4)	43.7 (18.8)	47.2 (17.3)
Median [Min, Max]	49.0 [14.0, 85.0]	44.0 [13.0, 88.0]	48.0 [13.0, 88.0]
height
Mean (SD)	153 (5.55)	165 (6.73)	157 (7.98)
Median [Min, Max]	153 [136, 170]	165 [146, 185]	155 [136, 185]
weight
Mean (SD)	52.3 (7.72)	62.0 (9.59)	55.1 (9.40)
Median [Min, Max]	51.0 [34.0, 95.0]	62.0 [38.0, 95.0]	54.0 [34.0, 95.0]
pcfat
Mean (SD)	34.7 (5.19)	24.2 (5.76)	31.6 (7.18)
Median [Min, Max]	34.7 [14.6, 48.4]	24.6 [9.20, 39.0]	32.4 [9.20, 48.4]
status
Underweight	76 (8.8%)	31 (8.7%)	107 (8.8%)
Normal	626 (72.6%)	239 (67.3%)	865 (71.1%)
Overweight	139 (16.1%)	76 (21.4%)	215 (17.7%)
Obese	11 (1.3%)	4 (1.1%)	15 (1.2%)
Missing	10 (1.2%)	5 (1.4%)	15 (1.2%)

Graphs

# Histogram of percent body fat (lessR also prints summary statistics).
Histogram(pcfat, data = ob)

## >>> Suggestions 
## bin_width: set the width of each bin 
## bin_start: set the start of the first bin 
## bin_end: set the end of the last bin 
## Histogram(pcfat, density=TRUE)  # smoothed curve + histogram 
## Plot(pcfat)  # Violin/Box/Scatterplot (VBS) plot 
## 
## --- pcfat --- 
##  
##        n   miss            mean              sd             min             mdn             max 
##      1217      0       31.604786        7.182862        9.200000       32.400000       48.400000 
## 
##   
## --- Outliers ---     from the box plot: 10 
##  
## Small       Large 
## -----       ----- 
##   9.2            
##   9.7            
##   9.8            
##  10.3            
##  10.3            
##  10.7            
##  11.0            
##  11.4            
##  11.7            
##  11.9            
## 
## 
## Bin Width: 5 
## Number of Bins: 9 
##  
##      Bin  Midpnt  Count    Prop  Cumul.c  Cumul.p 
## ------------------------------------------------- 
##   5 > 10     7.5      3    0.00        3     0.00 
##  10 > 15    12.5     26    0.02       29     0.02 
##  15 > 20    17.5     61    0.05       90     0.07 
##  20 > 25    22.5    128    0.11      218     0.18 
##  25 > 30    27.5    244    0.20      462     0.38 
##  30 > 35    32.5    338    0.28      800     0.66 
##  35 > 40    37.5    294    0.24     1094     0.90 
##  40 > 45    42.5    107    0.09     1201     0.99 
##  45 > 50    47.5     16    0.01     1217     1.00

# Bar chart of the weight-status categories.
BarChart(status, data = ob)

## >>> Suggestions
## BarChart(status, horiz=TRUE)  # horizontal bar chart
## BarChart(status, fill="reds")  # red bars of varying lightness
## PieChart(status)  # doughnut (ring) chart
## Plot(status)  # bubble plot
## Plot(status, stat="count")  # lollipop plot 
## 
## --- status --- 
## 
## Missing Values: 15 
## 
##                Underweight  Normal  Overweight  Obese      Total 
## Frequencies:           107     865         215     15       1202 
## Proportions:         0.089   0.720       0.179  0.012      1.000 
## 
## Chi-squared test of null hypothesis of equal probabilities 
##   Chisq = 1480.609, df = 3, p-value = 0.000

# Scatterplot of percent body fat against BMI with labelled axes.
Plot(
  bmi, pcfat,
  data = ob,
  xlab = "BMI",
  ylab = "Percent body fat"
)

## >>> Suggestions  or  enter: style(suggest=FALSE)
## Plot(bmi, pcfat, enhance=TRUE)  # many options
## Plot(bmi, pcfat, color="red")  # exterior edge color of points
## Plot(bmi, pcfat, fit="lm", fit_se=c(.90,.99))  # fit line, stnd errors
## Plot(bmi, pcfat, out_cut=.10)  # label top 10% from center as outliers 
## 
## 
## >>> Pearson's product-moment correlation 
##  
## Number of paired values with neither missing, n = 1217 
## Sample Correlation of bmi and pcfat: r = 0.441 
##   
## Hypothesis Test of 0 Correlation:  t = 17.123,  df = 1215,  p-value = 0.000 
## 95% Confidence Interval for Correlation:  0.394 to 0.485 
##

Day 1 exercise

Tuan Nguyen

2025-01-02

Loading packages

This is an exercise in using R

First, reading data using lessR

Next, coding data

Getting a frequency table

Graphs