library(readr)
## Warning: package 'readr' was built under R version 4.3.3
# Load the nutrition table with readr. NOTE(review): the path is an absolute,
# machine-specific location, so this line only runs where that file exists.
nutrition <- read_csv("C:/Users/cisco/Downloads/nutrition_subset")
## Rows: 961 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): food item
## dbl (3): weight_in_grams, saturated_fat, cholesterol
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Tag every row with its original position so a food item can still be
# identified after the table is reordered.
# seq_len() returns an empty vector for a zero-row table, whereas 1:n would
# produce c(1, 0) and make the column assignment fail.
n <- nrow(nutrition)
nutrition$Index <- seq_len(n)
# Preview the first rows, including the new Index column.
head(nutrition)
## # A tibble: 6 × 5
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 GELATIN; DRY … 7 0 0 1
## 2 SEAWEED; SPIRULINA; DRIED … 28.4 0.8 0 2
## 3 YEAST; BAKERS; DRY; ACTIVE … 7 0 0 3
## 4 PARMESAN CHEESE; GRATED … 28.4 5.4 22 4
## 5 PARMESAN CHEESE; GRATED … 100 19.1 79 5
## 6 PARMESAN CHEESE; GRATED … 5 1 4 6
# Reorder the rows from highest to lowest raw saturated fat (negating the
# column turns order()'s ascending sort into a descending one).
nutrition_sort <- nutrition[with(nutrition, order(-saturated_fat)), ]
# Show the five rows with the most saturated fat.
nutrition_sort[seq_len(5), ]
## # A tibble: 5 × 5
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 CHEESECAKE … 1110 120. 2053 379
## 2 ICE CREAM; VANLLA; RICH 16% F… 1188 118. 703 536
## 3 YELLOWCAKE W/ CHOCFRSTNG;COMM… 1108 92 609 459
## 4 CREME PIE … 910 90.1 46 582
## 5 LARD … 205 80.4 195 891
# Display the table ordered by raw saturated fat, highest first.
print(nutrition_sort)
## # A tibble: 961 × 5
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 CHEESECAKE … 1110 120. 2053 379
## 2 ICE CREAM; VANLLA; RICH 16% … 1188 118. 703 536
## 3 YELLOWCAKE W/ CHOCFRSTNG;COM… 1108 92 609 459
## 4 CREME PIE … 910 90.1 46 582
## 5 LARD … 205 80.4 195 891
## 6 ICE CREAM; VANLLA; REGULR 11… 1064 71.3 476 467
## 7 CARROT CAKE;CREMCHESE FRST;R… 1536 66 1183 423
## 8 BUTTER; SALTED … 113 57.1 247 710
## 9 BUTTER; UNSALTED … 113 57.1 247 711
## 10 DEVIL'S FOOD CAKE;CHOCFRST;F… 1107 55.6 598 411
## # ℹ 951 more rows
Without standardization, comparing the raw values would be misleading
since you’d be comparing nutrients across different total amounts of
food. For example, 1 CUP of parmesan cheese (100g) showing 19.1g of
saturated fat versus 1 TBSP (5g) showing 1.0g of saturated fat appears
to be a huge difference, but it’s simply due to the 20x difference in
portion size.
However, valid comparisons can be made if nutrient values are
standardized using the weight_in_grams column. By calculating nutrient
content per gram (dividing saturated_fat or cholesterol by
weight_in_grams), you can make direct, meaningful comparisons between
foods, regardless of their original serving sizes.
# Standardise saturated fat by the item's weight so foods can be compared
# independently of how much of each food the row describes.
nutrition[["saturated_fat_per_gram"]] <- with(
  nutrition,
  saturated_fat / weight_in_grams
)
# Reorder from highest to lowest saturated fat per gram.
nutrition_sort2 <- nutrition[with(nutrition, order(-saturated_fat_per_gram)), ]
# Show the five rows with the most saturated fat per gram.
nutrition_sort2[seq_len(5), ]
## # A tibble: 5 × 6
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 BUTTER; SALTED … 14 7.1 31 909
## 2 BUTTER; UNSALTED … 14 7.1 31 910
## 3 BUTTER; SALTED … 113 57.1 247 710
## 4 BUTTER; UNSALTED … 113 57.1 247 711
## 5 BUTTER; SALTED … 5 2.5 11 913
## # ℹ 1 more variable: saturated_fat_per_gram <dbl>
# Display the table ordered by saturated fat per gram, highest first.
print(nutrition_sort2)
## # A tibble: 961 × 6
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 BUTTER; SALTED … 14 7.1 31 909
## 2 BUTTER; UNSALTED … 14 7.1 31 910
## 3 BUTTER; SALTED … 113 57.1 247 710
## 4 BUTTER; UNSALTED … 113 57.1 247 711
## 5 BUTTER; SALTED … 5 2.5 11 913
## 6 BUTTER; UNSALTED … 5 2.5 11 914
## 7 LARD … 13 5.1 12 900
## 8 LARD … 205 80.4 195 891
## 9 IMITATION CREAMERS; POWDERED… 2 0.7 0 921
## 10 CHOCOLATE; BITTER OT BAKING … 28.4 9 0 211
## # ℹ 951 more rows
## # ℹ 1 more variable: saturated_fat_per_gram <dbl>
# Standardise cholesterol by the item's weight, mirroring the per-gram
# saturated fat column.
nutrition[["cholesterol_per_gram"]] <- with(
  nutrition,
  cholesterol / weight_in_grams
)
# Reorder from highest to lowest cholesterol per gram.
nutrition_sort3 <- nutrition[with(nutrition, order(-cholesterol_per_gram)), ]
# Show the five rows with the most cholesterol per gram.
nutrition_sort3[seq_len(5), ]
## # A tibble: 5 × 7
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 EGGS; RAW; YOLK … 17 1.6 213 120
## 2 CHICKEN LIVER; COOKED … 20 0.4 126 59
## 3 BEEF LIVER; FRIED … 85 2.5 410 46
## 4 EGGS; COOKED; FRIED … 46 1.9 211 168
## 5 EGGS; RAW; WHOLE … 50 1.6 213 185
## # ℹ 2 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>
# Display the table ordered by cholesterol per gram, highest first.
print(nutrition_sort3)
## # A tibble: 961 × 7
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 EGGS; RAW; YOLK … 17 1.6 213 120
## 2 CHICKEN LIVER; COOKED … 20 0.4 126 59
## 3 BEEF LIVER; FRIED … 85 2.5 410 46
## 4 EGGS; COOKED; FRIED … 46 1.9 211 168
## 5 EGGS; RAW; WHOLE … 50 1.6 213 185
## 6 EGGS; COOKED; HARD-COOKED … 50 1.6 213 187
## 7 EGGS; COOKED; POACHED … 50 1.5 212 186
## 8 EGGS; COOKED; SCRAMBLED/OMEL… 61 2.2 215 190
## 9 BUTTER; SALTED … 14 7.1 31 909
## 10 BUTTER; UNSALTED … 14 7.1 31 910
## # ℹ 951 more rows
## # ℹ 2 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>
# Convert saturated fat per gram to z-scores. scale() returns a one-column
# matrix, which is stored as-is in the new column.
nutrition$saturated_fat_per_gram_z <- scale(nutrition$saturated_fat_per_gram)
# Upper-tail outliers: rows whose z-score exceeds 3. which() keeps only the
# positions where the comparison is TRUE, so NA z-scores are not selected.
nutrition_outliersA <- nutrition[
  which(nutrition[["saturated_fat_per_gram_z"]] > 3),
]
print(nutrition_outliersA)
## # A tibble: 15 × 8
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 CHOCOLATE; BITTER OT BAKING … 28.4 9 0 211
## 2 COCONUT; RAW; SHREDDED … 80 23.8 0 449
## 3 COCONUT; DRIED; SWEETND;SHRE… 93 29.3 0 493
## 4 COCONUT; RAW; PIECE … 45 13.4 0 577
## 5 BUTTER; SALTED … 113 57.1 247 710
## 6 BUTTER; UNSALTED … 113 57.1 247 711
## 7 LARD … 205 80.4 195 891
## 8 FATS; COOKING/VEGETBL SHORTE… 13 3.3 0 899
## 9 LARD … 13 5.1 12 900
## 10 FATS; COOKING/VEGETBL SHORTE… 205 51.3 0 908
## 11 BUTTER; SALTED … 14 7.1 31 909
## 12 BUTTER; UNSALTED … 14 7.1 31 910
## 13 BUTTER; SALTED … 5 2.5 11 913
## 14 BUTTER; UNSALTED … 5 2.5 11 914
## 15 IMITATION CREAMERS; POWDERED… 2 0.7 0 921
## # ℹ 3 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>,
## # saturated_fat_per_gram_z <dbl[,1]>
# Lower-tail outliers: rows whose saturated-fat-per-gram z-score is below -3.
nutrition_outliersB <- nutrition[
  which(nutrition[["saturated_fat_per_gram_z"]] < -3),
]
print(nutrition_outliersB)
## # A tibble: 0 × 8
## # ℹ 8 variables: food item <chr>, weight_in_grams <dbl>, saturated_fat <dbl>,
## # cholesterol <dbl>, Index <int>, saturated_fat_per_gram <dbl>,
## # cholesterol_per_gram <dbl>, saturated_fat_per_gram_z <dbl[,1]>
There are no (0) saturated-fat-per-gram outliers at the low end of the scale (z < -3).
# Convert cholesterol per gram to z-scores. scale() returns a one-column
# matrix, which is stored as-is in the new column.
nutrition$cholesterol_per_gram_z <- scale(nutrition$cholesterol_per_gram)
# Upper-tail outliers: rows whose z-score exceeds 3. which() keeps only the
# positions where the comparison is TRUE, so NA z-scores are not selected.
nutrition_outliersC <- nutrition[
  which(nutrition[["cholesterol_per_gram_z"]] > 3),
]
print(nutrition_outliersC)
## # A tibble: 8 × 9
## `food item` weight_in_grams saturated_fat cholesterol Index
## <chr> <dbl> <dbl> <dbl> <int>
## 1 BEEF LIVER; FRIED … 85 2.5 410 46
## 2 CHICKEN LIVER; COOKED … 20 0.4 126 59
## 3 EGGS; RAW; YOLK … 17 1.6 213 120
## 4 EGGS; COOKED; FRIED … 46 1.9 211 168
## 5 EGGS; RAW; WHOLE … 50 1.6 213 185
## 6 EGGS; COOKED; POACHED … 50 1.5 212 186
## 7 EGGS; COOKED; HARD-COOKED … 50 1.6 213 187
## 8 EGGS; COOKED; SCRAMBLED/OMELE… 61 2.2 215 190
## # ℹ 4 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>,
## # saturated_fat_per_gram_z <dbl[,1]>, cholesterol_per_gram_z <dbl[,1]>