library(readr)
## Warning: package 'readr' was built under R version 4.3.3
# Load the nutrition subset into a tibble. The path is an absolute location on
# the author's machine, so it must be adjusted to run elsewhere.
nutrition <- read_csv(file = "C:/Users/cisco/Downloads/nutrition_subset")
## Rows: 961 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): food item
## dbl (3): weight_in_grams, saturated_fat, cholesterol
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1. The elements in the data set are food items of various sizes, ranging from a teaspoon of cinnamon to an entire carrot cake.

# Record each row's original position so a food item can still be traced back
# to the source data after the table is re-sorted.
# seq_len() is used rather than 1:n because 1:n evaluates to c(1, 0) when the
# table has zero rows, which would not match an empty table.
n <- nrow(nutrition)
nutrition$Index <- seq_len(n)
head(nutrition)
## # A tibble: 6 × 5
##   `food item`                    weight_in_grams saturated_fat cholesterol Index
##   <chr>                                    <dbl>         <dbl>       <dbl> <int>
## 1 GELATIN; DRY                 …             7             0             0     1
## 2 SEAWEED; SPIRULINA; DRIED    …            28.4           0.8           0     2
## 3 YEAST; BAKERS; DRY; ACTIVE   …             7             0             0     3
## 4 PARMESAN CHEESE; GRATED      …            28.4           5.4          22     4
## 5 PARMESAN CHEESE; GRATED      …           100            19.1          79     5
## 6 PARMESAN CHEESE; GRATED      …             5             1             4     6
# Order the foods by their raw saturated_fat value, largest first, and show
# the five highest entries.
nutrition_sort <- nutrition[with(nutrition, order(-saturated_fat)), ]
nutrition_sort[seq_len(5), ]
## # A tibble: 5 × 5
##   `food item`                    weight_in_grams saturated_fat cholesterol Index
##   <chr>                                    <dbl>         <dbl>       <dbl> <int>
## 1 CHEESECAKE                   …            1110         120.         2053   379
## 2 ICE CREAM; VANLLA; RICH 16% F…            1188         118.          703   536
## 3 YELLOWCAKE W/ CHOCFRSTNG;COMM…            1108          92           609   459
## 4 CREME PIE                    …             910          90.1          46   582
## 5 LARD                         …             205          80.4         195   891
# Print the table sorted by raw saturated fat; the tibble print method shows
# only the first 10 of its rows.
print(nutrition_sort)
## # A tibble: 961 × 5
##    `food item`                   weight_in_grams saturated_fat cholesterol Index
##    <chr>                                   <dbl>         <dbl>       <dbl> <int>
##  1 CHEESECAKE                  …            1110         120.         2053   379
##  2 ICE CREAM; VANLLA; RICH 16% …            1188         118.          703   536
##  3 YELLOWCAKE W/ CHOCFRSTNG;COM…            1108          92           609   459
##  4 CREME PIE                   …             910          90.1          46   582
##  5 LARD                        …             205          80.4         195   891
##  6 ICE CREAM; VANLLA; REGULR 11…            1064          71.3         476   467
##  7 CARROT CAKE;CREMCHESE FRST;R…            1536          66          1183   423
##  8 BUTTER; SALTED              …             113          57.1         247   710
##  9 BUTTER; UNSALTED            …             113          57.1         247   711
## 10 DEVIL'S FOOD CAKE;CHOCFRST;F…            1107          55.6         598   411
## # ℹ 951 more rows

Without standardization, comparing the raw values would be misleading, because you would be comparing nutrients across different total amounts of food. For example, 1 cup of grated Parmesan cheese (100 g) contains 19.1 g of saturated fat, whereas 1 tbsp (5 g) contains 1.0 g; this appears to be a huge difference, but it is simply due to the 20x difference in portion size.
However, valid comparisons can be made if nutrient values are standardized using the weight_in_grams column. By calculating nutrient content per gram (dividing saturated_fat or cholesterol by weight_in_grams), you can make direct, meaningful comparisons between foods, regardless of their original serving sizes.

2. Derive a new variable, saturated_fat_per_gram, by dividing the amount of saturated fat by the weight in grams.

# Express saturated fat per gram of food so that items of different weights
# can be compared directly.
nutrition$saturated_fat_per_gram <- with(
  nutrition,
  saturated_fat / weight_in_grams
)

# Order by the per-gram value, largest first, and show the five highest entries.
nutrition_sort2 <- nutrition[with(nutrition, order(-saturated_fat_per_gram)), ]
nutrition_sort2[seq_len(5), ]
## # A tibble: 5 × 6
##   `food item`                    weight_in_grams saturated_fat cholesterol Index
##   <chr>                                    <dbl>         <dbl>       <dbl> <int>
## 1 BUTTER; SALTED               …              14           7.1          31   909
## 2 BUTTER; UNSALTED             …              14           7.1          31   910
## 3 BUTTER; SALTED               …             113          57.1         247   710
## 4 BUTTER; UNSALTED             …             113          57.1         247   711
## 5 BUTTER; SALTED               …               5           2.5          11   913
## # ℹ 1 more variable: saturated_fat_per_gram <dbl>
# Print the table sorted by saturated fat per gram; the tibble print method
# shows only the first 10 of its rows.
print(nutrition_sort2)
## # A tibble: 961 × 6
##    `food item`                   weight_in_grams saturated_fat cholesterol Index
##    <chr>                                   <dbl>         <dbl>       <dbl> <int>
##  1 BUTTER; SALTED              …            14             7.1          31   909
##  2 BUTTER; UNSALTED            …            14             7.1          31   910
##  3 BUTTER; SALTED              …           113            57.1         247   710
##  4 BUTTER; UNSALTED            …           113            57.1         247   711
##  5 BUTTER; SALTED              …             5             2.5          11   913
##  6 BUTTER; UNSALTED            …             5             2.5          11   914
##  7 LARD                        …            13             5.1          12   900
##  8 LARD                        …           205            80.4         195   891
##  9 IMITATION CREAMERS; POWDERED…             2             0.7           0   921
## 10 CHOCOLATE; BITTER OT BAKING …            28.4           9             0   211
## # ℹ 951 more rows
## # ℹ 1 more variable: saturated_fat_per_gram <dbl>

3. Derive a new variable, cholesterol_per_gram, by dividing the amount of cholesterol by the weight in grams.

# Express cholesterol per gram of food so that items of different weights can
# be compared directly.
nutrition$cholesterol_per_gram <- with(
  nutrition,
  cholesterol / weight_in_grams
)

# Order by the per-gram value, largest first, and show the five highest entries.
nutrition_sort3 <- nutrition[with(nutrition, order(-cholesterol_per_gram)), ]
nutrition_sort3[seq_len(5), ]
## # A tibble: 5 × 7
##   `food item`                    weight_in_grams saturated_fat cholesterol Index
##   <chr>                                    <dbl>         <dbl>       <dbl> <int>
## 1 EGGS; RAW; YOLK              …              17           1.6         213   120
## 2 CHICKEN LIVER; COOKED        …              20           0.4         126    59
## 3 BEEF LIVER; FRIED            …              85           2.5         410    46
## 4 EGGS; COOKED; FRIED          …              46           1.9         211   168
## 5 EGGS; RAW; WHOLE             …              50           1.6         213   185
## # ℹ 2 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>
# Print the table sorted by cholesterol per gram; the tibble print method
# shows only the first 10 of its rows.
print(nutrition_sort3)
## # A tibble: 961 × 7
##    `food item`                   weight_in_grams saturated_fat cholesterol Index
##    <chr>                                   <dbl>         <dbl>       <dbl> <int>
##  1 EGGS; RAW; YOLK             …              17           1.6         213   120
##  2 CHICKEN LIVER; COOKED       …              20           0.4         126    59
##  3 BEEF LIVER; FRIED           …              85           2.5         410    46
##  4 EGGS; COOKED; FRIED         …              46           1.9         211   168
##  5 EGGS; RAW; WHOLE            …              50           1.6         213   185
##  6 EGGS; COOKED; HARD-COOKED   …              50           1.6         213   187
##  7 EGGS; COOKED; POACHED       …              50           1.5         212   186
##  8 EGGS; COOKED; SCRAMBLED/OMEL…              61           2.2         215   190
##  9 BUTTER; SALTED              …              14           7.1          31   909
## 10 BUTTER; UNSALTED            …              14           7.1          31   910
## # ℹ 951 more rows
## # ℹ 2 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>

4. Standardize the field saturated_fat_per_gram.

# Z-score the per-gram saturated fat values. scale() returns a one-column
# matrix, so the new column is stored as a matrix column (printed as dbl[,1]).
nutrition$saturated_fat_per_gram_z <- scale(nutrition$saturated_fat_per_gram)

# High-end outliers: rows whose z-score exceeds 3.
nutrition_outliersA <- nutrition[
  with(nutrition, which(saturated_fat_per_gram_z > 3)),
]
print(nutrition_outliersA)
## # A tibble: 15 × 8
##    `food item`                   weight_in_grams saturated_fat cholesterol Index
##    <chr>                                   <dbl>         <dbl>       <dbl> <int>
##  1 CHOCOLATE; BITTER OT BAKING …            28.4           9             0   211
##  2 COCONUT; RAW; SHREDDED      …            80            23.8           0   449
##  3 COCONUT; DRIED; SWEETND;SHRE…            93            29.3           0   493
##  4 COCONUT; RAW; PIECE         …            45            13.4           0   577
##  5 BUTTER; SALTED              …           113            57.1         247   710
##  6 BUTTER; UNSALTED            …           113            57.1         247   711
##  7 LARD                        …           205            80.4         195   891
##  8 FATS; COOKING/VEGETBL SHORTE…            13             3.3           0   899
##  9 LARD                        …            13             5.1          12   900
## 10 FATS; COOKING/VEGETBL SHORTE…           205            51.3           0   908
## 11 BUTTER; SALTED              …            14             7.1          31   909
## 12 BUTTER; UNSALTED            …            14             7.1          31   910
## 13 BUTTER; SALTED              …             5             2.5          11   913
## 14 BUTTER; UNSALTED            …             5             2.5          11   914
## 15 IMITATION CREAMERS; POWDERED…             2             0.7           0   921
## # ℹ 3 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>,
## #   saturated_fat_per_gram_z <dbl[,1]>
# Low-end outliers: rows whose saturated-fat-per-gram z-score is below -3.
nutrition_outliersB <- nutrition[
  with(nutrition, which(saturated_fat_per_gram_z < -3)),
]
print(nutrition_outliersB)
## # A tibble: 0 × 8
## # ℹ 8 variables: food item <chr>, weight_in_grams <dbl>, saturated_fat <dbl>,
## #   cholesterol <dbl>, Index <int>, saturated_fat_per_gram <dbl>,
## #   cholesterol_per_gram <dbl>, saturated_fat_per_gram_z <dbl[,1]>

There are no (0) outliers at the low end of the scale.

5. Standardize the field cholesterol_per_gram.

# Z-score the per-gram cholesterol values. scale() returns a one-column
# matrix, so the new column is stored as a matrix column (printed as dbl[,1]).
nutrition$cholesterol_per_gram_z <- scale(nutrition$cholesterol_per_gram)

# High-end outliers: rows whose z-score exceeds 3.
nutrition_outliersC <- nutrition[
  with(nutrition, which(cholesterol_per_gram_z > 3)),
]
print(nutrition_outliersC)
## # A tibble: 8 × 9
##   `food item`                    weight_in_grams saturated_fat cholesterol Index
##   <chr>                                    <dbl>         <dbl>       <dbl> <int>
## 1 BEEF LIVER; FRIED            …              85           2.5         410    46
## 2 CHICKEN LIVER; COOKED        …              20           0.4         126    59
## 3 EGGS; RAW; YOLK              …              17           1.6         213   120
## 4 EGGS; COOKED; FRIED          …              46           1.9         211   168
## 5 EGGS; RAW; WHOLE             …              50           1.6         213   185
## 6 EGGS; COOKED; POACHED        …              50           1.5         212   186
## 7 EGGS; COOKED; HARD-COOKED    …              50           1.6         213   187
## 8 EGGS; COOKED; SCRAMBLED/OMELE…              61           2.2         215   190
## # ℹ 4 more variables: saturated_fat_per_gram <dbl>, cholesterol_per_gram <dbl>,
## #   saturated_fat_per_gram_z <dbl[,1]>, cholesterol_per_gram_z <dbl[,1]>