We will use the USDA dataset from the National Nutrient Database for Standard Reference.

# Load the USDA nutrient table from CSV into the data frame `USDA`.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running on another machine.
# stringsAsFactors = TRUE is stated explicitly: R >= 4.0.0 defaults to FALSE,
# whereas the outputs recorded in this document show Description as a factor.
USDA <- read.csv(
  "C:\\Users\\aman96\\Desktop\\the analytics edge\\unit 1\\USDA.csv",
  header = TRUE,
  stringsAsFactors = TRUE
)

First rows of the data

# Print the first six rows (the default for head()) as a quick import check.
head(USDA, n = 6L)
##     ID              Description Calories Protein TotalFat Carbohydrate
## 1 1001         BUTTER,WITH SALT      717    0.85    81.11         0.06
## 2 1002 BUTTER,WHIPPED,WITH SALT      717    0.85    81.11         0.06
## 3 1003     BUTTER OIL,ANHYDROUS      876    0.28    99.48         0.00
## 4 1004              CHEESE,BLUE      353   21.40    28.74         2.34
## 5 1005             CHEESE,BRICK      371   23.24    29.68         2.79
## 6 1006              CHEESE,BRIE      334   20.75    27.68         0.45
##   Sodium SaturatedFat Cholesterol Sugar Calcium Iron Potassium VitaminC
## 1    714       51.368         215  0.06      24 0.02        24        0
## 2    827       50.489         219  0.06      24 0.16        26        0
## 3      2       61.924         256  0.00       4 0.00         5        0
## 4   1395       18.669          75  0.50     528 0.31       256        0
## 5    560       18.764          94  0.51     674 0.43       136        0
## 6    629       17.410         100  0.45     184 0.50       152        0
##   VitaminE VitaminD
## 1     2.32      1.5
## 2     2.32      1.5
## 3     2.80      1.8
## 4     0.25      0.5
## 5     0.26      0.5
## 6     0.24      0.5

Exploring the USDA data

# Column-by-column summary of the whole data frame: quartiles and mean for the
# numeric columns, level counts for factor columns, and NA counts.
summary(object = USDA)
##        ID       
##  Min.   : 1001  
##  1st Qu.: 8387  
##  Median :13294  
##  Mean   :14260  
##  3rd Qu.:18337  
##  Max.   :93600  
##                 
##                                                        Description  
##  BEEF,CHUCK,UNDER BLADE CNTR STEAK,BNLESS,DENVER CUT,LN,0" FA:   2  
##  CAMPBELL,CAMPBELL'S SEL MICROWAVEABLE BOWLS,HEA             :   2  
##  OIL,INDUSTRIAL,PALM KERNEL (HYDROGENATED),CONFECTION FAT    :   2  
##  POPCORN,OIL-POPPED,LOFAT                                    :   2  
##  ABALONE,MIXED SPECIES,RAW                                   :   1  
##  ABALONE,MXD SP,CKD,FRIED                                    :   1  
##  (Other)                                                     :7048  
##     Calories        Protein         TotalFat       Carbohydrate   
##  Min.   :  0.0   Min.   : 0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 85.0   1st Qu.: 2.29   1st Qu.:  0.72   1st Qu.:  0.00  
##  Median :181.0   Median : 8.20   Median :  4.37   Median :  7.13  
##  Mean   :219.7   Mean   :11.71   Mean   : 10.32   Mean   : 20.70  
##  3rd Qu.:331.0   3rd Qu.:20.43   3rd Qu.: 12.70   3rd Qu.: 28.17  
##  Max.   :902.0   Max.   :88.32   Max.   :100.00   Max.   :100.00  
##  NA's   :1       NA's   :1       NA's   :1        NA's   :1       
##      Sodium         SaturatedFat     Cholesterol          Sugar       
##  Min.   :    0.0   Min.   : 0.000   Min.   :   0.00   Min.   : 0.000  
##  1st Qu.:   37.0   1st Qu.: 0.172   1st Qu.:   0.00   1st Qu.: 0.000  
##  Median :   79.0   Median : 1.256   Median :   3.00   Median : 1.395  
##  Mean   :  322.1   Mean   : 3.452   Mean   :  41.55   Mean   : 8.257  
##  3rd Qu.:  386.0   3rd Qu.: 4.028   3rd Qu.:  69.00   3rd Qu.: 7.875  
##  Max.   :38758.0   Max.   :95.600   Max.   :3100.00   Max.   :99.800  
##  NA's   :84        NA's   :301      NA's   :288       NA's   :1910    
##     Calcium             Iron           Potassium          VitaminC       
##  Min.   :   0.00   Min.   :  0.000   Min.   :    0.0   Min.   :   0.000  
##  1st Qu.:   9.00   1st Qu.:  0.520   1st Qu.:  135.0   1st Qu.:   0.000  
##  Median :  19.00   Median :  1.330   Median :  250.0   Median :   0.000  
##  Mean   :  73.53   Mean   :  2.828   Mean   :  301.4   Mean   :   9.436  
##  3rd Qu.:  56.00   3rd Qu.:  2.620   3rd Qu.:  348.0   3rd Qu.:   3.100  
##  Max.   :7364.00   Max.   :123.600   Max.   :16500.0   Max.   :2400.000  
##  NA's   :136       NA's   :123       NA's   :409       NA's   :332       
##     VitaminE          VitaminD       
##  Min.   :  0.000   Min.   :  0.0000  
##  1st Qu.:  0.120   1st Qu.:  0.0000  
##  Median :  0.270   Median :  0.0000  
##  Mean   :  1.488   Mean   :  0.5769  
##  3rd Qu.:  0.710   3rd Qu.:  0.1000  
##  Max.   :149.400   Max.   :250.0000  
##  NA's   :2720      NA's   :2834
# Compact structure listing: dimensions, column types and leading values.
str(object = USDA)
## 'data.frame':    7058 obs. of  16 variables:
##  $ ID          : int  1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 ...
##  $ Description : Factor w/ 7054 levels "ABALONE,MIXED SPECIES,RAW",..: 1303 1302 1298 2303 2304 2305 2306 2307 2308 2309 ...
##  $ Calories    : int  717 717 876 353 371 334 300 376 403 387 ...
##  $ Protein     : num  0.85 0.85 0.28 21.4 23.24 ...
##  $ TotalFat    : num  81.1 81.1 99.5 28.7 29.7 ...
##  $ Carbohydrate: num  0.06 0.06 0 2.34 2.79 0.45 0.46 3.06 1.28 4.78 ...
##  $ Sodium      : int  714 827 2 1395 560 629 842 690 621 700 ...
##  $ SaturatedFat: num  51.4 50.5 61.9 18.7 18.8 ...
##  $ Cholesterol : int  215 219 256 75 94 100 72 93 105 103 ...
##  $ Sugar       : num  0.06 0.06 0 0.5 0.51 0.45 0.46 NA 0.52 NA ...
##  $ Calcium     : int  24 24 4 528 674 184 388 673 721 643 ...
##  $ Iron        : num  0.02 0.16 0 0.31 0.43 0.5 0.33 0.64 0.68 0.21 ...
##  $ Potassium   : int  24 26 5 256 136 152 187 93 98 95 ...
##  $ VitaminC    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ VitaminE    : num  2.32 2.32 2.8 0.25 0.26 0.24 0.21 NA 0.29 NA ...
##  $ VitaminD    : num  1.5 1.5 1.8 0.5 0.5 0.5 0.4 NA 0.6 NA ...

High sodium data analysis

# Keep only the foods whose Sodium value exceeds 10000. which() drops rows
# where Sodium is NA, matching what subset() does.
# Note: the name Highsodium is reassigned to a 0/1 indicator vector further
# down in this document.
Highsodium <- USDA[which(USDA$Sodium > 10000), ]

# Show the descriptions of those foods.
Highsodium[["Description"]]
##  [1] SALT,TABLE                                             
##  [2] SOUP,BF BROTH OR BOUILLON,PDR,DRY                      
##  [3] SOUP,BEEF BROTH,CUBED,DRY                              
##  [4] SOUP,CHICK BROTH OR BOUILLON,DRY                       
##  [5] SOUP,CHICK BROTH CUBES,DRY                             
##  [6] GRAVY,AU JUS,DRY                                       
##  [7] ADOBO FRESCO                                           
##  [8] LEAVENING AGENTS,BAKING PDR,DOUBLE-ACTING,NA AL SULFATE
##  [9] LEAVENING AGENTS,BAKING SODA                           
## [10] DESSERTS,RENNIN,TABLETS,UNSWTND                        
## 7054 Levels: ABALONE,MIXED SPECIES,RAW ... ZWIEBACK
# Quartiles, mean and NA count for the Sodium column alone.
summary(USDA[["Sodium"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##     0.0    37.0    79.0   322.1   386.0 38760.0      84
# Standard deviation of Sodium; missing values are removed first.
sd(x = USDA[["Sodium"]], na.rm = TRUE)
## [1] 1045.417

Now we visualize the data using the plot function

# Scatter plot of total fat (y) against protein (x), with red points.
plot(
  x = USDA[["Protein"]],
  y = USDA[["TotalFat"]],
  main = "Protein Vs Fat",
  xlab = "Protein",
  ylab = "Fat",
  col = "red"
)

Making histograms

# Histogram of vitamin C. The x-axis is limited to 0-100 and a large number of
# breaks is requested so that the bins inside that window stay narrow.
hist(
  USDA$VitaminC,
  breaks = 2000,
  xlim = c(0, 100),
  main = "Histogram of Vitamin C levels",
  xlab = "Vitamin C (mg)"
)

Adding an additional variable to the dataset

# Indicator of above-average sodium: 1 when Sodium is greater than the overall
# mean (computed without NAs), 0 otherwise, NA where Sodium itself is missing.
Highsodium <- as.numeric(
  USDA[["Sodium"]] > mean(USDA[["Sodium"]], na.rm = TRUE)
)

# Attach the indicator to the data frame as a new column.
USDA[["Highsodium"]] <- Highsodium

# Confirm that the new column is present.
str(USDA)
## 'data.frame':    7058 obs. of  17 variables:
##  $ ID          : int  1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 ...
##  $ Description : Factor w/ 7054 levels "ABALONE,MIXED SPECIES,RAW",..: 1303 1302 1298 2303 2304 2305 2306 2307 2308 2309 ...
##  $ Calories    : int  717 717 876 353 371 334 300 376 403 387 ...
##  $ Protein     : num  0.85 0.85 0.28 21.4 23.24 ...
##  $ TotalFat    : num  81.1 81.1 99.5 28.7 29.7 ...
##  $ Carbohydrate: num  0.06 0.06 0 2.34 2.79 0.45 0.46 3.06 1.28 4.78 ...
##  $ Sodium      : int  714 827 2 1395 560 629 842 690 621 700 ...
##  $ SaturatedFat: num  51.4 50.5 61.9 18.7 18.8 ...
##  $ Cholesterol : int  215 219 256 75 94 100 72 93 105 103 ...
##  $ Sugar       : num  0.06 0.06 0 0.5 0.51 0.45 0.46 NA 0.52 NA ...
##  $ Calcium     : int  24 24 4 528 674 184 388 673 721 643 ...
##  $ Iron        : num  0.02 0.16 0 0.31 0.43 0.5 0.33 0.64 0.68 0.21 ...
##  $ Potassium   : int  24 26 5 256 136 152 187 93 98 95 ...
##  $ VitaminC    : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ VitaminE    : num  2.32 2.32 2.8 0.25 0.26 0.24 0.21 NA 0.29 NA ...
##  $ VitaminD    : num  1.5 1.5 1.8 0.5 0.5 0.5 0.4 NA 0.6 NA ...
##  $ Highsodium  : num  1 1 0 1 1 1 1 1 1 1 ...

Using the tapply function

# Mean iron content within each Highsodium group (0 / 1), ignoring NAs.
tapply(
  X = USDA[["Iron"]],
  INDEX = USDA[["Highsodium"]],
  FUN = mean,
  na.rm = TRUE
)
##        0        1 
## 2.332241 3.914451
# Largest iron content within each Highsodium group (0 / 1), ignoring NAs.
tapply(
  X = USDA[["Iron"]],
  INDEX = USDA[["Highsodium"]],
  FUN = max,
  na.rm = TRUE
)
##      0      1 
## 123.60  87.47
# Full summary of iron content within each Highsodium group (0 / 1).
# summary() on a numeric vector always sets NAs aside and reports their count,
# so no na.rm argument is passed: it would be silently ignored.
tapply(USDA$Iron, USDA$Highsodium, summary)
## $`0`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.460   1.290   2.332   2.448 123.600      30 
## 
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.6375  1.4000  3.9140  3.2000 87.4700      50