# Load/clean data
# Read the nutrient table and keep every column except the five listed here;
# none of them is used as a predictor in the models further down.
data <- read.csv(file = "real_stuff.csv")
data <- data[setdiff(names(data), c(
  "Vitamin.A..IU",
  "Fatty.acids..total.monounsaturated",
  "Fatty.acids..total.polyunsaturated",
  "Fatty.acids..total.trans.monoenoic",
  "Fatty.acids..total.trans.polyenoic"
))]

# Attach the food-group lookup. With no `by` argument, merge() joins on the
# column names the two tables share (presumably just food.id -- confirm).
id.group <- read.csv(file = "id_group.csv")
merged <- merge(x = id.group, y = data)

# No food group data
# Baseline linear model: magnesium regressed on every other column of `data`,
# with the food.id identifier excluded from the predictors.
fit.no.group <- lm(
  formula = Magnesium..Mg ~ . - food.id,
  data = data
)
summary(fit.no.group)
## 
## Call:
## lm(formula = Magnesium..Mg ~ . - food.id, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -276.8  -16.4   -4.1    8.9  470.6 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -2.42e+01   4.90e+00   -4.94  9.6e-07 ***
## Protein                         2.39e+00   2.06e-01   11.62  < 2e-16 ***
## Total.lipid..fat.               8.45e-01   1.20e-01    7.04  4.1e-12 ***
## Carbohydrate..by.difference     3.15e-01   9.93e-02    3.18   0.0016 ** 
## Sugars..total                   2.04e-01   1.51e-01    1.36   0.1754    
## Fiber..total.dietary            5.67e+00   3.75e-01   15.14  < 2e-16 ***
## Calcium..Ca                     2.33e-02   8.50e-03    2.74   0.0063 ** 
## Iron..Fe                       -1.35e-01   2.57e-01   -0.53   0.5980    
## Sodium..Na                     -9.25e-03   3.75e-03   -2.47   0.0138 *  
## Vitamin.A..RAE                 -1.03e-03   9.23e-04   -1.12   0.2642    
## Vitamin.C..total.ascorbic.acid  3.84e-01   6.48e-02    5.93  4.6e-09 ***
## Cholesterol                    -1.55e-02   9.57e-03   -1.62   0.1050    
## Fatty.acids..total.trans       -1.64e+00   5.35e-01   -3.06   0.0023 ** 
## Fatty.acids..total.saturated   -7.24e-01   2.20e-01   -3.29   0.0011 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 42.6 on 791 degrees of freedom
##   (6733 observations deleted due to missingness)
## Multiple R-squared: 0.507,   Adjusted R-squared: 0.499 
## F-statistic: 62.5 on 13 and 791 DF,  p-value: <2e-16 
## 
# Reduced baseline model: additionally leaves out the four predictors whose
# p-values were above 0.05 in `fit.no.group` (Sugars..total 0.1754,
# Iron..Fe 0.5980, Vitamin.A..RAE 0.2642, Cholesterol 0.1050).
fit.no.group2 <- lm(
  formula = Magnesium..Mg ~ . - food.id - Sugars..total - Iron..Fe -
    Vitamin.A..RAE - Cholesterol,
  data = data
)
summary(fit.no.group2)
## 
## Call:
## lm(formula = Magnesium..Mg ~ . - food.id - Sugars..total - Iron..Fe - 
##     Vitamin.A..RAE - Cholesterol, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -274.8  -16.2   -3.6    9.4  472.5 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                    -24.73264    4.83921   -5.11  4.0e-07 ***
## Protein                          2.30238    0.20319   11.33  < 2e-16 ***
## Total.lipid..fat.                0.84787    0.12003    7.06  3.5e-12 ***
## Carbohydrate..by.difference      0.39812    0.06345    6.27  5.8e-10 ***
## Fiber..total.dietary             5.52282    0.35172   15.70  < 2e-16 ***
## Calcium..Ca                      0.02278    0.00825    2.76   0.0059 ** 
## Sodium..Na                      -0.00923    0.00370   -2.49   0.0128 *  
## Vitamin.C..total.ascorbic.acid   0.36746    0.06087    6.04  2.4e-09 ***
## Fatty.acids..total.trans        -1.67752    0.53500   -3.14   0.0018 ** 
## Fatty.acids..total.saturated    -0.68258    0.21922   -3.11   0.0019 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 42.7 on 795 degrees of freedom
##   (6733 observations deleted due to missingness)
## Multiple R-squared: 0.502,   Adjusted R-squared: 0.496 
## F-statistic:   89 on 9 and 795 DF,  p-value: <2e-16 
## 

# With food group data
# Same regression, but on the merged table so that food.group enters the model
# as a categorical predictor. The X and food.id columns are excluded from the
# predictors (X is presumably a row-index column from the CSV -- confirm).
fit <- lm(
  formula = Magnesium..Mg ~ . - X - food.id,
  data = merged
)
summary(fit)
## 
## Call:
## lm(formula = Magnesium..Mg ~ . - X - food.id, data = merged)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -220.84  -12.79   -0.04   10.50  285.92 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                  6.51e+00   1.50e+01    0.43
## food.groupBaked Products                     1.14e+01   1.51e+01    0.75
## food.groupBeef Products                     -6.27e+01   1.57e+01   -4.01
## food.groupBeverages                          2.42e+01   1.89e+01    1.28
## food.groupBreakfast Cereals                  1.95e+01   1.42e+01    1.37
## food.groupCereal Grains and Pasta            1.05e+01   1.54e+01    0.68
## food.groupDairy and Egg Products            -1.65e+01   2.86e+01   -0.58
## food.groupEthnic Foods                      -8.04e+01   3.81e+01   -2.11
## food.groupFast Foods                        -2.59e+01   1.51e+01   -1.72
## food.groupFats and Oils                     -2.04e+01   1.68e+01   -1.21
## food.groupFinfish and Shellfish Products    -1.39e+01   3.74e+01   -0.37
## food.groupFruits and Fruit Juices           -1.12e+01   2.85e+01   -0.39
## food.groupLamb, Veal, and Game Products     -5.88e+01   2.35e+01   -2.50
## food.groupLegumes and Legume Products       -3.31e+01   1.52e+01   -2.17
## food.groupMeals, Entrees, and Sidedishes    -1.56e+01   3.73e+01   -0.42
## food.groupNut and Seed Products              1.60e+02   1.86e+01    8.61
## food.groupPork Products                     -5.57e+01   1.53e+01   -3.63
## food.groupPoultry Products                  -5.92e+01   1.60e+01   -3.70
## food.groupSausages and Luncheon Meats       -4.17e+01   1.64e+01   -2.54
## food.groupSnacks                             5.28e+01   1.45e+01    3.63
## food.groupSoups, Sauces, and Gravies         1.59e+00   1.62e+01    0.10
## food.groupSpices and Herbs                   4.76e+01   1.87e+01    2.55
## food.groupSweets                             1.85e+01   1.55e+01    1.19
## food.groupVegetables and Vegetable Products -2.44e+00   1.73e+01   -0.14
## Protein                                      3.21e+00   2.51e-01   12.80
## Total.lipid..fat.                            3.72e-01   1.26e-01    2.96
## Carbohydrate..by.difference                 -4.29e-01   1.39e-01   -3.08
## Sugars..total                                2.28e-01   1.75e-01    1.30
## Fiber..total.dietary                         4.35e+00   3.38e-01   12.84
## Calcium..Ca                                  1.23e-02   7.30e-03    1.69
## Iron..Fe                                     4.39e-01   2.62e-01    1.68
## Sodium..Na                                  -4.78e-03   3.67e-03   -1.30
## Vitamin.A..RAE                              -6.89e-04   8.69e-04   -0.79
## Vitamin.C..total.ascorbic.acid               2.70e-01   5.59e-02    4.84
## Cholesterol                                  2.18e-03   8.08e-03    0.27
## Fatty.acids..total.trans                    -5.60e-01   4.49e-01   -1.25
## Fatty.acids..total.saturated                -2.96e-01   1.82e-01   -1.63
##                                             Pr(>|t|)    
## (Intercept)                                  0.66382    
## food.groupBaked Products                     0.45058    
## food.groupBeef Products                      6.7e-05 ***
## food.groupBeverages                          0.19962    
## food.groupBreakfast Cereals                  0.16973    
## food.groupCereal Grains and Pasta            0.49482    
## food.groupDairy and Egg Products             0.56251    
## food.groupEthnic Foods                       0.03525 *  
## food.groupFast Foods                         0.08586 .  
## food.groupFats and Oils                      0.22646    
## food.groupFinfish and Shellfish Products     0.70998    
## food.groupFruits and Fruit Juices            0.69380    
## food.groupLamb, Veal, and Game Products      0.01249 *  
## food.groupLegumes and Legume Products        0.02998 *  
## food.groupMeals, Entrees, and Sidedishes     0.67548    
## food.groupNut and Seed Products              < 2e-16 ***
## food.groupPork Products                      0.00030 ***
## food.groupPoultry Products                   0.00023 ***
## food.groupSausages and Luncheon Meats        0.01134 *  
## food.groupSnacks                             0.00030 ***
## food.groupSoups, Sauces, and Gravies         0.92147    
## food.groupSpices and Herbs                   0.01095 *  
## food.groupSweets                             0.23320    
## food.groupVegetables and Vegetable Products  0.88820    
## Protein                                      < 2e-16 ***
## Total.lipid..fat.                            0.00319 ** 
## Carbohydrate..by.difference                  0.00215 ** 
## Sugars..total                                0.19261    
## Fiber..total.dietary                         < 2e-16 ***
## Calcium..Ca                                  0.09150 .  
## Iron..Fe                                     0.09410 .  
## Sodium..Na                                   0.19338    
## Vitamin.A..RAE                               0.42784    
## Vitamin.C..total.ascorbic.acid               1.6e-06 ***
## Cholesterol                                  0.78695    
## Fatty.acids..total.trans                     0.21295    
## Fatty.acids..total.saturated                 0.10373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 34.7 on 768 degrees of freedom
##   (6733 observations deleted due to missingness)
## Multiple R-squared: 0.682,   Adjusted R-squared: 0.667 
## F-statistic: 45.7 on 36 and 768 DF,  p-value: <2e-16 
## 
# Reduced food-group model: keeps food.group and leaves out seven nutrient
# predictors, each of which had a p-value above 0.05 in `fit` (Sodium..Na,
# Sugars..total, Iron..Fe, Vitamin.A..RAE, Cholesterol,
# Fatty.acids..total.trans, Fatty.acids..total.saturated).
# Every excluded term is listed exactly once; repeating a `- term` in a
# formula has no effect on the fitted model and only clutters the call.
fit2 <- lm(
  formula = Magnesium..Mg ~ . - X - food.id - Sodium..Na - Sugars..total -
    Iron..Fe - Vitamin.A..RAE - Cholesterol - Fatty.acids..total.trans -
    Fatty.acids..total.saturated,
  data = merged
)
summary(fit2)
## 
## Call:
## lm(formula = Magnesium..Mg ~ . - X - food.id - Sodium..Na - Sugars..total - 
##     Iron..Fe - Vitamin.A..RAE - Cholesterol - Fatty.acids..total.trans - 
##     Fatty.acids..total.saturated, data = merged)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -224.59  -11.60   -2.16   10.89  288.45 
## 
## Coefficients:
##                                              Estimate Std. Error t value
## (Intercept)                                   4.20830   14.88253    0.28
## food.groupBaked Products                     10.12727   15.08122    0.67
## food.groupBeef Products                     -58.93361   15.37465   -3.83
## food.groupBeverages                          26.91634   18.79306    1.43
## food.groupBreakfast Cereals                  24.81798   13.81308    1.80
## food.groupCereal Grains and Pasta             8.81720   15.22526    0.58
## food.groupDairy and Egg Products            -13.44246   28.44947   -0.47
## food.groupEthnic Foods                      -76.10468   38.02391   -2.00
## food.groupFast Foods                        -25.78984   14.86540   -1.73
## food.groupFats and Oils                     -21.61226   16.48300   -1.31
## food.groupFinfish and Shellfish Products    -13.96120   37.39451   -0.37
## food.groupFruits and Fruit Juices            -7.64936   28.37866   -0.27
## food.groupLamb, Veal, and Game Products     -61.79421   21.48741   -2.88
## food.groupLegumes and Legume Products       -32.41212   15.09434   -2.15
## food.groupMeals, Entrees, and Sidedishes    -16.86988   37.28283   -0.45
## food.groupNut and Seed Products             167.86470   18.20938    9.22
## food.groupPork Products                     -54.66799   15.03012   -3.64
## food.groupPoultry Products                  -56.31371   15.70695   -3.59
## food.groupSausages and Luncheon Meats       -42.88334   15.87745   -2.70
## food.groupSnacks                             51.90391   14.48184    3.58
## food.groupSoups, Sauces, and Gravies          0.95879   15.90691    0.06
## food.groupSpices and Herbs                   50.40825   18.57745    2.71
## food.groupSweets                             24.74113   14.28421    1.73
## food.groupVegetables and Vegetable Products  -1.49833   17.27685   -0.09
## Protein                                       3.17839    0.24739   12.85
## Total.lipid..fat.                             0.24549    0.10026    2.45
## Carbohydrate..by.difference                  -0.32918    0.11305   -2.91
## Fiber..total.dietary                          4.25724    0.31860   13.36
## Calcium..Ca                                   0.01626    0.00695    2.34
## Vitamin.C..total.ascorbic.acid                0.30657    0.05128    5.98
##                                             Pr(>|t|)    
## (Intercept)                                  0.77743    
## food.groupBaked Products                     0.50209    
## food.groupBeef Products                      0.00014 ***
## food.groupBeverages                          0.15248    
## food.groupBreakfast Cereals                  0.07277 .  
## food.groupCereal Grains and Pasta            0.56268    
## food.groupDairy and Egg Products             0.63670    
## food.groupEthnic Foods                       0.04569 *  
## food.groupFast Foods                         0.08316 .  
## food.groupFats and Oils                      0.19018    
## food.groupFinfish and Shellfish Products     0.70899    
## food.groupFruits and Fruit Juices            0.78758    
## food.groupLamb, Veal, and Game Products      0.00414 ** 
## food.groupLegumes and Legume Products        0.03208 *  
## food.groupMeals, Entrees, and Sidedishes     0.65105    
## food.groupNut and Seed Products              < 2e-16 ***
## food.groupPork Products                      0.00029 ***
## food.groupPoultry Products                   0.00036 ***
## food.groupSausages and Luncheon Meats        0.00707 ** 
## food.groupSnacks                             0.00036 ***
## food.groupSoups, Sauces, and Gravies         0.95195    
## food.groupSpices and Herbs                   0.00681 ** 
## food.groupSweets                             0.08366 .  
## food.groupVegetables and Vegetable Products  0.93091    
## Protein                                      < 2e-16 ***
## Total.lipid..fat.                            0.01456 *  
## Carbohydrate..by.difference                  0.00370 ** 
## Fiber..total.dietary                         < 2e-16 ***
## Calcium..Ca                                  0.01954 *  
## Vitamin.C..total.ascorbic.acid               3.4e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Residual standard error: 34.8 on 775 degrees of freedom
##   (6733 observations deleted due to missingness)
## Multiple R-squared: 0.678,   Adjusted R-squared: 0.666 
## F-statistic: 56.3 on 29 and 775 DF,  p-value: <2e-16 
## 


library(randomForest)
## randomForest 4.6-6
## Type rfNews() to see new features/changes/bug fixes.
# Random-forest classifier for "high magnesium" foods (Magnesium..Mg > 50),
# using the same predictor set as `fit2`: everything in `merged` except X,
# food.id and the seven excluded nutrient columns. Rows containing missing
# values are dropped via na.omit.
#
# The seed fixes the random number stream so the OOB error estimate and
# confusion matrix are reproducible across runs. The recorded output below
# was produced by an earlier, unseeded run with a redundant repeated
# `- Vitamin.A..RAE` term, so the echoed call and the exact figures may
# differ slightly from a fresh run.
set.seed(42)
rf <- randomForest(
  factor(Magnesium..Mg > 50) ~ . - X - food.id - Sodium..Na - Sugars..total -
    Iron..Fe - Vitamin.A..RAE - Cholesterol - Fatty.acids..total.trans -
    Fatty.acids..total.saturated,
  data = merged,
  na.action = na.omit
)
print(rf)
## 
## Call:
##  randomForest(formula = factor(Magnesium..Mg > 50) ~ . - X - food.id -      Sodium..Na - Sugars..total - Iron..Fe - Vitamin.A..RAE -      Cholesterol - Fatty.acids..total.trans - Fatty.acids..total.saturated -      Vitamin.A..RAE, data = merged, na.action = na.omit) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 4.84%
## Confusion matrix:
##       FALSE TRUE class.error
## FALSE   631   14     0.02171
## TRUE     25  135     0.15625
# Dot chart of the variable importance measured by the fitted forest.
varImpPlot(x = rf)

## plot of chunk unnamed-chunk-1