library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the menu data from India_Menu.csv (path is relative to the working
# directory).
India_Menu <- read_csv("India_Menu.csv")
## Rows: 141 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): Menu Category, Menu Items, Per Serve Size
## dbl (10): Energy (kCal), Protein (g), Total fat (g), Sat Fat (g), Trans fat ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Render the raw data as a DT table for visual inspection.
DT::datatable(India_Menu)
# Keep only the numeric columns; the correlation plot and the regression
# further down work on this subset.
# select(where(...)) replaces select_if(), which dplyr has superseded.
db1 <- India_Menu %>%
  select(where(is.numeric))
db1
## # A tibble: 141 × 10
##    `Energy (kCal)` `Protein (g)` `Total fat (g)` `Sat Fat (g)` `Trans fat (g)`
##              <dbl>         <dbl>           <dbl>         <dbl>           <dbl>
##  1            402.         10.2             13.8          5.34            0.16
##  2            340.          8.5             11.3          4.27            0.2 
##  3            653.         20.3             39.4         17.1             0.18
##  4            675.         21.0             39.1         19.7             0.26
##  5            512.         15.3             23.4         10.5             0.17
##  6            833.         24.2             37.9         16.8             0.28
##  7            356.          7.91            15.1          6.11            0.24
##  8            228.          5.45            11.4          5.72            0.09
##  9            401.         15.7             15.7          5.47            0.16
## 10            348.         15.4             14.2          5.79            0.21
## # … with 131 more rows, and 5 more variables: `Cholesterols (mg)` <dbl>,
## #   `Total carbohydrate (g)` <dbl>, `Total Sugars (g)` <dbl>,
## #   `Added Sugars (g)` <dbl>, `Sodium (mg)` <dbl>
# Replace missing sodium values with a fixed constant, then print the table.
# NOTE(review): the origin of 152.025 is not shown in this script -- confirm
# how it was derived before relying on it.
db1 <- db1 %>%
  mutate(`Sodium (mg)` = coalesce(`Sodium (mg)`, 152.025))
db1
## # A tibble: 141 × 10
##    `Energy (kCal)` `Protein (g)` `Total fat (g)` `Sat Fat (g)` `Trans fat (g)`
##              <dbl>         <dbl>           <dbl>         <dbl>           <dbl>
##  1            402.         10.2             13.8          5.34            0.16
##  2            340.          8.5             11.3          4.27            0.2 
##  3            653.         20.3             39.4         17.1             0.18
##  4            675.         21.0             39.1         19.7             0.26
##  5            512.         15.3             23.4         10.5             0.17
##  6            833.         24.2             37.9         16.8             0.28
##  7            356.          7.91            15.1          6.11            0.24
##  8            228.          5.45            11.4          5.72            0.09
##  9            401.         15.7             15.7          5.47            0.16
## 10            348.         15.4             14.2          5.79            0.21
## # … with 131 more rows, and 5 more variables: `Cholesterols (mg)` <dbl>,
## #   `Total carbohydrate (g)` <dbl>, `Total Sugars (g)` <dbl>,
## #   `Added Sugars (g)` <dbl>, `Sodium (mg)` <dbl>
# Install psych only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("psych", quietly = TRUE)) {
  install.packages("psych")
}
library(psych)
# Correlation plot over the columns of db1.
corPlot(db1)

# Building blocks for a least-squares fit worked out by hand, with
# x = Protein (g) and y = Sodium (mg).

# Sum of x.
sumax <- sum(db1[["Protein (g)"]])
sumax
## [1] 1056.59

# Sum of y.
sumay <- sum(db1[["Sodium (mg)"]])
sumay
## [1] 50841

# Number of observations.
n <- length(db1[["Protein (g)"]])
n
## [1] 141
# Element-wise products x_i * y_i (protein times sodium), one per row.
xpory <- db1[["Protein (g)"]] * db1[["Sodium (mg)"]]
xpory
##   [1]  7230.7712  4635.3900 21803.2282 22793.1616 16083.9720 36961.2474
##   [7]  4584.6360  2129.5330 12000.7278  8191.5376 19926.0392 27357.5012
##  [13] 63060.1400 22974.3670  4741.5456  2329.1226  9435.9795  4908.7489
##  [19] 64570.2759 15144.5137  3141.8975  7066.8448 15900.5136  4853.3274
##  [25] 10923.5658 30339.2615   517.6470  1038.4241  2070.5204  1415.0668
##  [31]  2653.4144  5085.3855     0.0000    14.0000  1606.7673  1142.0416
##  [37]  8223.3186  5967.8610  8888.6640 12067.2500 18058.7384  7296.3055
##  [43]  5295.3640   531.2518     0.1664    54.4445     0.1664     0.5358
##  [49]     0.7085   544.1478  1136.0196  1629.6806   771.8080  1222.9490
##  [55]  1842.1104   998.1504  1605.5928  2184.0640   949.8060  1514.5453
##  [61]  2274.8992   373.2228  1125.3233  1594.5930  3009.5835   419.9220
##  [67]   482.6862     7.7504    10.8042    20.5842     4.9335     7.0760
##  [73]    13.4420     6.8338     9.6264    18.5328     2.7702     2.3328
##  [79]     2.3064   341.6060   940.8714  1280.9268  2005.5780   513.6899
##  [85]   742.3936   274.2945   306.5931    14.3752     3.8112     4.6340
##  [91]   225.2725  1053.0891    81.1522   139.0501   147.5100   385.2611
##  [97]    53.1454    98.9740   322.8480   680.7600   336.3934  1020.0440
## [103]   165.4965   540.2220 34108.4331 22945.2358 17298.3101 47761.7448
## [109] 75240.4769 50852.5712 32467.7628  3896.4007 14015.5533  3630.8816
## [115]  8169.4200     0.0000     0.0000     0.0000     0.0000     0.0000
## [121]     0.0000     0.0000     0.0000     0.0000     0.0000     0.0000
## [127]     0.0000    67.6856   101.2472    71.5768     0.0000     0.0000
## [133]   115.0864    29.4398     1.9572   211.5021     5.6840     0.0000
## [139]   547.5870     0.0588     7.0200
# Sum of the cross products, sum(x * y).
sumxpory <- sum(xpory)
sumxpory
## [1] 872025.6

# Corrected sum of cross products: Sxy = sum(xy) - sum(x) * sum(y) / n.
Sxy <- sumxpory - (sumax * sumay) / n
Sxy
## [1] 491046.2

# Sum of squared x values.
xalcuadrado <- db1[["Protein (g)"]]^2
x_alcuadrado <- sum(xalcuadrado)
x_alcuadrado
## [1] 17648.07

# Sum of squared y values.
yalcuadrado <- db1[["Sodium (mg)"]]^2
y_alcuadrado <- sum(yalcuadrado)
y_alcuadrado
## [1] 49495212

# Corrected sum of squares of x: Sxx = sum(x^2) - sum(x)^2 / n.
Sxx <- x_alcuadrado - sumax^2 / n
Sxx
## [1] 9730.46

# Corrected sum of squares of y: Syy = sum(y^2) - sum(y)^2 / n.
Syy <- y_alcuadrado - sumay^2 / n
Syy
## [1] 31163242
# Slope of the fitted line; agrees with the `Protein (g)` estimate that
# summary(lm(...)) reports further down.
pendiente <- Sxy / Sxx
pendiente
## [1] 50.46485

# Sum of squares explained by the regression.
SSR <- pendiente * Sxy
SSR
## [1] 24780573

# Residual (error) sum of squares.
SSE <- Syy - SSR
SSE
## [1] 6382669

# Error variance estimate on n - 2 degrees of freedom.
VarError <- SSE / (n - 2)
VarError
## [1] 45918.48

# Variance of the slope estimate; its square root is the slope's
# standard error.
VarPend <- VarError / Sxx
VarPend
## [1] 4.719045

# Variance of the intercept estimate; its square root is the intercept's
# standard error.
VarInter <- VarError * x_alcuadrado / (n * Sxx)
VarInter
## [1] 590.6526
# Fit the same simple linear regression with lm(): sodium is the response and
# protein the single predictor. The summary printed next gives the
# coefficients, their standard errors and the R-squared.
regresion <- lm(`Sodium (mg)` ~ `Protein (g)`, data = db1)
summary(regresion)
## 
## Call:
## lm(formula = `Sodium (mg)` ~ `Protein (g)`, data = db1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1123.80   -61.84     1.98    73.12  1059.07 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -17.586     24.303  -0.724    0.471    
## `Protein (g)`   50.465      2.172  23.231   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 214.3 on 139 degrees of freedom
## Multiple R-squared:  0.7952, Adjusted R-squared:  0.7937 
## F-statistic: 539.7 on 1 and 139 DF,  p-value: < 2.2e-16
## If the p-value is below 0.05, the null hypothesis is rejected in favor of the alternative.
## H0: coefficient = 0 (no effect); Ha: coefficient != 0 (there is an effect)
# Install ggplot2 only when it is missing, so re-running the script does not
# download and reinstall the package every time.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)
## 
## Attaching package: 'ggplot2'
## 
## The following objects are masked from 'package:psych':
## 
##     %+%, alpha
library(ggpubr)
# Scatter plot of sodium against protein with the fitted regression line, its
# confidence band, the line's equation and the correlation annotation.
# NOTE(review): the `color` mapping in aes() is overridden by the fixed colors
# set in geom_point() and geom_smooth(); confirm whether it is still wanted.
ggplot(
  db1,
  aes(x = `Protein (g)`, y = `Sodium (mg)`, color = `Protein (g)`)
) +
  geom_point(color = 7) +
  ggtitle("Proteina(g) vs Sodium(mg)") +
  xlab("Proteina(g)") +
  ylab("Sodium(mg)") +
  # TRUE is spelled out because `T` is an ordinary variable that can be
  # reassigned.
  geom_smooth(method = "lm", se = TRUE, color = "green") +
  stat_regline_equation(label.y = 2000) +
  stat_cor(label.y = 1500)
## `geom_smooth()` using formula 'y ~ x'