library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the obesity data set CSV into `df`; the first line of the file supplies
# the column names.
df <- read.csv(
  file = "~/Downloads/ObesityDataSet_raw_and_data_sinthetic.csv",
  header = TRUE
)
# For each NObeyesdad class and family_history_with_overweight value, report
# the number of rows and the mean Weight. summarise() drops only the last
# grouping level, so the printed result is still grouped by NObeyesdad.
df |>
  group_by(NObeyesdad, family_history_with_overweight) |>
  summarise(
    count = n(),
    Mean_Value = mean(Weight)
  )
## `summarise()` has grouped output by 'NObeyesdad'. You can override using the
## `.groups` argument.
## # A tibble: 13 × 4
## # Groups: NObeyesdad [7]
## NObeyesdad family_history_with_overweight count Mean_Value
## <chr> <chr> <int> <dbl>
## 1 Insufficient_Weight no 146 46.2
## 2 Insufficient_Weight yes 126 54.2
## 3 Normal_Weight no 132 61.0
## 4 Normal_Weight yes 155 63.1
## 5 Obesity_Type_I no 7 95.4
## 6 Obesity_Type_I yes 344 92.8
## 7 Obesity_Type_II no 1 93
## 8 Obesity_Type_II yes 296 115.
## 9 Obesity_Type_III yes 324 121.
## 10 Overweight_Level_I no 81 70.4
## 11 Overweight_Level_I yes 209 75.8
## 12 Overweight_Level_II no 18 82.0
## 13 Overweight_Level_II yes 272 82.1
# Dodged bar chart of row counts per NObeyesdad class, with one bar per
# family_history_with_overweight value. Bars get a black outline and the
# x-axis labels are rotated 45 degrees.
ggplot(
  data = df[, c("family_history_with_overweight", "NObeyesdad")],
  mapping = aes(x = NObeyesdad, fill = family_history_with_overweight)
) +
  geom_bar(position = "dodge", color = "black") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Count the rows for every Gender x FAVC x SCC combination. summarise() drops
# only the last grouping level, so `gb` stays grouped by Gender and FAVC.
gb <- df |>
  group_by(Gender, FAVC, SCC) |>
  summarise(count = n())
## `summarise()` has grouped output by 'Gender', 'FAVC'. You can override using
## the `.groups` argument.
# Print the grouped counts.
gb
## # A tibble: 8 × 4
## # Groups: Gender, FAVC [4]
## Gender FAVC SCC count
## <chr> <chr> <chr> <int>
## 1 Female no no 116
## 2 Female no yes 27
## 3 Female yes no 857
## 4 Female yes yes 43
## 5 Male no no 91
## 6 Male no yes 11
## 7 Male yes no 951
## 8 Male yes yes 15
- In a worldly sense, males who are both careful and conscious about their food calories and track them are very few in number.
- The probability of the combination Male, FAVC = no, SCC = yes is very low compared to the other combinations: P = 11/2111.
# Dodged bars showing the precomputed `count` for each FAVC.SCC combination,
# with one bar per Gender. geom_col() plots the y values as they are.
ggplot(
  data = gb,
  mapping = aes(x = interaction(FAVC, SCC), y = count, fill = Gender)
) +
  geom_col(position = "dodge") +
  theme_minimal()
## Group By DataFrame_3: Group by Gender and NObeyesdad and summarize median Age
# Median Age and row count for every Gender x NObeyesdad combination.
# The grouping columns are named directly instead of being passed as a
# `df[, ...]` subset, so the pipeline depends only on its piped input and not
# on the global `df` staying row-aligned with it.
# summarise() drops only the last grouping level, so `gas` stays grouped by
# Gender.
gas <- df |>
  group_by(Gender, NObeyesdad) |>
  summarise(Median_Age = median(Age), count = n())
## `summarise()` has grouped output by 'Gender'. You can override using the
## `.groups` argument.
# Print the grouped summary.
gas
## # A tibble: 14 × 4
## # Groups: Gender [2]
## Gender NObeyesdad Median_Age count
## <chr> <chr> <dbl> <int>
## 1 Female Insufficient_Weight 19.9 173
## 2 Female Normal_Weight 21 141
## 3 Female Obesity_Type_I 23 156
## 4 Female Obesity_Type_II 24.5 2
## 5 Female Obesity_Type_III 25.4 323
## 6 Female Overweight_Level_I 21.5 145
## 7 Female Overweight_Level_II 25.1 103
## 8 Male Insufficient_Weight 18 99
## 9 Male Normal_Weight 21 146
## 10 Male Obesity_Type_I 22.7 195
## 11 Male Obesity_Type_II 27.3 295
## 12 Male Obesity_Type_III 18 1
## 13 Male Overweight_Level_I 21.0 145
## 14 Male Overweight_Level_II 23.9 187
- It can be deduced that almost all of the Obesity_Type_III cases (except
1) are females.
- The probability of the Male, Obesity_Type_III combination is very low,
i.e. P = 1/2111.
# Dodged bars showing the precomputed `count` for each NObeyesdad class, with
# one bar per Gender. The x-axis labels are rotated 45 degrees.
ggplot(
  data = gas,
  mapping = aes(x = NObeyesdad, y = count, fill = Gender)
) +
  geom_col(position = "dodge") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# For each SMOKE x CALC combination, report the number of rows and the median
# Weight. The printed result is still grouped by SMOKE, because summarise()
# drops only the last grouping level.
df |>
  group_by(SMOKE, CALC) |>
  summarise(
    count = n(),
    Median_Weight = median(Weight)
  )
## `summarise()` has grouped output by 'SMOKE'. You can override using the
## `.groups` argument.
## # A tibble: 7 × 4
## # Groups: SMOKE [2]
## SMOKE CALC count Median_Weight
## <chr> <chr> <int> <dbl>
## 1 no Always 1 65
## 2 no Frequently 63 78.4
## 3 no Sometimes 1370 89.8
## 4 no no 633 80
## 5 yes Frequently 7 84
## 6 yes Sometimes 31 102
## 7 yes no 6 77.5
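- SMOKE = no and CALC = Always (always consuming alcohol): this combination
occurs only once in the table above.
- SMOKE = yes and CALC = Always (always consuming alcohol): this combination
does not occur at all in the table above.
- `CALC = Always`, `SMOKE = no`: this combination is highly infrequent in the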
dataset. P(no,Always) = 1/2111.
# The sentence tail above was fused onto the ggplot() call, which made the
# statement unparseable; the call now starts on its own line.
# Dodged bars of row counts per CALC level, with one bar per SMOKE value.
# The SMOKE x CALC summary is rebuilt inline, so summarise() emits its
# `.groups` message again (shown below).
ggplot(
  data = df |>
    group_by(SMOKE, CALC) |>
    summarise(count = n(), Median_Weight = median(Weight)),
  mapping = aes(x = CALC, y = count, fill = SMOKE)
) +
  geom_col(position = "dodge") +
  theme_minimal()
## `summarise()` has grouped output by 'SMOKE'. You can override using the
## `.groups` argument.