This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the raw child growth / malnutrition data.
# The path defaults to the original author's location; set the CHILD_GROWTH_CSV
# environment variable to knit this document on another machine.
data_path <- Sys.getenv(
  "CHILD_GROWTH_CSV",
  unset = "C:\\Users\\karth\\Downloads\\Child Growth and Malnutrition.csv"
)
datas <- read.csv(data_path)

# Fix the RNG seed so the seven samples below are reproducible.
set.seed(30)

# Keep only the columns used in this analysis.
keep_cols <- c(
  "Country.ISO.3.Code", "Age", "Sex", "Urban.Rural", "Stunting", "Underweight"
)
df <- datas[, keep_cols]

# Every sample has the same size and is drawn with replacement.
sample_size <- 19800

# Draw one sample of `sample_size` rows from `df`, with replacement.
# sample_n() is kept as in the original code to avoid any risk of altering the
# seeded draws.
draw_sample <- function() {
  sample_n(df, sample_size, replace = TRUE)
}

# The draws happen in the same order as before (df_1 first, df_7 last), so each
# sample consumes the same part of the random number stream.
df_1 <- draw_sample()
df_2 <- draw_sample()
df_3 <- draw_sample()
df_4 <- draw_sample()
df_5 <- draw_sample()
df_6 <- draw_sample()
df_7 <- draw_sample()

# The data viewer is only useful in an interactive session; skip it when the
# document is knitted. It does not touch the RNG, so its position is free.
if (interactive()) {
  view(datas)
  view(df_1)
  view(df_3)
}
# Label the output, then show per-column summary statistics for sample df_1.
print("df_1:")
## [1] "df_1:"
summary(df_1)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.07 1st Qu.: 5.199
## Median :26.12 Median :13.380
## Mean :27.73 Mean :15.750
## 3rd Qu.:38.71 3rd Qu.:22.814
## Max. :90.62 Max. :78.700
## NA's :738 NA's :567
# Label the output, then show per-column summary statistics for sample df_2.
print("df_2:")
## [1] "df_2:"
summary(df_2)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.27 1st Qu.: 5.337
## Median :26.56 Median :13.600
## Mean :27.96 Mean :15.979
## 3rd Qu.:39.11 3rd Qu.:23.148
## Max. :90.62 Max. :78.700
## NA's :734 NA's :557
# Label the output, then show per-column summary statistics for sample df_3.
print("df_3:")
## [1] "df_3:"
summary(df_3)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.40 1st Qu.: 5.348
## Median :26.58 Median :13.598
## Mean :27.92 Mean :15.951
## 3rd Qu.:39.00 3rd Qu.:23.142
## Max. :90.70 Max. :77.900
## NA's :730 NA's :523
# Label the output, then show per-column summary statistics for sample df_4.
print("df_4:")
## [1] "df_4:"
summary(df_4)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.20 1st Qu.: 5.237
## Median :26.28 Median :13.309
## Mean :27.80 Mean :15.739
## 3rd Qu.:38.84 3rd Qu.:22.738
## Max. :90.70 Max. :78.700
## NA's :732 NA's :559
# Label the output, then show per-column summary statistics for sample df_5.
print("df_5:")
## [1] "df_5:"
summary(df_5)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.49 1st Qu.: 5.256
## Median :26.40 Median :13.488
## Mean :27.95 Mean :15.937
## 3rd Qu.:39.28 3rd Qu.:23.061
## Max. :86.50 Max. :75.500
## NA's :753 NA's :561
# Label the output, then show per-column summary statistics for sample df_6.
print("df_6:")
## [1] "df_6:"
summary(df_6)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.15 1st Qu.: 5.275
## Median :26.29 Median :13.454
## Mean :27.68 Mean :15.711
## 3rd Qu.:38.98 3rd Qu.:22.796
## Max. :88.30 Max. :78.700
## NA's :754 NA's :525
# Label the output, then show per-column summary statistics for sample df_7.
print("df_7:")
## [1] "df_7:"
summary(df_7)
## Country.ISO.3.Code Age Sex Urban.Rural
## Length:19800 Length:19800 Length:19800 Length:19800
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Stunting Underweight
## Min. : 0.00 Min. : 0.000
## 1st Qu.:14.22 1st Qu.: 5.275
## Median :26.28 Median :13.302
## Mean :27.86 Mean :15.784
## 3rd Qu.:39.03 3rd Qu.:22.828
## Max. :90.62 Max. :77.900
## NA's :742 NA's :516
# Count the rows of sample df_1 for every Age / Country.ISO.3.Code pair.
# `.groups = "drop_last"` spells out what summarise() already did by default
# here: the result stays grouped by Age, but the "grouped output" message is
# no longer emitted on every knit.
df_1_age <- df_1 |>
  group_by(Age, Country.ISO.3.Code) |>
  summarise(Freq = n(), .groups = "drop_last")
df_1_age
## # A tibble: 1,492 × 3
## # Groups: Age [39]
## Age Country.ISO.3.Code Freq
## <chr> <chr> <int>
## 1 "" "Bayankhongor" 2
## 2 "" "MNG" 10
## 3 "" "MRT" 2
## 4 "" "Pacific Community\"" 16
## 5 "" "Statistics (NBS)" 15
## 6 "0. - 0.49" "AFG" 3
## 7 "0. - 0.49" "AGO" 3
## 8 "0. - 0.49" "ALB" 6
## 9 "0. - 0.49" "ARG" 6
## 10 "0. - 0.49" "ARM" 5
## # ℹ 1,482 more rows
# Show the first five rows of sample df_1.
head(df_1, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 SWZ 0. - 4.99 BTSX BOTH 25.54947
## 2 PER 0. - 4.99 BTSX BOTH 17.59848
## 3 GHA 2. - 4.99 BTSX BOTH 32.96038
## 4 JAM 0. - 0.49 BTSX BOTH 14.62735
## 5 ETH 0. - 0.49 NUTRITION_FEMALE BOTH 15.62385
## Underweight
## 1 5.79140
## 2 3.26882
## 3 13.85938
## 4 10.78384
## 5 15.66706
# Show the first five rows of sample df_2.
head(df_2, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 BRA 2. - 5.00 NUTRITION_MALE BOTH NA
## 2 NGA 0. - 4.99 BTSX BOTH 33.97850
## 3 SYC 0. - 1.99 NUTRITION_MALE BOTH 10.44304
## 4 ZMB 0. - 4.99 BTSX BOTH 64.09775
## 5 MLI 1. - 1.99 BTSX BOTH 34.43444
## Underweight
## 1 NA
## 2 14.06926
## 3 5.03641
## 4 28.62453
## 5 25.35185
# Show the first five rows of sample df_3.
head(df_3, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 VNM 0. - 4.99 NUTRITION_MALE BOTH 45.46321
## 2 NER 0. - 1.99 BTSX BOTH 34.41766
## 3 VNM 0. - 4.99 BTSX BOTH 41.97952
## 4 ALB 0. - 4.99 BTSX BOTH 9.15313
## 5 NGA 1. - 1.99 NUTRITION_MALE BOTH 42.49993
## Underweight
## 1 31.86486
## 2 33.76796
## 3 23.38983
## 4 0.68091
## 5 28.03491
# Show the first five rows of sample df_4.
head(df_4, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 LKA 0. - 4.99 BTSX BOTH 17.51699
## 2 TUR 0. - 4.99 NUTRITION_FEMALE BOTH 16.40309
## 3 GNQ 4. - 5.00 BTSX BOTH 65.30000
## 4 NER 0. - 0.49 BTSX BOTH 16.29320
## 5 SLV 0. - 1.99 NUTRITION_MALE BOTH 24.17783
## Underweight
## 1 20.82668
## 2 3.88967
## 3 14.40000
## 4 18.93707
## 5 8.92532
# Show the first five rows of sample df_5.
head(df_5, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 LBN 0. - 5.00 BTSX BOTH 25.50000
## 2 NLD 0.50 - 0.99 NUTRITION_FEMALE BOTH 0.44843
## 3 BFA 0. - 4.99 BTSX BOTH 24.68031
## 4 KEN 0. - 0.49 BTSX BOTH 14.34295
## 5 CIV 0. - 4.99 BTSX BOTH 16.27907
## Underweight
## 1 4.80000
## 2 0.44643
## 3 19.02938
## 4 6.97639
## 5 9.39920
# Show the first five rows of sample df_6.
head(df_6, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 NPL 0. - 4.99 BTSX BOTH 31.96446
## 2 CHN 0. - 5.00 NUTRITION_FEMALE BOTH 33.30000
## 3 SEN 0. - 4.99 BTSX BOTH 6.95566
## 4 GNB 0. - 4.99 NUTRITION_MALE BOTH 29.68094
## 5 TUR 3. - 3.99 NUTRITION_FEMALE BOTH 28.96636
## Underweight
## 1 26.74039
## 2 12.10000
## 3 6.45505
## 4 17.21606
## 5 9.63837
# Show the first five rows of sample df_7.
head(df_7, 5)
## Country.ISO.3.Code Age Sex Urban.Rural Stunting
## 1 MWI 3. - 3.99 NUTRITION_FEMALE BOTH 59.06151
## 2 EGY 0. - 4.99 BTSX BOTH 30.28890
## 3 CMR 0. - 4.99 BTSX BOTH 40.41050
## 4 TZA 0. - 0.49 NUTRITION_FEMALE BOTH 19.40409
## 5 THA 2. - 4.99 BTSX BOTH 15.08806
## Underweight
## 1 16.00159
## 2 8.26921
## 3 13.90295
## 4 7.88507
## 5 8.70966
The mean of “Underweight” is always between 15 and 16. The mean of “Stunting” is always between 27 and 28. These values are consistent across all 7 samples.
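No anomalies are apparent in these summaries, although the data is not completely tidy: for example, the grouped counts include rows with an empty “Age” and non-ISO values such as “Bayankhongor” or “Statistics (NBS)” in “Country.ISO.3.Code”.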
The above investigations indicate that “Underweight” and “Stunting” are distributed around 15.5 and 27.5 respectively. This gives us an idea of what we might consider an outlier in the future for these columns.