# Show the current working directory; the read.csv()/write.csv() calls in this
# script use bare file names, so they are resolved relative to it.
getwd()
## [1] "C:/Users/Jerome/Documents/Data_Science_110/Datasets"
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages ----------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.2 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'dplyr' was built under R version 4.0.2
## -- Conflicts -------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggplot2)
library(RColorBrewer)
# Mean person measure by mother's education and survey year ----
# Load the survey records. The data frame itself is left ungrouped; grouping
# is applied only inside the summary pipeline below.
lcn_fpf_co_names <- read.csv("lcn_fpf_co_names.csv")

# Average MEASURE within every Svy_Year x Educ cell. `.groups = "drop"`
# returns a plain (ungrouped) table and silences the regrouping message.
lcn_fpf_co_names_educ_summ <- lcn_fpf_co_names %>%
  group_by(Svy_Year, Educ) %>%
  summarize(MEASURE = mean(MEASURE), .groups = "drop")

the_measure <- "Comparison of Mean Person Measures by Mother's Education/Survey Year"

# Attach readable labels to the education codes. Labels are matched to the
# codes in sorted order; factor() stops with an error if the number of
# distinct codes differs from the number of labels instead of silently
# mislabelling a category.
lcn_fpf_co_names_educ_summ$Educ <- factor(
  lcn_fpf_co_names_educ_summ$Educ,
  labels = c(
    "No Education", "Incomplete Primary", "Primary",
    "Incomplete Secondary", "Secondary", "Higher Education"
  )
)

# Dodged bars: one cluster per survey year, one bar per education level.
# Educ is already a factor, so it is mapped directly and the legend gets an
# explicit title rather than the expression text.
plot <- ggplot(
  lcn_fpf_co_names_educ_summ,
  aes(x = as.factor(Svy_Year), y = MEASURE, fill = Educ)
) +
  geom_col(position = "dodge") +
  labs(
    y = "Mean Person Measure",
    x = "Survey Year",
    fill = "Mother's Education"
  ) +
  ggtitle(the_measure)
print(plot)
# Mean person measure by wealth category and survey year ----
# Clear any grouping left on the shared data frame so later code sees it
# ungrouped; grouping is applied only inside the summary pipeline below.
lcn_fpf_co_names <- ungroup(lcn_fpf_co_names)

# Average MEASURE within every Svy_Year x Wealth cell. `.groups = "drop"`
# returns a plain (ungrouped) table and silences the regrouping message.
lcn_fpf_co_names_wealth_summ <- lcn_fpf_co_names %>%
  group_by(Svy_Year, Wealth) %>%
  summarize(MEASURE = mean(MEASURE), .groups = "drop")

the_measure <- "Comparison of Mean Person Measures by Wealth Category/Survey Year"

# Attach readable labels to the wealth codes. Labels are matched to the codes
# in sorted order; factor() stops with an error if the number of distinct
# codes differs from the number of labels instead of silently mislabelling.
lcn_fpf_co_names_wealth_summ$Wealth <- factor(
  lcn_fpf_co_names_wealth_summ$Wealth,
  labels = c("Poorest", "Poorer", "Middle", "Richer", "Richest")
)

# Dodged bars: one cluster per survey year, one bar per wealth category.
# Wealth is already a factor, so it is mapped directly and the legend gets an
# explicit title rather than the expression text.
plot <- ggplot(
  lcn_fpf_co_names_wealth_summ,
  aes(x = as.factor(Svy_Year), y = MEASURE, fill = Wealth)
) +
  geom_col(position = "dodge") +
  labs(
    y = "Mean Person Measure",
    x = "Survey Year",
    fill = "Wealth Category"
  ) +
  ggtitle(the_measure)
print(plot)
## Another way to see the change in nutrition levels is to plot the measure by county, showing how it changed in each county between the two survey years for which data are available (2007 and 2013). The chart indicates that nutrition levels rose in only three counties, stayed constant in eight counties, and dropped in four counties. Given the increase in per capita GDP over the same period, this pattern is a cause for concern.
# County-by-year heatmap of the nutrition measure ----
# Re-read the survey file so this section starts from a fresh, ungrouped
# data frame. (The former data("lcn_fpf_co_names") call was dropped: the
# object comes from the CSV, not from a package dataset, so data() only
# raised a "data set not found" warning.)
lcn_fpf_co_names <- read.csv("lcn_fpf_co_names.csv")

the_measure <- "Measures of Child Nutrition in Liberian Counties, 2007 & 2013"

# The file holds many records per county and survey year. Collapse them to
# one mean per Co_Name x Svy_Year cell first; otherwise geom_tile() stacks one
# tile per record and the visible colour is just the last record drawn.
# NOTE(review): re-check the narrative counts of counties that rose / fell
# against the aggregated chart.
lcn_fpf_co_names %>%
  group_by(Svy_Year, Co_Name) %>%
  summarize(MEASURE = mean(MEASURE), .groups = "drop") %>%
  ggplot(aes(Svy_Year, Co_Name, fill = MEASURE)) +
  geom_tile(color = "grey50") +
  # Only the two survey years appear on the x axis.
  scale_x_continuous(breaks = c(2007, 2013)) +
  scale_fill_gradientn(colors = brewer.pal(9, "Reds"), trans = "sqrt") +
  # Blue guide lines mark the centre of each survey-year column.
  geom_vline(xintercept = 2007, col = "blue") +
  geom_vline(xintercept = 2013, col = "blue") +
  theme_minimal() +
  theme(panel.grid = element_blank()) +
  ggtitle(the_measure) +
  ylab("") +
  xlab("")
library(ggrepel)
# Item measures in 2007 plotted against 2013, with a fitted straight line and
# non-overlapping text labels taken from Food_Group.
Item_Measures <- read.csv("Item_Measures.csv")

# Base plot object: axes and label mapping only, layers are added below.
p1 <- ggplot(
  data = Item_Measures,
  mapping = aes(
    x = Item_Measures_2007,
    y = Item_Measures_2013,
    label = Food_Group
  )
)

# Layers are added in the same order as before so the drawing order is kept;
# both axes are fixed to the 0-100 range.
p1 +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  ggtitle("Patterns of Foods Fed to Children in Liberia, 2007 and 2013") +
  geom_text_repel(nudge_x = .005) +
  xlim(0, 100) +
  ylim(0, 100)
# Two-sample t-test of MEASURE between the survey years (formula interface:
# response ~ grouping variable), run on the full data set. The call text is
# echoed in the "data:" line of the printed result.
t.test(lcn_fpf_co_names$MEASURE~lcn_fpf_co_names$Svy_Year)
##
## Welch Two Sample t-test
##
## data: lcn_fpf_co_names$MEASURE by lcn_fpf_co_names$Svy_Year
## t = 36.309, df = 19492, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 10.84161 12.07895
## sample estimates:
## mean in group 2007 mean in group 2013
## 49.40959 37.94930
# Linear model of MEASURE on UrbRur (presumably urban/rural residence --
# confirm against the codebook), Educ and Wealth, fitted on the full data set.
# as.factor() enters each coded predictor as a categorical variable, so every
# non-baseline code gets its own coefficient.
fit4 <- lm(MEASURE ~ as.factor(UrbRur) +as.factor(Educ) +as.factor(Wealth), data = lcn_fpf_co_names)
# Coefficient table with standard errors, t values, R-squared and F-statistic.
summary(fit4)
##
## Call:
## lm(formula = MEASURE ~ as.factor(UrbRur) + as.factor(Educ) +
## as.factor(Wealth), data = lcn_fpf_co_names)
##
## Residuals:
## Min 1Q Median 3Q Max
## -53.614 -20.592 7.934 15.424 49.724
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.66065 0.50126 83.112 < 2e-16 ***
## as.factor(UrbRur)2 0.06121 0.41911 0.146 0.8839
## as.factor(Educ)1 -0.31553 0.37293 -0.846 0.3975
## as.factor(Educ)2 4.38239 0.84769 5.170 2.37e-07 ***
## as.factor(Educ)3 0.32681 0.55807 0.586 0.5581
## as.factor(Educ)4 1.21807 1.23126 0.989 0.3225
## as.factor(Educ)5 4.33243 2.16209 2.004 0.0451 *
## as.factor(Wealth)2 0.12967 0.42736 0.303 0.7616
## as.factor(Wealth)3 2.10815 0.47627 4.426 9.63e-06 ***
## as.factor(Wealth)4 3.72918 0.58120 6.416 1.43e-10 ***
## as.factor(Wealth)5 7.58124 0.80801 9.383 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.73 on 19711 degrees of freedom
## Multiple R-squared: 0.01143, Adjusted R-squared: 0.01093
## F-statistic: 22.79 on 10 and 19711 DF, p-value: < 2.2e-16
# Compact display of fit4: the call and the coefficient estimates only.
print(fit4)
##
## Call:
## lm(formula = MEASURE ~ as.factor(UrbRur) + as.factor(Educ) +
## as.factor(Wealth), data = lcn_fpf_co_names)
##
## Coefficients:
## (Intercept) as.factor(UrbRur)2 as.factor(Educ)1 as.factor(Educ)2
## 41.66065 0.06121 -0.31553 4.38239
## as.factor(Educ)3 as.factor(Educ)4 as.factor(Educ)5 as.factor(Wealth)2
## 0.32681 1.21807 4.33243 0.12967
## as.factor(Wealth)3 as.factor(Wealth)4 as.factor(Wealth)5
## 2.10815 3.72918 7.58124
library(mosaic)
## Warning: package 'mosaic' was built under R version 4.0.2
## Loading required package: lattice
## Loading required package: ggformula
## Warning: package 'ggformula' was built under R version 4.0.2
## Loading required package: ggstance
## Warning: package 'ggstance' was built under R version 4.0.2
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Warning: package 'mosaicData' was built under R version 4.0.2
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Have you tried the ggformula package for your plots?
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:plotly':
##
## do
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following object is masked from 'package:purrr':
##
## cross
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
## quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
# Repeat the year comparison on a 176-row subsample ----
# Fix the random seed so the draw, the saved CSV and every result computed
# from them are repeatable. Without it each run overwrites lcn_sample_176.csv
# with a different sample.
# NOTE(review): the output recorded below came from an unseeded draw and will
# not match the seeded sample exactly.
set.seed(176)

# Draw 176 rows without replacement.
lcn_sample_176 <- resample(lcn_fpf_co_names, size = 176, replace = FALSE)

# Save the sample. row.names = FALSE keeps the row index from being written
# and then read back as a spurious extra column.
write.csv(lcn_sample_176, file = "lcn_sample_176.csv", row.names = FALSE)
lcn_sample_176 <- read.csv("lcn_sample_176.csv", header = TRUE)

# Two-sample t-test of MEASURE between the survey years on the subsample.
t.test(lcn_sample_176$MEASURE ~ lcn_sample_176$Svy_Year)
##
## Welch Two Sample t-test
##
## data: lcn_sample_176$MEASURE by lcn_sample_176$Svy_Year
## t = 3.9157, df = 165.52, p-value = 0.0001316
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 6.128047 18.593391
## sample estimates:
## mean in group 2007 mean in group 2013
## 49.02544 36.66472
# Same model specification as fit4 (MEASURE on UrbRur, Educ and Wealth, each
# entered as a categorical predictor via as.factor()), fitted on the 176-row
# subsample instead of the full data set.
fit5 <- lm(MEASURE ~ as.factor(UrbRur) +as.factor(Educ) +as.factor(Wealth), data = lcn_sample_176)
# Coefficient table with standard errors, t values, R-squared and F-statistic.
summary(fit5)
##
## Call:
## lm(formula = MEASURE ~ as.factor(UrbRur) + as.factor(Educ) +
## as.factor(Wealth), data = lcn_sample_176)
##
## Residuals:
## Min 1Q Median 3Q Max
## -51.282 -17.936 7.113 17.423 40.981
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 45.6242 4.8447 9.417 <2e-16 ***
## as.factor(UrbRur)2 -4.5954 4.3203 -1.064 0.289
## as.factor(Educ)1 -0.4001 3.9471 -0.101 0.919
## as.factor(Educ)2 7.6427 9.1522 0.835 0.405
## as.factor(Educ)3 -0.8092 6.1809 -0.131 0.896
## as.factor(Educ)4 -16.7027 16.7523 -0.997 0.320
## as.factor(Educ)5 3.7422 17.5044 0.214 0.831
## as.factor(Wealth)2 -2.8362 4.4019 -0.644 0.520
## as.factor(Wealth)3 -1.9745 5.0325 -0.392 0.695
## as.factor(Wealth)4 -1.9263 6.6434 -0.290 0.772
## as.factor(Wealth)5 5.6487 7.9581 0.710 0.479
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.58 on 165 degrees of freedom
## Multiple R-squared: 0.03313, Adjusted R-squared: -0.02547
## F-statistic: 0.5653 on 10 and 165 DF, p-value: 0.8405
# Compact display of fit5: the call and the coefficient estimates only.
print(fit5)
##
## Call:
## lm(formula = MEASURE ~ as.factor(UrbRur) + as.factor(Educ) +
## as.factor(Wealth), data = lcn_sample_176)
##
## Coefficients:
## (Intercept) as.factor(UrbRur)2 as.factor(Educ)1 as.factor(Educ)2
## 45.6242 -4.5954 -0.4001 7.6427
## as.factor(Educ)3 as.factor(Educ)4 as.factor(Educ)5 as.factor(Wealth)2
## -0.8092 -16.7027 3.7422 -2.8362
## as.factor(Wealth)3 as.factor(Wealth)4 as.factor(Wealth)5
## -1.9745 -1.9263 5.6487