Data Science Recording using dlply and the brew package: read the UN education dataset and create data for reporting.
# Load the UN education CSV with every column kept as character. The file
# carries a second header row, which arrives as the first data row.
MyData <- read.csv(
  file = "C:/Users/Banu/Documents/RScriptfiles/Datascienceincontext/SYB62_T07_Education_BrewPracticedataset.csv",
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)
# Inspect the structure of the raw import.
str(MyData)
## 'data.frame': 8629 obs. of 7 variables:
## $ ï..T07 : chr "Region/Country/Area" "1" "1" "1" ...
## $ Enrolment.in.primary..secondary.and.tertiary.education.levels: chr "" "Total, all countries or areas" "Total, all countries or areas" "Total, all countries or areas" ...
## $ X : chr "Year" "2005" "2005" "2005" ...
## $ X.1 : chr "Series" "Students enrolled in primary education (thousands)" "Gross enrollement ratio - Primary (male)" "Gross enrollment ratio - Primary (female)" ...
## $ X.2 : chr "Value" "678,991.61" "104.936" "99.9214" ...
## $ X.3 : chr "Footnotes" "" "" "" ...
## $ X.4 : chr "Source" "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ ...
# Preview the first six rows of the raw import.
head(MyData, n = 6L)
## ï..T07
## 1 Region/Country/Area
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
## Enrolment.in.primary..secondary.and.tertiary.education.levels X
## 1 Year
## 2 Total, all countries or areas 2005
## 3 Total, all countries or areas 2005
## 4 Total, all countries or areas 2005
## 5 Total, all countries or areas 2005
## 6 Total, all countries or areas 2005
## X.1 X.2
## 1 Series Value
## 2 Students enrolled in primary education (thousands) 678,991.61
## 3 Gross enrollement ratio - Primary (male) 104.936
## 4 Gross enrollment ratio - Primary (female) 99.9214
## 5 Students enrolled in secondary education (thousands) 509,245.76
## 6 Gross enrollment ratio - Secondary (male) 65.7318
## X.3
## 1 Footnotes
## 2
## 3
## 4
## 5
## 6
## X.4
## 1 Source
## 2 United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for Statistics (UIS) statistics database, last accessed March 2019.
## 3 United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for Statistics (UIS) statistics database, last accessed March 2019.
## 4 United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for Statistics (UIS) statistics database, last accessed March 2019.
## 5 United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for Statistics (UIS) statistics database, last accessed March 2019.
## 6 United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for Statistics (UIS) statistics database, last accessed March 2019.
# Replace the auto-generated column names with descriptive ones.
colnames(MyData) <- c(
  "CountryCode", "Country", "Year", "Series", "Value", "Footnotes", "Source"
)
# Work on a copy so the raw import stays untouched.
MyData1 <- MyData
# Convert the code column to numeric; the embedded header text becomes NA.
MyData1[["CountryCode"]] <- as.numeric(as.character(MyData1[["CountryCode"]]))
## Warning: NAs introduced by coercion
# Parse Value as a full-precision number. Only the thousands separators are
# stripped; the previous "[[:digit:]]+\\." extraction discarded the fractional
# part (104.936 became 104) and yielded NA for values without a decimal point.
# The embedded header cell ("Value") is non-numeric, so it becomes NA with a
# coercion warning. Transcript output recorded below predates this change.
MyData1$Value <- as.numeric(gsub(",", "", MyData1$Value, fixed = TRUE))
str(MyData1)
## 'data.frame': 8629 obs. of 7 variables:
## $ CountryCode: num NA 1 1 1 1 1 1 1 1 1 ...
## $ Country : chr "" "Total, all countries or areas" "Total, all countries or areas" "Total, all countries or areas" ...
## $ Year : chr "Year" "2005" "2005" "2005" ...
## $ Series : chr "Series" "Students enrolled in primary education (thousands)" "Gross enrollement ratio - Primary (male)" "Gross enrollment ratio - Primary (female)" ...
## $ Value : num NA 678991 104 99 509245 ...
## $ Footnotes : chr "Footnotes" "" "" "" ...
## $ Source : chr "Source" "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ ...
# Keep only the rows whose country code is 32 or 233.
df <- MyData1[MyData1$CountryCode %in% c(32, 233), ]
#(28,32,31,36,51,233,288,292,300))
# Inspect the filtered subset.
str(df)
## 'data.frame': 90 obs. of 7 variables:
## $ CountryCode: num 32 32 32 32 32 32 32 32 32 32 ...
## $ Country : chr "Argentina" "Argentina" "Argentina" "Argentina" ...
## $ Year : chr "2005" "2005" "2005" "2005" ...
## $ Series : chr "Students enrolled in primary education (thousands)" "Gross enrollement ratio - Primary (male)" "Gross enrollment ratio - Primary (female)" "Students enrolled in secondary education (thousands)" ...
## $ Value : num 4872 117 116 3884 89 ...
## $ Footnotes : chr "" "" "" "" ...
## $ Source : chr "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ "United Nations Educational, Scientific and Cultural Organization (UNESCO), Montreal, the UNESCO Institute for S"| __truncated__ ...
# List the column names of the filtered subset.
colnames(df)
## [1] "CountryCode" "Country" "Year" "Series" "Value"
## [6] "Footnotes" "Source"
Generate a box plot and a rank plot for each country and save them as PDF files:
# For each country in df, save two PDF plots: a box plot of Value by year and
# a rank plot of the 2017 values. dlply() collects, per country, whatever the
# function returns, i.e. the result of the final ggsave() call.
popreportdata <- dlply(df, .(Country), function(country_df) {
  # File-safe country name (spaces become underscores).
  country_tag <- gsub(" ", "_", unique(country_df$Country))

  # Builds e.g. "graphsArgentina_box.pdf"; "graphs" is a filename prefix,
  # not a directory.
  filename <- function(y) {
    paste0("graphs", country_tag, y, ".pdf")
  }

  # Year is stored as character, so compare against a string.
  current <- subset(country_df, Year == "2017")

  # The rows mix enrolment counts and enrolment ratios, so the axis label
  # stays generic rather than claiming a single unit.
  alabel <- "Value (units depend on series)"

  pbox <- ggplot(country_df, aes(factor(Year), Value)) +
    geom_boxplot() +
    labs(x = "", y = alabel)
  ggsave(filename("_box"), pbox, dpi = 100)

  # sort() drops NA values, so rank positions are derived from the sorted
  # vector itself; this keeps x and y the same length when a value is NA.
  ranked_values <- sort(current$Value, decreasing = TRUE)
  rank_data <- data.frame(
    Rank = seq_along(ranked_values),
    Value = ranked_values
  )
  prank <- ggplot(rank_data, aes(Rank, Value)) +
    geom_point() +
    labs(x = "Rank", y = alabel)
  ggsave(filename("_rank"), prank, dpi = 100)
})
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image
## Saving 7 x 5 in image