This project analyzes educational investment in high-income countries alongside their crime index for the year 2019.
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.4 v dplyr 1.0.7
## v tidyr 1.1.3 v stringr 1.4.0
## v readr 2.0.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(XML)
library(dplyr)
library(shiny)
library("wesanderson")
# Register the rsconnect deployment account "maliati".
# SECURITY: the token and secret are credentials and must not live in source
# control or in a rendered report. They are read from the environment
# variables RSCONNECT_TOKEN and RSCONNECT_SECRET (e.g. set in ~/.Renviron).
# NOTE(review): the values that were previously hard-coded on this line have
# been exposed and should be revoked and regenerated.
local({
  account_token <- Sys.getenv("RSCONNECT_TOKEN")
  account_secret <- Sys.getenv("RSCONNECT_SECRET")
  # Fail early with a clear message instead of registering empty credentials.
  if (!nzchar(account_token) || !nzchar(account_secret)) {
    stop(
      "Set the RSCONNECT_TOKEN and RSCONNECT_SECRET environment variables ",
      "before registering the rsconnect account.",
      call. = FALSE
    )
  }
  rsconnect::setAccountInfo(
    name = "maliati",
    token = account_token,
    secret = account_secret
  )
})
Data Wrangling
# ---- Load raw inputs ----
# Download the 2019 crime ranking page and parse every HTML table on it;
# the second parsed table is the one kept for the analysis.
worldcrime <- read_html("https://www.numbeo.com/crime/rankings_by_country.jsp?title=2019")
crimetable <- html_table(worldcrime, fill = TRUE)
crimtab2 <- crimetable[[2]]

# Country metadata and education-spending CSV files hosted on GitHub.
dataIncomeGroup <- read.csv("https://raw.githubusercontent.com/maliat-hossain/FileProcessing/main/Metadata_Country_API_NY.GDP.MKTP.CD_DS2_en_csv_v2_3930485.csv")
Dataedu <- read.csv("https://raw.githubusercontent.com/maliat-hossain/FileProcessing/main/World%20Education%20Spending%20Data.csv")

# ---- Keep only the columns needed for the joins ----
# Columns are picked by position: 3 and 5 of the metadata (column 5,
# `TableName`, is renamed to `Country` so it can serve as the join key),
# and 2 and 3 of the crime table.
IncomebyCountry <- dataIncomeGroup %>%
  select(c(3, 5)) %>%
  rename(Country = TableName)
CrimeIndex <- crimtab2 %>%
  select(c(2, 3))

# Give the education-spending columns readable names (positional rename).
names(Dataedu) <- c("Country", "Recent Year", "GDP Spending Percentage")

# ---- Combine the three sources ----
# Inner joins keep only countries that appear in every data set.
IncomeEdu <- IncomebyCountry %>%
  inner_join(Dataedu, by = "Country")
IncomeEduCrime <- IncomeEdu %>%
  inner_join(CrimeIndex, by = "Country")
#colnames(IncomeEduCrime)[3] <- "Education Investment Percentage in 2019"
Scatter Plot
library(ggplot2)
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
# Scatter plot of education spending (x) against the 2019 crime index (y),
# with a fitted linear trend line and its confidence band.
# Columns are referenced by their bare (backticked) names inside aes() rather
# than through `IncomeEduCrime1$...`: ggplot2 warns that `$` in aes() is
# discouraged, because it bypasses the `data` argument and the mapping is no
# longer evaluated against the plot's own data.
p <- ggplot(
  IncomeEduCrime1,
  aes(x = `GDP Spending Percentage`, y = `Crime Index of Year 2019`)
) +
  geom_point() +
  # method = "lm" fits a straight line; se = TRUE draws the confidence band.
  geom_smooth(method = "lm", color = "red", fill = "#69b3a2", se = TRUE) +
  theme_ipsum()
## Warning: Use of `IncomeEduCrime1$`GDP Spending Percentage`` is discouraged. Use
## `GDP Spending Percentage` instead.
## Warning: Use of `IncomeEduCrime1$`Crime Index of Year 2019`` is discouraged. Use
## `Crime Index of Year 2019` instead.
## Warning: Use of `IncomeEduCrime1$`GDP Spending Percentage`` is discouraged. Use
## `GDP Spending Percentage` instead.
## Warning: Use of `IncomeEduCrime1$`Crime Index of Year 2019`` is discouraged. Use
## `Crime Index of Year 2019` instead.
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_stringMetric, as.graphicsAnnot(x$label)): font family not
## found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, : font
## family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Transforming Numeric Values to Categorical
# Drop every row that has a missing value so the means below are computed on
# complete cases only.
IncomeEduCrime1 <- na.omit(IncomeEduCrime1)
# Column means, printed for reference. NOTE(review): the 5.02 / 33.31 cut-offs
# used further down appear to be taken from these values.
mean(IncomeEduCrime1$`GDP Spending Percentage`)
## [1] 5.029286
mean(IncomeEduCrime1$`Crime Index of Year 2019`)
## [1] 33.31214
#IncomeEduCrime1 %>% add_row(IncomeGroup = "High income",Country = "United States", `Recent Year` = "2020",`GDP Spending Percentage`= "4.96", `Crime Index of Year 2019`= "47.13")
# Append the United States as a new last row.
# The values are supplied as a list, not c(): c() would coerce all five values
# to character, and assigning that vector would silently turn the numeric
# spending and crime-index columns into character columns, which breaks the
# numeric comparisons made on them later.
# NOTE(review): assumes IncomeEduCrime1 is a base data.frame with exactly these
# five columns in this order -- confirm against where it is created.
IncomeEduCrime1[nrow(IncomeEduCrime1) + 1, ] <-
  list("High income", "United States", 2020L, 4.96, 47.13)
#IncomeEduCrime4<-IncomeEduCrime1
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
#IncomeEduCrime4$`Crime Index of Year 2019`<-arules::discretize(IncomeEduCrime4$`Crime Index of Year 2019`, breaks = 3, labels = c("Low Crime Rate","Medium Crime Rate","High Crime Rate"))
#IncomeEduCrime4$`GDP Spending Percentage`<-arules::discretize(IncomeEduCrime4$`GDP Spending Percentage`, breaks = 3, labels = c("Low Education Spending","Medium Education Spending","High Education Spending"))
# Build the categorical version of the data set: DataRede starts as a copy of
# IncomeEduCrime1 and gains two factor columns that label each country as
# high/low on education spending and on crime rate.
DataRede <- IncomeEduCrime1
# The measures are converted with as.numeric() before thresholding. If the
# columns are stored as character (which happens when a row is appended from a
# character vector), `>=` would compare strings lexicographically, e.g.
# "10.1" >= "5.02" is FALSE. For columns that are already numeric the
# conversion is a no-op.
# Cut-offs are fixed constants: 5.02 for spending, 33.31 for the crime index.
DataRede$`GDP Spending Percentage Type` <- as.factor(ifelse(
  as.numeric(DataRede$`GDP Spending Percentage`) >= 5.02,
  "High Spending",
  "Low Spending"
))
DataRede$`Crime Index of Year 2019 Type` <- as.factor(ifelse(
  as.numeric(DataRede$`Crime Index of Year 2019`) >= 33.31,
  "High Crime Rate",
  "Low Crime Rate"
))
#DataRede$`Crime Index of Year 2019`<-arules::discretize(DataRede$`Crime Index of Year 2019`, breaks = 2, labels = c("Low Crime Rate","Medium Crime Rate","High Crime Rate"))
#IncomeEduCrime4$`GDP Spending Percentage`<-arules::discretize(IncomeEduCrime4$`GDP Spending Percentage`, breaks = 3, labels = c("Low Education Spending","Medium Education Spending","High Education Spending"))