# install.packages("tidyverse")
# install.packages("stringr")
# install.packages("lubridate")
# install.packages("wordcloud")
# install.packages("tidytext")
# install.packages("DT")
# install.packages("leaflet")
# install.packages("igraph")
# install.packages("ggraph")
# install.packages("topicmodels")
# install.packages("SnowballC")
# install.packages("tm")
library(tidyverse) # data manipulation and graphs
## -- Attaching packages --------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(stringr) # string manipulation
library(lubridate) # date manipulation
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(wordcloud) # wordcloud
## Loading required package: RColorBrewer
#library(tidytext) # tidy implementation of NLP methods
library(DT) # table format display of data
library(leaflet) # maps
library(igraph) # graphs
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:lubridate':
##
## %--%, union
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
##
## compose, simplify
## The following object is masked from 'package:tidyr':
##
## crossing
## The following object is masked from 'package:tibble':
##
## as_data_frame
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
#library(ggraph) # graphs
library(topicmodels) # for LDA topic modelling
library(tm) # general text mining functions, making document term matrixes
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
##
## annotate
library(SnowballC) # for stemming
# Session setup ----
# Point the session at the folder holding the CSV inputs, then clear any
# objects left in the workspace by earlier runs.
setwd("C:/Users/v-vyupad/Desktop/Analytics")
rm(list = ls())

# Fill colours used by the bar charts and histograms further down.
fillColor <- "#FFA07A"
fillColor2 <- "#F1C40F"

# Input data ----
# Every file is read relative to the working directory set above.
loans <- read.csv(file = "kiva_loans.csv")
regions <- read.csv(file = "kiva_mpi_region_locations.csv")
themes <- read.csv(file = "loan_theme_ids.csv")
themes_region <- read.csv(file = "loan_themes_by_region.csv")
#Loan Data
# Interactive previews of the first rows (head()) of each input table.
# Each preview uses the same DT settings: bootstrap styling, a condensed table
# class, `dom = "tp"` and horizontal scrolling via `scrollX = TRUE`.
# NOTE(review): the recorded run printed DT's "data is too big for client-side
# DataTables" warning after the loans preview and the themes-by-region preview.

# Loan data
datatable(
  head(loans),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

# Region data
datatable(
  head(regions),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

# Themes data
datatable(
  head(themes),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)

# Themes-by-region data
datatable(
  head(themes_region),
  style = "bootstrap",
  class = "table-condensed",
  options = list(dom = "tp", scrollX = TRUE)
)
## Warning in instance$preRenderHook(instance): It seems your data is too
## big for client-side DataTables. You may consider server-side processing:
## https://rstudio.github.io/DT/server.html
# Most Popular Themes
# Top 10 loan theme types by number of rows in `themes`, drawn as a horizontal
# bar chart with each count printed at the base of its bar.
themes %>%
  rename(themeType = `Loan.Theme.Type`) %>%
  # read.csv() leaves blank text fields as "" rather than NA, so is.na() alone
  # would let unlabelled rows through as their own "theme"; drop both forms.
  filter(!is.na(themeType), as.character(themeType) != "") %>%
  group_by(themeType) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Order the factor by Count so the bars come out sorted after coord_flip().
  mutate(themeType = reorder(themeType, Count)) %>%
  head(10) %>%
  ggplot(aes(x = themeType, y = Count)) +
  geom_col(colour = "white", fill = fillColor) +
  # y = 1 anchors the label at the start of the bar; x is inherited.
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "bold"
  ) +
  labs(
    x = "Type of Theme",
    y = "Count",
    title = "Type of Theme and Count"
  ) +
  coord_flip() +
  theme_bw()

# Most Popular Regions
# Ten countries with the most rows in `themes_region`, shown as horizontal
# bars labelled with their counts.
themes_region %>%
  group_by(country) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Order the factor by Count so the bars come out sorted after coord_flip().
  mutate(country = reorder(country, Count)) %>%
  head(10) %>%
  ggplot(mapping = aes(x = country, y = Count)) +
  geom_col(colour = "white", fill = fillColor2) +
  # y = 1 pins each label to the base of its bar; x comes from the plot mapping.
  geom_text(
    mapping = aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "bold"
  ) +
  labs(title = "Country and Count", x = "Country", y = "Count") +
  coord_flip() +
  theme_bw()

# Most Popular Sectors (working)
# Up to ten sectors with the most rows in `themes_region`, shown as horizontal
# bars labelled with their counts.
themes_region %>%
  group_by(sector) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Order the factor by Count so the bars come out sorted after coord_flip().
  mutate(sector = reorder(sector, Count)) %>%
  head(10) %>%
  ggplot(mapping = aes(x = sector, y = Count)) +
  geom_col(colour = "white", fill = fillColor2) +
  # y = 1 pins each label to the base of its bar; x comes from the plot mapping.
  geom_text(
    mapping = aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "bold"
  ) +
  labs(title = "Sector and Count", x = "Sector", y = "Count") +
  coord_flip() +
  theme_bw()

# Maps of Loan
# World map with one red circle per row of `themes_region`; each circle's
# radius is the row's `amount` divided by 10. The recorded run shows leaflet
# warning about, and ignoring, rows with missing or invalid lat/lon.
themes_region %>%
  leaflet() %>%
  addProviderTiles(provider = "Esri.NatGeoWorldMap") %>%
  addCircles(
    lng = ~lon,
    lat = ~lat,
    radius = ~ (amount / 10),
    color = "red"
  ) %>%
  # Start centred on (0, 0) and zoomed out far enough to show the whole world.
  setView(lng = 0, lat = 0, zoom = 2)
## Warning in validateCoords(lng, lat, funcName): Data contains 2074 rows with
## either missing or invalid lat/lon values and will be ignored
# Loans in the Philippines
# Rows of `themes_region` for the Philippines, with the theme column renamed
# to `themeType` and stored as a factor.
country_loans <- themes_region %>%
  filter(country == "Philippines") %>%
  rename(themeType = `Loan.Theme.Type`)

# factor() rebuilds the levels from the values actually present. as.factor()
# returns an existing factor unchanged, so when read.csv() has already produced
# a factor column the subset would keep theme levels from every other country,
# and a colour palette built from those levels would be spread over themes that
# never appear on the Philippines map.
country_loans$themeType <- factor(country_loans$themeType)
# Colours offered to the theme palette (22 entries), in this order.
colorsList <- c(
  "#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4",
  "#46f0f0", "#f032e6", "#d2f53c", "#fabebe", "#008080", "#e6beff",
  "#aa6e28", "#fffac8", "#800000", "#aaffc3", "#808000", "#ffd8b1",
  "#000080", "#808080", "#FFFFFF", "#000000"
)

# Palette function mapping each loan theme type in `country_loans` to a colour.
factpal <- colorFactor(palette = colorsList, domain = country_loans$themeType)

# Median coordinates of the selected loans (missing values ignored); used as
# the initial centre of the country map.
center_lat <- median(country_loans$lat, na.rm = TRUE)
center_lon <- median(country_loans$lon, na.rm = TRUE)
# Map of the loans in `country_loans`: one circle per row, coloured by theme
# type and sized by `amount` / 10. The recorded run shows leaflet warning
# about, and ignoring, rows with missing or invalid lat/lon.
country_loans %>%
  leaflet() %>%
  addProviderTiles(provider = "Esri.NatGeoWorldMap") %>%
  addCircles(
    lng = ~lon,
    lat = ~lat,
    radius = ~ (amount / 10),
    color = ~ factpal(themeType)
  ) %>%
  # Centre the view on the median loan location.
  setView(lng = center_lon, lat = center_lat, zoom = 6) %>%
  # Legend that maps each circle colour back to its theme type.
  addLegend(
    position = "bottomright",
    pal = factpal,
    values = ~themeType,
    title = "Loans Themes",
    opacity = 1
  )
## Warning in validateCoords(lng, lat, funcName): Data contains 384 rows with
## either missing or invalid lat/lon values and will be ignored
# Distribution of Funded Loan
# Histogram of funded loan amounts, with both axes on a log10 scale and
# power-of-ten tick labels.
loans %>%
  # log10(0) is -Inf, so loans with nothing funded cannot be drawn on a log
  # axis. Dropping them here makes the exclusion explicit instead of relying on
  # ggplot2 to discard the infinite values with a warning.
  filter(funded_amount > 0) %>%
  ggplot(aes(x = funded_amount)) +
  geom_histogram(fill = fillColor2, bins = 50) +
  scale_x_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  # Empty bins have a count of 0, which is also -Inf on the log y axis; ggplot2
  # still omits those bars and warns about it.
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  labs(
    x = "Funded Loan Amount",
    y = "Count",
    title = paste("Distribution of", "Funded Loan Amount")
  ) +
  theme_bw()
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 3383 rows containing non-finite values (stat_bin).
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 6 rows containing missing values (geom_bar).

# Distribution of Funded Loan Amount by Sector
# Box plot of funded amount per sector on a log10 y axis, one coloured box per
# sector. The original pipeline first added a `fill` column that nothing read
# (the fill aesthetic maps `sector` directly), so that step is dropped.
# Funded amounts of 0 become -Inf on the log axis; the recorded run shows
# ggplot2 removing such rows with a warning.
loans %>%
  ggplot(aes(x = sector, y = funded_amount, fill = sector)) +
  geom_boxplot() +
  scale_y_log10(
    breaks = scales::trans_breaks("log10", function(x) 10^x),
    labels = scales::trans_format("log10", scales::math_format(10^.x))
  ) +
  labs(
    x = "Sector Type",
    y = "Funded Amount",
    title = paste("Distribution of", " Funded Amount ")
  ) +
  theme_bw() +
  # Rotate the sector names so they do not overlap along the x axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Removed 3383 rows containing non-finite values (stat_boxplot).

# Distribution of Loan by Gender
# Ten most frequent `borrower_genders` values in `loans`, drawn as a
# horizontal bar chart with each count printed at the base of its bar.
loans %>%
  # read.csv() leaves blank text fields as "" rather than NA, so is.na() alone
  # would count loans with no recorded gender as a category; drop both forms.
  filter(!is.na(borrower_genders), as.character(borrower_genders) != "") %>%
  group_by(borrower_genders) %>%
  summarise(Count = n()) %>%
  ungroup() %>%
  arrange(desc(Count)) %>%
  # Order the factor by Count so the bars come out sorted after coord_flip().
  mutate(borrower_genders = reorder(borrower_genders, Count)) %>%
  head(10) %>%
  ggplot(aes(x = borrower_genders, y = Count)) +
  geom_col(colour = "white", fill = fillColor2) +
  # y = 1 anchors the label at the start of the bar; x is inherited.
  # paste0() has no `sep` argument, so none is passed.
  geom_text(
    aes(y = 1, label = paste0("(", Count, ")")),
    hjust = 0, vjust = 0.5, size = 4, colour = "black", fontface = "bold"
  ) +
  labs(
    x = "Gender",
    y = "Count",
    title = "Gender and Count"
  ) +
  coord_flip() +
  theme_bw()
