Problem 1:
These files contain complete loan data for all loans issued through the time period stated, including the current loan status (Current, Late, Fully Paid, etc.) and latest payment information. The file containing loan data through the “present” contains complete loan data for all loans issued through the previous completed calendar quarter.
Insights required
Load required data
Load required packages
Manipulations on data
loans_sample <- sample_n(loans,100)
loans_summary <-
loans_sample %>% select(c(id,loan_amnt,funded_amnt,term,int_rate,emp_length,home_ownership,annual_inc,verification_status,issue_d,loan_status,purpose,total_pymnt_inv)) %>%
mutate(term1=as.numeric(str_trim(str_replace(term,"months","")))) %>%
mutate(issue_date=as.Date(str_replace(paste(as.character(issue_d),-2015)," ",""),"%d-%b-%Y")) %>%
select(-term,issue_d) %>%
mutate(emp_length=str_replace(emp_length,c("n/a"),"NA")) %>%
mutate(total_pymnt=(funded_amnt+(funded_amnt*(as.numeric(str_replace(as.character(int_rate),"%","")))*(term1/12))/100)) %>%
group_by(purpose) %>% summarize(summary = sum(total_pymnt)) %>% ungroup(purpose)
remove(loans_sample1,test)
## Warning in remove(loans_sample1, test): object 'loans_sample1' not found
## Warning in remove(loans_sample1, test): object 'test' not found
plot_ly(loans_summary,x=~purpose,y=~summary)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#bar
Problem 2:
This data comes from the Open Food Facts database, a free, open, and collaborative database of food products around the world.
Insights required
foodfacts <- read.csv("Data/FoodFacts.csv",header = TRUE,sep = ",",nrows = 100,na.strings = c("",NA,"NA","n/a"),skipNul = TRUE)
foodfacts_nona <- foodfacts[,!apply(foodfacts, 2, function(x) all(is.na(x)))] %>% mutate_each(funs(replace(., is.na(.), 0)), c(energy_100g:nutrition_score_uk_100g)) %>%
mutate(totalcal=energy_100g + fat_100g + trans_fat_100g + carbohydrates_100g + starch_100g + proteins_100g + sodium_100g + vitamin_c_100g + calcium_100g + fruits_vegetables_nuts_100g + nutrition_score_uk_100g + energy_from_fat_100g + saturated_fat_100g + cholesterol_100g + sugars_100g + fiber_100g + salt_100g + vitamin_a_100g + vitamin_b9_100g + iron_100g + nutrition_score_fr_100g+nutrition_score_uk_100g) %>% select(-creator,-(energy_100g:nutrition_score_uk_100g),-(ingredients_text:main_category_en),-created_t,-last_modified_t,-generic_name,-packaging,-brands,-categories,-categories_en,-(origins:countries_tags),-image_small_url,-url) %>% arrange(code) %>% mutate(image_url=na.locf(image_url)) %>% filter(! is.na(countries_en) ) %>% spread(countries_en,totalcal) %>% mutate_each(funs(replace(.,is.na(.),0)),France,`France,United Kingdom`,Germany,Spain,`United Kingdom`,`United States`) %>% select(code,product_name,France:`United States`)
#If image is required
#foodfacts_nona$pic <- sprintf('',foodfacts_nona$url)
#kable(head(foodfacts_nona),format="html")
kable(head(foodfacts_nona))
| code | product_name | France | France,United Kingdom | Germany | Spain | United Kingdom | United States |
|---|---|---|---|---|---|---|---|
| 104 | 30 Panach’ Fruits | 1877.878 | 0 | 0 | 0 | 0.0000 | 0 |
| 758 | Cauliflower | 0.000 | 0 | 0 | 0 | 156.1431 | 0 |
| 1373 | Spring Onions | 0.000 | 0 | 0 | 0 | 0.0000 | 0 |
| 2929 | Flat Leaf Parsley | 0.000 | 0 | 0 | 0 | 0.0000 | 0 |
| 3100 | Chair à saucisse | 0.000 | 0 | 0 | 0 | 0.0000 | 0 |
| 4394 | tree ripened papaya | 0.000 | 0 | 0 | 0 | 0.0000 | 0 |
Problem 3:
Medical historical spend for each year.
healthspend <- read.csv("Data/NHE2014.csv",header = TRUE,sep = ",",skip=1 , strip.white = TRUE, blank.lines.skip = TRUE, na.strings = c("-",""),stringsAsFactors = FALSE)
healthspend$`Expenditure.Amount..Millions.` <- as.factor(healthspend$`Expenditure.Amount..Millions.`)
expense_year <- healthspend[!apply(healthspend,1,function(x) all(is.na(x))),] %>% rename(expense_type=`Expenditure.Amount..Millions.`) %>%
mutate_each(funs(replace(.,is.na(.),0)), c(X1960:X2014)) %>%
mutate_each(funs(str_replace(.,",","")), c(X1960:X2014)) %>%
mutate_each(funs(as.numeric(.)), c(X1960:X2014)) %>%
gather(xyear,value = "amount",-expense_type) %>% filter(amount>0) %>%
mutate(year=str_sub(xyear,2,5)) %>% select(-xyear)
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
## Warning in eval(substitute(expr), envir, enclos): NAs introduced by
## coercion
summary_expense <- expense_year %>% group_by(year) %>% summarise(sum=sum(amount))
ggplot(summary_expense,aes(year,sum)) + geom_point(size=3,alpha=1/2) +geom_smooth(method="lm")
plot_ly(expense_year,x=~year,y=~amount)
## No trace type specified:
## Based on info supplied, a 'bar' trace seems appropriate.
## Read more about this trace type -> https://plot.ly/r/reference/#bar