library(tidyverse)
library("imputeTS")
library(bitops)
library(RCurl)
# Download the raw CSV text of the 2010-2018 export data set from GitHub.
# Note: despite its name, `file_url` holds the downloaded file contents.
file_url <- getURL("https://raw.githubusercontent.com/jey1987/DATA606/master/Week6/2018-2010_export.csv")
# Parse the downloaded text into a data frame. `stringsAsFactors` has to be
# given to read.csv() itself: passing it to a wrapping data.frame() call does
# not change columns that read.csv() has already parsed.
india_trade <- read.csv(
  text = file_url,
  header = TRUE,
  stringsAsFactors = FALSE
)
# Count, per year, the rows whose commodity is "PHARMACEUTICAL PRODUCTS".
# The result has one row per (year, Commodity) pair with the row count in
# `comm_cnt`.
india_trade_commodity_prep <- india_trade %>%
  filter(Commodity == "PHARMACEUTICAL PRODUCTS") %>%
  select(Commodity, year) %>%
  group_by(year, Commodity) %>%
  summarise(comm_cnt = n())
# For every year, keep the five rows with the largest export `value`.
# The result stays grouped by `year` and keeps the columns value, country, year.
# NOTE(review): rows are individual records of the source table, not per-country
# totals, so the same country can appear more than once within a year. If
# per-country totals are intended, aggregate `value` by year and country first.
india_trade_export_prep <- india_trade %>%
  select(value, country, year) %>%
  group_by(year) %>%
  # Sort within each year, largest value first.
  arrange(desc(value), .by_group = TRUE) %>%
  # Grouped filter keeps the first five rows of each year; this replaces the
  # superseded do(head(., n = 5)) idiom with the same result.
  filter(row_number() <= 5L)
Below are the two research questions:
1. How many exports has India made over the years, and what does the trend look like? 2. Which countries are the top 5 contributors to India’s exports over time?
What are the cases, and how many are there? There are 137,023 observations and 5 variables.
Describe the method of data collection.
The data was collected from Kaggle and comprises Indian export data from 2010 to 2018.
What type of study is this (observational/experiment)?
This is an observational study.
If you collected the data, state self-collected. If not, provide a citation/link.
Below is a link to the data source
What is the response variable? Is it quantitative or qualitative?
Below are the response variables for the two questions:
You should have two independent variables, one quantitative and one qualitative.
Provide summary statistics for each of the variables. Also include appropriate visualizations related to your research question (e.g. scatter plot, boxplots, etc). This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.
# Summary statistics for each column of the yearly pharmaceutical count table.
summary(india_trade_commodity_prep)
## year
## Min. :2010
## 1st Qu.:2012
## Median :2014
## Mean :2014
## 3rd Qu.:2016
## Max. :2018
##
## Commodity
## PHARMACEUTICAL PRODUCTS :9
## AIRCRAFT, SPACECRAFT, AND PARTS THEREOF. :0
## ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GLUES; ENZYMES. :0
## ALUMINIUM AND ARTICLES THEREOF. :0
## ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PRE. EDIBLE FATS; ANIMAL OR VEGETABLE WAXEX.:0
## ARMS AND AMMUNITION; PARTS AND ACCESSORIES THEREOF. :0
## (Other) :0
## comm_cnt
## Min. :211.0
## 1st Qu.:212.0
## Median :213.0
## Mean :212.9
## 3rd Qu.:214.0
## Max. :215.0
##
# Number of rows and columns in the yearly pharmaceutical count table.
dim(india_trade_commodity_prep)
## [1] 9 3
# Preview the first rows of the yearly pharmaceutical count table.
head(india_trade_commodity_prep)
## # A tibble: 6 x 3
## # Groups: year [6]
## year Commodity comm_cnt
## <int> <fct> <int>
## 1 2010 PHARMACEUTICAL PRODUCTS 212
## 2 2011 PHARMACEUTICAL PRODUCTS 214
## 3 2012 PHARMACEUTICAL PRODUCTS 212
## 4 2013 PHARMACEUTICAL PRODUCTS 211
## 5 2014 PHARMACEUTICAL PRODUCTS 213
## 6 2015 PHARMACEUTICAL PRODUCTS 212
# Summary statistics for each column of the top-five-per-year export table.
summary(india_trade_export_prep)
## value country year
## Min. : 4005 U ARAB EMTS:13 Min. :2010
## 1st Qu.: 6520 U S A :12 1st Qu.:2012
## Median : 9550 HONG KONG : 9 Median :2014
## Mean : 9468 SINGAPORE : 7 Mean :2014
## 3rd Qu.:11271 SAUDI ARAB : 2 3rd Qu.:2016
## Max. :19805 UNSPECIFIED: 2 Max. :2018
## (Other) : 0
# Number of rows and columns in the top-five-per-year export table.
dim(india_trade_export_prep)
## [1] 45 3
# Preview the first rows of the top-five-per-year export table.
head(india_trade_export_prep)
## # A tibble: 6 x 3
## # Groups: year [2]
## value country year
## <dbl> <fct> <int>
## 1 19805. U ARAB EMTS 2010
## 2 10174. UNSPECIFIED 2010
## 3 8670. HONG KONG 2010
## 4 5518. SINGAPORE 2010
## 5 5273. U S A 2010
## 6 18393. U ARAB EMTS 2011
# Line chart of the yearly pharmaceutical row counts (`comm_cnt`) against year.
# `group = 1` makes geom_line() connect all points as a single series.
ggplot(
  data = india_trade_commodity_prep,
  aes(x = year, y = comm_cnt, group = 1)
) +
  geom_line(linetype = "dashed", color = "red") +
  geom_point() +
  theme_minimal() +
  theme(legend.position = "top") +
  xlab("Year") +
  ylab("Number of Exports") +
  # Title fixed: the original string ended with a stray backtick.
  ggtitle("Number of Exports over years")
# Stacked bar chart of the top-five export values per year, coloured by country.
# stat = "identity" plots the `value` column as-is instead of counting rows.
ggplot(
  data = india_trade_export_prep,
  aes(x = year, y = value, fill = country)
) +
  geom_bar(stat = "identity") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90)) +
  # The x aesthetic is `year`; countries are shown through the fill legend,
  # so the axis label is "Year" rather than "Countries".
  xlab("Year") +
  ylab("Amount Exported") +
  ggtitle("Top 5 Amount Exported by Countries by Year ")