Data Preparation

library(tidyverse)
library("imputeTS")
library(bitops)
library(RCurl)

file_url <- getURL("https://raw.githubusercontent.com/jey1987/DATA606/master/Week6/2018-2010_export.csv")
india_trade <- data.frame(read.csv(text=file_url, header = TRUE),stringsAsFactors =FALSE)

india_trade_commodity_prep <- india_trade %>%
  select(Commodity,year) %>%
  filter(Commodity == "PHARMACEUTICAL PRODUCTS") %>%
  group_by(year,Commodity) %>%
  summarize(comm_cnt = n())

india_trade_export_prep <- india_trade %>%
  select(value,country,year) %>%
  group_by(year) %>%
  arrange(desc(value),.by_group=TRUE) %>%
  do(head(.,n=5))

Research question

Below are two questions

1. How many number of exports India is performing over the years and how does the trend look like? 2. Which countries are top 5 contributors to India’s export over time?

Cases

What are the cases, and how many are there? There are 137023 Observations and 5 variables

Data collection

Describe the method of data collection.

The data is collected by Kaggle and comprises of Indian export data from 2010 to 2018

Type of study

What type of study is this (observational/experiment)?

The Study is Observatory

Data Source

If you collected the data, state self-collected. If not, provide a citation/link.

Below is a link to the data source

https://www.kaggle.com/lakshyaag/india-trade-data

Dependent Variable

What is the response variable? Is it quantitative or qualitative?

Below are Response variable for two questions ,

  1. Commodity Count (Numeric Variable)
  2. Sum of export amount per Country in a given year

Independent Variable

You should have two independent variables, one quantitative and one qualitative.

  1. HS2 column will act like independent variable and Year will act like an experimental variable

Relevant summary statistics

Provide summary statistics for each the variables. Also include appropriate visualizations related to your research question (e.g. scatter plot, boxplots, etc). This step requires the use of R, hence a code chunk is provided below. Insert more code chunks as needed.

summary(india_trade_commodity_prep)
##       year     
##  Min.   :2010  
##  1st Qu.:2012  
##  Median :2014  
##  Mean   :2014  
##  3rd Qu.:2016  
##  Max.   :2018  
##                
##                                                                                                        Commodity
##  PHARMACEUTICAL PRODUCTS                                                                                    :9  
##  AIRCRAFT, SPACECRAFT, AND PARTS THEREOF.                                                                   :0  
##  ALBUMINOIDAL SUBSTANCES; MODIFIED STARCHES; GLUES; ENZYMES.                                                :0  
##  ALUMINIUM AND ARTICLES THEREOF.                                                                            :0  
##  ANIMAL OR VEGETABLE FATS AND OILS AND THEIR CLEAVAGE PRODUCTS; PRE. EDIBLE FATS; ANIMAL OR VEGETABLE WAXEX.:0  
##  ARMS AND AMMUNITION; PARTS AND ACCESSORIES THEREOF.                                                        :0  
##  (Other)                                                                                                    :0  
##     comm_cnt    
##  Min.   :211.0  
##  1st Qu.:212.0  
##  Median :213.0  
##  Mean   :212.9  
##  3rd Qu.:214.0  
##  Max.   :215.0  
## 
dim(india_trade_commodity_prep)
## [1] 9 3
head(india_trade_commodity_prep)
## # A tibble: 6 x 3
## # Groups:   year [6]
##    year Commodity               comm_cnt
##   <int> <fct>                      <int>
## 1  2010 PHARMACEUTICAL PRODUCTS      212
## 2  2011 PHARMACEUTICAL PRODUCTS      214
## 3  2012 PHARMACEUTICAL PRODUCTS      212
## 4  2013 PHARMACEUTICAL PRODUCTS      211
## 5  2014 PHARMACEUTICAL PRODUCTS      213
## 6  2015 PHARMACEUTICAL PRODUCTS      212
summary(india_trade_export_prep)
##      value              country        year     
##  Min.   : 4005   U ARAB EMTS:13   Min.   :2010  
##  1st Qu.: 6520   U S A      :12   1st Qu.:2012  
##  Median : 9550   HONG KONG  : 9   Median :2014  
##  Mean   : 9468   SINGAPORE  : 7   Mean   :2014  
##  3rd Qu.:11271   SAUDI ARAB : 2   3rd Qu.:2016  
##  Max.   :19805   UNSPECIFIED: 2   Max.   :2018  
##                  (Other)    : 0
dim(india_trade_export_prep)
## [1] 45  3
head(india_trade_export_prep)
## # A tibble: 6 x 3
## # Groups:   year [2]
##    value country      year
##    <dbl> <fct>       <int>
## 1 19805. U ARAB EMTS  2010
## 2 10174. UNSPECIFIED  2010
## 3  8670. HONG KONG    2010
## 4  5518. SINGAPORE    2010
## 5  5273. U S A        2010
## 6 18393. U ARAB EMTS  2011
ggplot(data=india_trade_commodity_prep, aes(x=year, y=comm_cnt, group=1)) +
  geom_line(linetype = "dashed",color="red")+
  geom_point() + theme_minimal() + theme(legend.position="top") +
  xlab("Year") + ylab("Number of Exports") + ggtitle("Number of Exports over years`") 

ggplot(data=india_trade_export_prep, aes(x=year, y=value, fill=country)) +
  geom_bar( stat="identity") + theme_minimal() + theme(axis.text.x = element_text(angle=90)) + 
  xlab("Countries") + ylab("Amount Exported") + ggtitle("Top 5 Amount Exported by Countries by Year ")