Project 2. Read 3 datasets and transform them from wide to long format and perform analysis.
library (curl)
## Warning: package 'curl' was built under R version 3.2.2
library (ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.2
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.2.2
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
setwd("C://users//angus//Documents//IS607//Project2")
hr<- read.csv("C://users//angus//Documents//IS607//Project2//hr.csv")
hr2<-subset(hr,Role!='ALL')
hr2
## Role Location Current_HeadCount Q1_2015 Q2_2015 Q3_2015
## 1 Administration US 33 39 47 55
## 2 Administration DZ 33 39 47 55
## 3 Administration GF 34 40 48 57
## 4 Sales US 66 78 93 111
## 5 Sales DZ 66 78 93 111
## 6 Sales GF 67 80 95 113
## 7 Product US 66 78 93 111
## 8 Product DZ 66 78 93 111
## 9 Product GF 67 80 95 113
## 10 Engineering US 100 119 141 168
## 11 Engineering DZ 100 119 141 168
## 12 Engineering GF 100 119 141 168
## 13 Services US 17 20 24 29
## 14 Services DZ 17 20 24 29
## 15 Services GF 18 21 25 30
## 16 Operations US 50 59 71 84
## 17 Operations DZ 50 59 71 84
## 18 Operations GF 50 59 71 84
## Q4_2015
## 1 66
## 2 66
## 3 68
## 4 132
## 5 132
## 6 134
## 7 132
## 8 132
## 9 134
## 10 200
## 11 200
## 12 200
## 13 34
## 14 34
## 15 36
## 16 100
## 17 100
## 18 100
grade<-read.csv("C://users//angus//Documents//IS607//Project2//schoolgrade.csv")
grade
## Student English Math Science Social.Studies Technology
## 1 Alicia Cruz 46 47 94 96 84
## 2 Solomon Tunde 64 84 43 60 83
## 3 Daniel Zainea 47 82 52 58 96
## 4 Thomas Brown 96 50 58 71 100
## 5 Lisa Giamanco 40 76 50 85 78
## 6 Cheryl Haridat 75 78 98 41 88
## 7 Winston Cadiz 90 73 90 75 45
## 8 Pablo Romero 91 50 85 68 72
## 9 Jennifer WeiI 98 63 43 58 40
## 10 Ronald Rosario 98 40 98 75 81
## 11 Martha carpio 69 60 52 60 47
## 12 Jasmine carpio 99 86 49 89 93
## 13 Raul Martin 42 59 54 48 80
## 14 Desio Trump 41 42 69 92 55
## Days.Present
## 1 169
## 2 121
## 3 146
## 4 95
## 5 118
## 6 159
## 7 175
## 8 101
## 9 107
## 10 94
## 11 171
## 12 140
## 13 174
## 14 167
grade2 <-grade
names(grade2)[5] <- "Social_Studies"
names(grade2)[7] <- "Days_Present"
grade2
## Student English Math Science Social_Studies Technology
## 1 Alicia Cruz 46 47 94 96 84
## 2 Solomon Tunde 64 84 43 60 83
## 3 Daniel Zainea 47 82 52 58 96
## 4 Thomas Brown 96 50 58 71 100
## 5 Lisa Giamanco 40 76 50 85 78
## 6 Cheryl Haridat 75 78 98 41 88
## 7 Winston Cadiz 90 73 90 75 45
## 8 Pablo Romero 91 50 85 68 72
## 9 Jennifer WeiI 98 63 43 58 40
## 10 Ronald Rosario 98 40 98 75 81
## 11 Martha carpio 69 60 52 60 47
## 12 Jasmine carpio 99 86 49 89 93
## 13 Raul Martin 42 59 54 48 80
## 14 Desio Trump 41 42 69 92 55
## Days_Present
## 1 169
## 2 121
## 3 146
## 4 95
## 5 118
## 6 159
## 7 175
## 8 101
## 9 107
## 10 94
## 11 171
## 12 140
## 13 174
## 14 167
trade <- read.csv("C://users//angus//Documents//IS607//Project2//trade.csv")
trade2<- trade
trade2[,] <-lapply(trade2[,],function(trade2){gsub(",","",trade2)} )
trade2
## Period X X2013 X.1 X2014 X.2 X2015 X.3
## 1 Exports Imports Exports Imports Exports Imports
## 2
## 3 January Goods 131228 191745 133738 193706 129292 192242
## 4 Services 56883 37995 59141 38635 59733 40371
## 5
## 6 February Goods 132084 193731 131768 193060 126329 184370
## 7 Services 56828 37935 57726 39270 59560 40057
## 8
## 9 March Goods 130093 186326 135923 198973 127183 198347
## 10 Services 56647 37938 58837 38908 59583 40581
## 11
## 12 April Goods 131468 189507 135556 199877 129376 190967
## 13 Services 56592 38318 59468 39417 59766 40432
names(trade2)[2] <- "Type"
names(trade2)[3:8] <- c("2013:e","2013:i","2014:e","2014:i","2015:e","2015:i")
trade3 <-trade2[2:13,]
trade3 <-trade3[complete.cases(trade3),]
#remove blank rows
trade3 <- trade3[!(is.na(trade3$`2015:i`) |trade3$`2015:i` ==""),]
trade3[2,1] <- "January"
trade3[4,1] <- "February"
trade3[6,1] <- "March"
trade3[8,1] <- "April"
head(trade3)
## Period Type 2013:e 2013:i 2014:e 2014:i 2015:e 2015:i
## 3 January Goods 131228 191745 133738 193706 129292 192242
## 4 January Services 56883 37995 59141 38635 59733 40371
## 6 February Goods 132084 193731 131768 193060 126329 184370
## 7 February Services 56828 37935 57726 39270 59560 40057
## 9 March Goods 130093 186326 135923 198973 127183 198347
## 10 March Services 56647 37938 58837 38908 59583 40581
trade4 <- trade3 %>% gather (year,amount,3:8)
head(trade4)
## Period Type year amount
## 1 January Goods 2013:e 131228
## 2 January Services 2013:e 56883
## 3 February Goods 2013:e 132084
## 4 February Services 2013:e 56828
## 5 March Goods 2013:e 130093
## 6 March Services 2013:e 56647
trade4$tradecat<-sub(".*:","",trade4$year)
trade4$year <-sub(":.*","",trade4$year)
head(trade4)
## Period Type year amount tradecat
## 1 January Goods 2013 131228 e
## 2 January Services 2013 56883 e
## 3 February Goods 2013 132084 e
## 4 February Services 2013 56828 e
## 5 March Goods 2013 130093 e
## 6 March Services 2013 56647 e
Conclusion: The final table shows that American companies have strong export in service industry.
trade5 <-trade4%>% spread(tradecat,amount)
head(trade5)
## Period Type year e i
## 1 April Goods 2013 131468 189507
## 2 April Goods 2014 135556 199877
## 3 April Goods 2015 129376 190967
## 4 April Services 2013 56592 38318
## 5 April Services 2014 59468 39417
## 6 April Services 2015 59766 40432
trade5 <-trade5[order(trade5$Type),]
tbl_df(trade5)
## Source: local data frame [24 x 5]
##
## Period Type year e i
## 1 April Goods 2013 131468 189507
## 2 April Goods 2014 135556 199877
## 3 April Goods 2015 129376 190967
## 4 February Goods 2013 132084 193731
## 5 February Goods 2014 131768 193060
## 6 February Goods 2015 126329 184370
## 7 January Goods 2013 131228 191745
## 8 January Goods 2014 133738 193706
## 9 January Goods 2015 129292 192242
## 10 March Goods 2013 130093 186326
## .. ... ... ... ... ...
trade5$e <- as.numeric(trade5$e)
trade5$i <- as.numeric(trade5$i)
trade5$exportratio <-(trade5$e/trade5$i)
trade5$exportratio <-(trade5$e/trade5$i)*100
names(trade5)[4:5]<-c("export","import")
trade5
## Period Type year export import exportratio
## 1 April Goods 2013 131468 189507 69.37369
## 2 April Goods 2014 135556 199877 67.81971
## 3 April Goods 2015 129376 190967 67.74783
## 7 February Goods 2013 132084 193731 68.17907
## 8 February Goods 2014 131768 193060 68.25236
## 9 February Goods 2015 126329 184370 68.51928
## 13 January Goods 2013 131228 191745 68.43881
## 14 January Goods 2014 133738 193706 69.04174
## 15 January Goods 2015 129292 192242 67.25481
## 19 March Goods 2013 130093 186326 69.82010
## 20 March Goods 2014 135923 198973 68.31228
## 21 March Goods 2015 127183 198347 64.12146
## 4 April Services 2013 56592 38318 147.69038
## 5 April Services 2014 59468 39417 150.86891
## 6 April Services 2015 59766 40432 147.81856
## 10 February Services 2013 56828 37935 149.80361
## 11 February Services 2014 57726 39270 146.99771
## 12 February Services 2015 59560 40057 148.68812
## 16 January Services 2013 56883 37995 149.71180
## 17 January Services 2014 59141 38635 153.07623
## 18 January Services 2015 59733 40371 147.96017
## 22 March Services 2013 56647 37938 149.31467
## 23 March Services 2014 58837 38908 151.22083
## 24 March Services 2015 59583 40581 146.82487
library(ggplot2)
hr2
## Role Location Current_HeadCount Q1_2015 Q2_2015 Q3_2015
## 1 Administration US 33 39 47 55
## 2 Administration DZ 33 39 47 55
## 3 Administration GF 34 40 48 57
## 4 Sales US 66 78 93 111
## 5 Sales DZ 66 78 93 111
## 6 Sales GF 67 80 95 113
## 7 Product US 66 78 93 111
## 8 Product DZ 66 78 93 111
## 9 Product GF 67 80 95 113
## 10 Engineering US 100 119 141 168
## 11 Engineering DZ 100 119 141 168
## 12 Engineering GF 100 119 141 168
## 13 Services US 17 20 24 29
## 14 Services DZ 17 20 24 29
## 15 Services GF 18 21 25 30
## 16 Operations US 50 59 71 84
## 17 Operations DZ 50 59 71 84
## 18 Operations GF 50 59 71 84
## Q4_2015
## 1 66
## 2 66
## 3 68
## 4 132
## 5 132
## 6 134
## 7 132
## 8 132
## 9 134
## 10 200
## 11 200
## 12 200
## 13 34
## 14 34
## 15 36
## 16 100
## 17 100
## 18 100
hr3<-hr2[,1:3]
hr4<-hr3 %>%spread(Location,Current_HeadCount)
hr4$total <- hr4$DZ+hr4$GF+hr4$US
hr4
## Role DZ GF US total
## 1 Administration 33 34 33 100
## 2 Engineering 100 100 100 300
## 3 Operations 50 50 50 150
## 4 Product 66 67 66 199
## 5 Sales 66 67 66 199
## 6 Services 17 18 17 52
ggplot(data=hr4, aes(x=hr4$Role, y=hr4$total)) + geom_bar(stat="identity", position = "dodge")
grade2
## Student English Math Science Social_Studies Technology
## 1 Alicia Cruz 46 47 94 96 84
## 2 Solomon Tunde 64 84 43 60 83
## 3 Daniel Zainea 47 82 52 58 96
## 4 Thomas Brown 96 50 58 71 100
## 5 Lisa Giamanco 40 76 50 85 78
## 6 Cheryl Haridat 75 78 98 41 88
## 7 Winston Cadiz 90 73 90 75 45
## 8 Pablo Romero 91 50 85 68 72
## 9 Jennifer WeiI 98 63 43 58 40
## 10 Ronald Rosario 98 40 98 75 81
## 11 Martha carpio 69 60 52 60 47
## 12 Jasmine carpio 99 86 49 89 93
## 13 Raul Martin 42 59 54 48 80
## 14 Desio Trump 41 42 69 92 55
## Days_Present
## 1 169
## 2 121
## 3 146
## 4 95
## 5 118
## 6 159
## 7 175
## 8 101
## 9 107
## 10 94
## 11 171
## 12 140
## 13 174
## 14 167
grade2$avg<-(grade2$English +grade2$Math+grade2$Science+grade2$Social_Studies+grade2$Technology)/5
grade2
## Student English Math Science Social_Studies Technology
## 1 Alicia Cruz 46 47 94 96 84
## 2 Solomon Tunde 64 84 43 60 83
## 3 Daniel Zainea 47 82 52 58 96
## 4 Thomas Brown 96 50 58 71 100
## 5 Lisa Giamanco 40 76 50 85 78
## 6 Cheryl Haridat 75 78 98 41 88
## 7 Winston Cadiz 90 73 90 75 45
## 8 Pablo Romero 91 50 85 68 72
## 9 Jennifer WeiI 98 63 43 58 40
## 10 Ronald Rosario 98 40 98 75 81
## 11 Martha carpio 69 60 52 60 47
## 12 Jasmine carpio 99 86 49 89 93
## 13 Raul Martin 42 59 54 48 80
## 14 Desio Trump 41 42 69 92 55
## Days_Present avg
## 1 169 73.4
## 2 121 66.8
## 3 146 67.0
## 4 95 75.0
## 5 118 65.8
## 6 159 76.0
## 7 175 74.6
## 8 101 73.2
## 9 107 60.4
## 10 94 78.4
## 11 171 57.6
## 12 140 83.2
## 13 174 56.6
## 14 167 59.8
cor(grade2$Days_Present, grade2$avg)
## [1] -0.3066169