library(tidyr)
library(data.table)
comp<-c(1,1,1,2,2,2,3,3,3)
yr<-c(rep(1998:2000,3))
q1<-runif(9,min=0,max=100)
q2<-runif(9,min=0,max=100)
q3<-runif(9,min=0,max=100)
q4<-runif(9,min=0,max=100)
df<-data.frame(comp,yr,q1,q2,q3,q4)
df
## comp yr q1 q2 q3 q4
## 1 1 1998 29.03591 34.43415 41.046921 91.416682
## 2 1 1999 74.41004 36.73510 2.141253 1.384745
## 3 1 2000 15.06838 10.77902 72.343713 88.234364
## 4 2 1998 93.15164 61.19964 33.971277 87.646790
## 5 2 1999 84.69916 33.56199 40.201096 6.187373
## 6 2 2000 98.66549 71.61750 64.150951 15.551795
## 7 3 1998 98.83526 81.80541 49.097261 10.999286
## 8 3 1999 62.10341 84.78101 20.750242 32.584577
## 9 3 2000 57.92006 58.57658 52.617543 74.281422
##we can reshape the data using gather:
#Here Quarter and Revenue are the columns are act
#as key-value columns.Q1 to Q4 columns are going
#to change as a key value pairs and filled in
#key-value columns.
df_gather<-gather(df,Quarter,Revenue,q1:q4)
df_gather
## comp yr Quarter Revenue
## 1 1 1998 q1 29.035913
## 2 1 1999 q1 74.410044
## 3 1 2000 q1 15.068379
## 4 2 1998 q1 93.151642
## 5 2 1999 q1 84.699156
## 6 2 2000 q1 98.665487
## 7 3 1998 q1 98.835255
## 8 3 1999 q1 62.103414
## 9 3 2000 q1 57.920056
## 10 1 1998 q2 34.434152
## 11 1 1999 q2 36.735100
## 12 1 2000 q2 10.779020
## 13 2 1998 q2 61.199639
## 14 2 1999 q2 33.561991
## 15 2 2000 q2 71.617498
## 16 3 1998 q2 81.805406
## 17 3 1999 q2 84.781009
## 18 3 2000 q2 58.576576
## 19 1 1998 q3 41.046921
## 20 1 1999 q3 2.141253
## 21 1 2000 q3 72.343713
## 22 2 1998 q3 33.971277
## 23 2 1999 q3 40.201096
## 24 2 2000 q3 64.150951
## 25 3 1998 q3 49.097261
## 26 3 1999 q3 20.750242
## 27 3 2000 q3 52.617543
## 28 1 1998 q4 91.416682
## 29 1 1999 q4 1.384745
## 30 1 2000 q4 88.234364
## 31 2 1998 q4 87.646790
## 32 2 1999 q4 6.187373
## 33 2 2000 q4 15.551795
## 34 3 1998 q4 10.999286
## 35 3 1999 q4 32.584577
## 36 3 2000 q4 74.281422
#we can change the key-value columns into multiple
#key columns.
df_spread<-spread(df_gather,Quarter,Revenue)
df_spread
## comp yr q1 q2 q3 q4
## 1 1 1998 29.03591 34.43415 41.046921 91.416682
## 2 1 1999 74.41004 36.73510 2.141253 1.384745
## 3 1 2000 15.06838 10.77902 72.343713 88.234364
## 4 2 1998 93.15164 61.19964 33.971277 87.646790
## 5 2 1999 84.69916 33.56199 40.201096 6.187373
## 6 2 2000 98.66549 71.61750 64.150951 15.551795
## 7 3 1998 98.83526 81.80541 49.097261 10.999286
## 8 3 1999 62.10341 84.78101 20.750242 32.584577
## 9 3 2000 57.92006 58.57658 52.617543 74.281422
#separate:It is going to separate the column into
#multiple columns based on separator
df<-data.frame(new.col=c(NA,"a-x","b-y","c-z"))
df.sep<-separate(data=df,col=new.col,into=c("abc","xyz"),sep='-')
df.sep
## abc xyz
## 1 <NA> <NA>
## 2 a x
## 3 b y
## 4 c z
##Unite: It is going to combine two columns into
#single column
unite(df.sep,new_joined_col,abc,xyz,sep="---")
## new_joined_col
## 1 NA---NA
## 2 a---x
## 3 b---y
## 4 c---z