###FOOD PRICE INDEX DATA FOR NEW ZEALAND##
###ORIGINALLY THREE DATA SETS ##

##PACKAGES I LOAD EVERY TIME##
pacman::p_load(pacman,dplyr,GGally,ggplot2,ggthemes,ggvis,httr,lubridate,
               shiny,rmarkdown,stringr,tidyr,psych,plotly,rio)

## READ THE FIRST ROWS (A "CHUNK") OF EACH CSV FILE INTO A DATA FRAME ##

## Maximum number of data rows read from each file.
chunkSize <- 10000

## Folder holding the three input files. Change this one line to run the
## script on another machine.
data_dir <- "C:\\Users\\xholi\\OneDrive\\Desktop\\New folder"

## Read at most `n_rows` data rows of a comma-separated file with a header.
##   path   : path of the CSV file.
##   n_rows : maximum number of data rows to read (defaults to chunkSize).
## Returns a data frame.
## The path is handed straight to read.table(), which opens the file and
## closes it again itself, even when reading fails, so no connection is
## left open. Only the double quote is treated as a quoting character and
## "#" is not treated as a comment marker, so apostrophes or "#" inside a
## field cannot swallow the rest of a line (fill = TRUE would otherwise pad
## the damaged rows silently).
read_chunk <- function(path, n_rows = chunkSize) {
  read.table(path, nrows = n_rows, header = TRUE, fill = TRUE, sep = ",",
             quote = "\"", comment.char = "")
}

## INDEX NUMBER CHUNK
index_data <- read_chunk(file.path(data_dir, "index_number.csv"))

## SEASONALLY ADJUSTED CHUNK
seasonal_data <- read_chunk(file.path(data_dir, "seasonally_Adj.csv"))

## WEIGHTED AVERAGE PRICES CHUNK
## (the object name keeps its original spelling because later code uses it)
weigthed_data <- read_chunk(file.path(data_dir, "weighted_ava_price.csv"))

## Stack the two chunks row-wise: the index rows come first, followed by the
## weighted-average-price rows. Then print a per-column summary of the result.
X1 <- do.call(rbind, list(index_data, weigthed_data))
summary(X1)
##  Series_reference       Period       Data_value         STATUS         
##  Length:20000       Min.   :1960   Min.   :   0.90   Length:20000      
##  Class :character   1st Qu.:2006   1st Qu.:   3.68   Class :character  
##  Mode  :character   Median :2011   Median :  45.22   Mode  :character  
##                     Mean   :2009   Mean   : 402.19                     
##                     3rd Qu.:2016   3rd Qu.: 830.53                     
##                     Max.   :2020   Max.   :1200.00                     
##                                    NA's   :4                           
##     UNITS             Subject             Group           Series_title_1    
##  Length:20000       Length:20000       Length:20000       Length:20000      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
## 
## Per-column summary of the seasonally adjusted chunk (output pasted below).
summary(seasonal_data)
##  Series_reference       Period       Data_value        STATUS         
##  Length:3544        Min.   :1999   Min.   : 624.0   Length:3544       
##  Class :character   1st Qu.:2009   1st Qu.: 860.0   Class :character  
##  Mode  :character   Median :2013   Median : 958.0   Mode  :character  
##                     Mean   :2013   Mean   : 929.6                     
##                     3rd Qu.:2017   3rd Qu.:1005.0                     
##                     Max.   :2020   Max.   :1125.0                     
##                                    NA's   :3                          
##     UNITS             Subject             Group           Series_title_1    
##  Length:3544        Length:3544        Length:3544        Length:3544       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
## 
## CONVERT THE LISTED COLUMNS OF X1 TO FACTORS ##
## NOTE(review): Period and Data_value are numeric when read (see the first
## summary above). As factors every distinct value becomes its own level,
## which is what the frequency bar plots further down rely on.
X1 <- local({
  to_factor <- c("Series_reference", "Period", "Data_value", "STATUS",
                 "UNITS", "Subject", "Group", "Series_title_1")
  X1[to_factor] <- lapply(X1[to_factor], as.factor)
  X1
})

## Summary again: factor columns are now reported as counts per level.
summary(X1)
##        Series_reference     Period        Data_value      STATUS     
##  CPIM.SE901    :  730   2014.07:   94   1000   :   56   FINAL:20000  
##  CPIM.SE9012   :  658   2014.08:   94   1.08   :   46                
##  CPIM.SE9012014:  502   2014.09:   94   2.57   :   39                
##  CPIM.SE901202 :  502   2014.1 :   94   2.86   :   38                
##  CPIM.SE9012011:  382   2014.11:   94   1.78   :   37                
##  CPIM.SE9012012:  382   2014.12:   94   (Other):19780                
##  (Other)       :16844   (Other):19436   NA's   :    4                
##      UNITS                              Subject     
##  Dollars:10000   Consumers Price Index - CPI:20000  
##  Index  :10000                                      
##                                                     
##                                                     
##                                                     
##                                                     
##                                                     
##                                                                        Group      
##  Food Price Index for New Zealand                                         :  730  
##  Food Price Index Level 2 Subgroups for New Zealand                       : 1386  
##  Food Price Index Level 3 Classes for New Zealand                         : 2815  
##  Food Price Index Level 4 Sections for New Zealand                        : 5069  
##  Food Price Index Selected Monthly Weighted Average Prices for New Zealand:10000  
##                                                                                   
##                                                                                   
##                                   Series_title_1 
##  Food                                    :  730  
##  Meat, poultry and fish                  :  658  
##  Fish and other seafood                  :  502  
##  Poultry (fresh, chilled or frozen)      :  502  
##  Beef and veal (fresh, chilled or frozen):  382  
##  Bread                                   :  382  
##  (Other)                                 :16844
## FREQUENCY BAR CHARTS FOR THE COMBINED DATA (X1) ##
## Every column plotted here was converted to a factor above, so plot()
## draws one bar per level and the bar height is the number of rows at that
## level. The y axis is therefore a count in all of these charts.

## Stack three charts on one screen; par() returns the previous setting so
## it can be restored exactly afterwards.
previous_par <- par(mfrow = c(3, 1))

## Rows per group. The y axis was previously labelled as weighted average
## prices, but the bars show row counts.
plot(X1$Group, col = "blue", xlab = "GROUP",
     ylab = "Frequency",
     main = "FOOD PRICE INDEX PER GROUP")

## Rows per period.
plot(X1$Period, col = "red", xlab = "Period",
     ylab = "Frequency",
     main = "Period plot")

## Rows per distinct data value.
plot(X1$Data_value, col = "green", xlab = "Data values",
     ylab = "Frequency",
     main = "Data Values")

## Put the layout back to whatever it was before the three stacked charts.
par(previous_par)

## Rows per series title, on a screen of its own.
plot(X1$Series_title_1)

## Descriptive statistics for every column of the seasonally adjusted chunk
## (output pasted below). describe() is not base R.
## NOTE(review): presumably psych::describe, since psych is among the
## packages loaded at the top of the script - confirm.
describe(seasonal_data)
##                   vars    n    mean     sd  median trimmed   mad     min    max
## Series_reference*    1 3544   10.27   5.88   10.00   10.25  7.41    1.00   20.0
## Period               2 3544 2012.93   4.44 2013.06 2013.01  5.87 1999.06 2020.1
## Data_value           3 3541  929.63 101.30  958.00  937.97 88.96  624.00 1125.0
## STATUS*              4 3544    1.99   0.07    2.00    2.00  0.00    1.00    2.0
## UNITS*               5 3544    1.00   0.00    1.00    1.00  0.00    1.00    1.0
## Subject*             6 3544    1.00   0.00    1.00    1.00  0.00    1.00    1.0
## Group*               7 3544    2.61   0.62    3.00    2.73  0.00    1.00    3.0
## Series_title_1*      8 3544    1.00   0.00    1.00    1.00  0.00    1.00    1.0
##                    range   skew kurtosis   se
## Series_reference*  19.00   0.01    -1.22 0.10
## Period             21.04  -0.20    -0.72 0.07
## Data_value        501.00  -0.71    -0.23 1.70
## STATUS*             1.00 -13.19   172.11 0.00
## UNITS*              0.00    NaN      NaN 0.00
## Subject*            0.00    NaN      NaN 0.00
## Group*              2.00  -1.34     0.67 0.01
## Series_title_1*     0.00    NaN      NaN 0.00
## Convert the listed columns of the seasonally adjusted chunk to factors.
## NOTE(review): UNITS is not converted here, although it is converted for
## X1 - confirm whether that difference is intended.
seasonal_data <- local({
  to_factor <- c("Group", "Series_reference", "Period", "Data_value",
                 "STATUS", "Subject", "Series_title_1")
  seasonal_data[to_factor] <- lapply(seasonal_data[to_factor], as.factor)
  seasonal_data
})

## SEASONALLY ADJUSTED DATA: FREQUENCY BAR CHARTS ##
## The plotted columns are factors at this point, so each chart shows the
## number of rows per level.

## Rows per group.
plot(x = seasonal_data$Group, col = "pink")

## Rows per period.
plot(x = seasonal_data$Period)

## Rows per distinct data value.
## NOTE(review): the x label says "Dollars" - confirm against the UNITS
## column of this data set.
plot(
  x = seasonal_data$Data_value,
  xlab = "Data values:in Dollars",
  ylab = "Frequency"
)