Loading the dataset for Part 1:

setwd("C:/Users/ELNAAAB/Desktop/All/Mine/AD/Proiect")

date_1 <- readxl::read_xlsx("date_part1.xlsx", col_names = TRUE)

Loading the libraries used:

library("knitLatex")
## Warning: package 'knitLatex' was built under R version 3.6.2
library("knitr")
## Warning: package 'knitr' was built under R version 3.6.2
library("rmarkdown")
## Warning: package 'rmarkdown' was built under R version 3.6.2
library("tidyverse")
## Warning: package 'tidyverse' was built under R version 3.6.2
## -- Attaching packages ---------------------------------------------------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.2.1     √ purrr   0.3.3
## √ tibble  2.1.3     √ dplyr   0.8.3
## √ tidyr   1.0.0     √ stringr 1.4.0
## √ readr   1.3.1     √ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.2
## Warning: package 'stringr' was built under R version 3.6.2
## -- Conflicts ------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library("RColorBrewer")
library("scales")
## Warning: package 'scales' was built under R version 3.6.2
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
library("ggplot2")
library("Rmisc")
## Warning: package 'Rmisc' was built under R version 3.6.2
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.6.2
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.6.2
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
library("lattice")
library("ggpubr")
## Warning: package 'ggpubr' was built under R version 3.6.2
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.6.2
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
## 
##     mutate
library("magrittr")
library("GGally")
## Warning: package 'GGally' was built under R version 3.6.2
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library("zoo")
## Warning: package 'zoo' was built under R version 3.6.2
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library("xts")
## Warning: package 'xts' was built under R version 3.6.2
## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
library("skimr")
## Warning: package 'skimr' was built under R version 3.6.2
library("DataExplorer")
## Warning: package 'DataExplorer' was built under R version 3.6.2
library("PerformanceAnalytics")
## Warning: package 'PerformanceAnalytics' was built under R version 3.6.2
## 
## Attaching package: 'PerformanceAnalytics'
## The following object is masked from 'package:graphics':
## 
##     legend
library("plyr")
library("dplyr")
library("stringr")
library("reshape2")
## Warning: package 'reshape2' was built under R version 3.6.2
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
library("wordcloud")
## Warning: package 'wordcloud' was built under R version 3.6.2
## 
## Attaching package: 'wordcloud'
## The following object is masked from 'package:PerformanceAnalytics':
## 
##     textplot
library("tidytext")
## Warning: package 'tidytext' was built under R version 3.6.2
library("tidyr")
library("LaF")
## Warning: package 'LaF' was built under R version 3.6.2
## 
## Attaching package: 'LaF'
## The following object is masked from 'package:readr':
## 
##     read_lines
library("gridExtra")
## Warning: package 'gridExtra' was built under R version 3.6.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library("tseries")
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library("forecast")
## Warning: package 'forecast' was built under R version 3.6.2
## Registered S3 methods overwritten by 'forecast':
##   method             from    
##   fitted.fracdiff    fracdiff
##   residuals.fracdiff fracdiff
## 
## Attaching package: 'forecast'
## The following object is masked from 'package:ggpubr':
## 
##     gghistogram
library("aTSA")
## 
## Attaching package: 'aTSA'
## The following object is masked from 'package:forecast':
## 
##     forecast
## The following objects are masked from 'package:tseries':
## 
##     adf.test, kpss.test, pp.test
## The following object is masked from 'package:graphics':
## 
##     identify
library("tidytext")
library("textdata")
library("tm")
## Loading required package: NLP
## 
## Attaching package: 'NLP'
## The following object is masked from 'package:ggplot2':
## 
##     annotate
library("SnowballC")
library("wordcloud")
library("RColorBrewer")
library("readxl")
library("tinytex")
## Warning: package 'tinytex' was built under R version 3.6.2

Reading the data:

### Reading the data

date_1 <- readxl::read_xlsx("date_part1.xlsx", col_names = TRUE)
library(gridExtra)

Data cleaning and preprocessing consisted of handling the NA values and consolidating entries that were written in slightly different ways but refer to the same thing.

### Data cleaning

class(date_1)
## [1] "tbl_df"     "tbl"        "data.frame"
date_1 <- as.data.frame(date_1)
class(date_1)
## [1] "data.frame"
library(stringr)
library(tidyverse)
library(xts)

# Replace the NAs in the ID column with manually generated IDs

sum(is.na(date_1$ID))
## [1] 4
which(is.na(date_1$ID))
## [1] 46 48 53 60
generated_ids <- c("X1", "X2", "X3", "X4")

date_1$ID[46] <- generated_ids[1]
date_1$ID[48] <- generated_ids[2]
date_1$ID[53] <- generated_ids[3]
date_1$ID[60] <- generated_ids[4]

sum(is.na(date_1$ID))
## [1] 0
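
# An equivalent, index-free alternative (a sketch; it is a no-op at this point because
# the NAs were already filled above): generate one placeholder ID per missing row.
na_idx <- which(is.na(date_1$ID))
date_1$ID[na_idx] <- paste0("X", seq_along(na_idx))
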
# Replace the NAs in the brand-score columns with scores of 0

date_1$Gucci[is.na(date_1$Gucci)] <- 0
date_1$Dior[is.na(date_1$Dior)] <- 0
date_1$Armani[is.na(date_1$Armani)] <- 0

# Consolidate the categorical values of System


date_1$System <- str_replace(date_1$System, "Fast", "fast")
date_1$System <- str_replace(date_1$System, "FAST", "fast")
date_1$System <- str_replace(date_1$System, "Slow", "slow")

table(date_1$System)
## 
##    - fast slow 
##    1   32   60
date_1$System[is.na(date_1$System)] <- "slow"

date_1$System <- str_replace(date_1$System, "-", "slow")

table(date_1$System)
## 
## fast slow 
##   32   62
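
# The same normalisation could be done in one pass by lower-casing the column before
# tabulating (an alternative sketch to the chained str_replace calls above; harmless
# to re-run here because the remaining values are already lower case).
date_1$System <- str_to_lower(date_1$System)
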
# Consolidate the categorical values of Dependency

date_1$Dependency <- str_replace(date_1$Dependency, "2", "little")
date_1$Dependency <- str_replace(date_1$Dependency, "3", "little")
date_1$Dependency <- str_replace(date_1$Dependency, "5", "very")

table(date_1$Dependency)
## 
##        little not dependent          very 
##            46            44             4
# Consolidate the categorical values of Q_personal

date_1$Q_personal <- str_replace(date_1$Q_personal, "armani", "Armani")
date_1$Q_personal <- str_replace(date_1$Q_personal, "dior", "Dior")
date_1$Q_personal <- str_replace(date_1$Q_personal, "DIOR", "Dior")
date_1$Q_personal <- str_replace(date_1$Q_personal, "GUCCI", "Gucci")
date_1$Q_personal <- str_replace(date_1$Q_personal, "chanel", "Chanel")

table(date_1$Q_personal)
## 
##          Adidas          Armani          Chanel            Dior Dolce & Gabbana 
##               1              14               1              59               1 
##           Gucci           Guess   Louis Vuitton    Michael Kors 
##               8               1               1               1
# Consolidate the categorical values of Q_manager

date_1$Q_manager <- str_replace(date_1$Q_manager, "armani", "Armani")
date_1$Q_manager <- str_replace(date_1$Q_manager, "dior", "Dior")
date_1$Q_manager <- str_replace(date_1$Q_manager, "DIOR", "Dior")
date_1$Q_manager <- str_replace(date_1$Q_manager, "christian louboutin", "Christian Louboutin")


table(date_1$Q_manager)
## 
##               Apple              Armani Christian Louboutin                Dior 
##                   1                  12                   1                  55 
##               Gucci               Guess               Prada 
##                  15                   2                   1

The analysis proper begins here. Part 1 - Choices at time t0: analysis of the questionnaire responses

Descriptive statistics:

# Part 1 ----------------------------------------------------------------

### Descriptive statistics

summary(date_1)
##       ID                 Age          System          Personality       
##  Length:94          Min.   :18.0   Length:94          Length:94         
##  Class :character   1st Qu.:23.0   Class :character   Class :character  
##  Mode  :character   Median :23.0   Mode  :character   Mode  :character  
##                     Mean   :24.9                                        
##                     3rd Qu.:24.0                                        
##                     Max.   :53.0                                        
##      Risk            Dependency         Commercials    Recommendations 
##  Length:94          Length:94          Min.   :1.000   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:3.000   1st Qu.: 6.000  
##  Mode  :character   Mode  :character   Median :4.000   Median : 7.000  
##                                        Mean   :4.521   Mean   : 6.766  
##                                        3rd Qu.:5.000   3rd Qu.: 8.000  
##                                        Max.   :9.000   Max.   :10.000  
##    Happiness          Gucci         Dior            Armani      
##  Min.   : 1.000   Min.   : 0   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 7.000   1st Qu.: 3   1st Qu.: 6.000   1st Qu.: 5.000  
##  Median : 8.000   Median : 5   Median : 8.000   Median : 6.000  
##  Mean   : 7.234   Mean   : 5   Mean   : 7.032   Mean   : 5.915  
##  3rd Qu.: 8.750   3rd Qu.: 7   3rd Qu.: 9.000   3rd Qu.: 8.000  
##  Max.   :10.000   Max.   :10   Max.   :10.000   Max.   :10.000  
##   Q_personal          Product           Q_manager         Q_CAPM_brand      
##  Length:94          Length:94          Length:94          Length:94         
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Q_CAPM_mark       
##  Length:94         
##  Class :character  
##  Mode  :character  
##                    
##                    
## 
table(date_1$System)
## 
## fast slow 
##   32   62
table(date_1$Risk)
## 
##       averse   risk lover risk neutral 
##           31            9           54
table(date_1$Dependency)
## 
##        little not dependent          very 
##            46            44             4
table(date_1$Q_personal)
## 
##          Adidas          Armani          Chanel            Dior Dolce & Gabbana 
##               1              14               1              59               1 
##           Gucci           Guess   Louis Vuitton    Michael Kors 
##               8               1               1               1
table(date_1$Q_manager)
## 
##               Apple              Armani Christian Louboutin                Dior 
##                   1                  12                   1                  55 
##               Gucci               Guess               Prada 
##                  15                   2                   1
table(date_1$Product)
## 
##                      bag                      Bag          bags, cosmetics 
##                        3                        1                        1 
##                  clothes                  Clothes    Clothes or accesories 
##                        6                        1                        1 
##                    dress flipflops, clothes, bags               foundation 
##                        3                        1                        1 
##                fragrance                   geanta                  handbag 
##                        1                        1                        1 
##                  Handbag                 handbags                   jacket 
##                        1                        3                        1 
##                    jeans                    Jeans                  make-up 
##                        1                        1                       11 
##           make-up, watch                   makeup                  parfume 
##                        1                        3                        4 
##                  Parfume                  perfume                  PERFUME 
##                        1                       20                        1 
##         perfume, clothes  perfume, clothes, belts         perfume, make-up 
##                        1                        1                        2 
##           perfumes, bags                    purse                     puse 
##                        1                        2                        1 
##                    shirt                    shoes               sunglasses 
##                        2                        6                        1 
##              winter coat 
##                        1

Computing the mean score given to each of the three brands:

### Respondents who scored above the group mean, by brand

medie_Gucci <- mean(date_1$Gucci)
medie_Dior <- mean(date_1$Dior)
medie_Armani <- mean(date_1$Armani)

Plot of the respondents who gave above-average scores for the Gucci brand

#Gucci
date_1 %>%
  select(ID, Gucci, System) %>%
  filter(Gucci >= medie_Gucci) %>%
  ggplot(mapping = aes(x = ID, y = Gucci, color = System)) + 
  geom_point(size = 5) 

Plot of the respondents who gave above-average scores for the Dior brand

#Dior
date_1 %>%
  select(ID, Dior, System) %>%
  filter(Dior >= medie_Dior) %>%
  ggplot(mapping = aes(x = ID, y = Dior, color = System)) + 
  geom_point(size = 5) 

Plot of the respondents who gave above-average scores for the Armani brand

#Armani
date_1 %>%
  select(ID, Armani, System) %>%
  filter(Armani >= medie_Armani) %>%
  ggplot(mapping = aes(x = ID, y = Armani, color = System)) + 
  geom_point(size = 5) 

Plot of the Gucci scores, coloured by risk attitude and shaped by each respondent's thinking system

### Scores by risk attitude and thinking system

Note_Gucci_Risk<- date_1 %>%
  select(ID, Gucci, Risk, System) 

g11 <-ggplot(Note_Gucci_Risk, mapping = aes(x = ID, y = Gucci, color = Risk, shape = System)) +
  geom_point(size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
g11

Plot of the Dior scores, coloured by risk attitude and shaped by each respondent's thinking system

Note_Dior_Risk <- date_1 %>%
  select(ID, Dior, Risk, System)

g12 <- ggplot(Note_Dior_Risk, mapping = aes(x = ID, y = Dior, color = Risk, shape = System)) +
  geom_point(size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
g12

Plot of the Armani scores, coloured by risk attitude and shaped by each respondent's thinking system

Note_Armani_Risk <- date_1 %>%
  select(ID, Armani, Risk, System) 

g13 <- ggplot(Note_Armani_Risk, mapping = aes(x = ID, y = Armani, color = Risk, shape = System)) +
  geom_point(size = 4) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

g13

Combining the three plots above

plots_brands <- list(g11,g12,g13)
marrangeGrob(plots_brands, nrow = 3, ncol = 1)

Grouping by the types of products chosen for purchase

### Grouping by chosen product type

date_1 %>%
  group_by(Product) %>%
  filter(Product != "") %>%
  dplyr::summarize(n=n()) %>%
  ggplot(mapping = aes(x = reorder(Product,n), y = n)) +
  geom_bar(stat = "identity", fill = "seagreen", width = 0.8) +
  scale_x_discrete(expand = c(0,0)) +
  labs(x = "Produs",
       y = "Respondenti") + 
  coord_flip()

The text data still needs some processing:

# consolidate products that were written in slightly different ways

date_1$Product <- str_replace(date_1$Product, "parfume", "perfume")
date_1$Product <- str_replace(date_1$Product, "PERFUME", "perfume")
date_1$Product <- str_replace(date_1$Product, "Parfume", "perfume")
date_1$Product <- str_replace(date_1$Product, "fragrance", "perfume")
date_1$Product <- str_replace(date_1$Product, "makeup", "make-up")
date_1$Product <- str_replace(date_1$Product, "Bag", "bag")
date_1$Product <- str_replace(date_1$Product, "bags", "bag")
date_1$Product <- str_replace(date_1$Product, "geanta", "bag")
date_1$Product <- str_replace(date_1$Product, "puse", "bag")
date_1$Product <- str_replace(date_1$Product, "purse", "bag")
date_1$Product <- str_replace(date_1$Product, "Handbag", "bag")
date_1$Product <- str_replace(date_1$Product, "handbag", "bag")
date_1$Product <- str_replace(date_1$Product, "Clothes", "clothes")
date_1$Product <- str_replace(date_1$Product, "Jeans", "jeans")

Part 2 - Analysis at time t1: analysis of the Dior and Carrefour assets, the CAC40 market index and the risk-free asset

Reading the data for Part 2:

# Part 2 ----------------------------------------------------------------

### Reading the data

date_2 <- readxl::read_xlsx("date2.xlsx", col_names = TRUE)

date_2 <- as.data.frame(date_2)

date_2[,-1] # preview of the price columns (this only prints; date_2 itself is not modified)
date_2 <- na.omit(date_2)

#View(date_2)

price = xts(date_2[,-1], order.by = as.Date(date_2[,1], "%d/%m/%Y"))

price[1:5,]
##             Dior Carrefour   Cac40   Rf
## 2019-01-02 333.6    14.680 881.144 1604
## 2019-01-03 326.0    14.965 901.348 1607
## 2019-01-04 334.7    15.130 916.573 1632
## 2019-01-07 336.4    15.285 913.558 1651
## 2019-01-08 345.1    15.700 904.313 1662
max(price$Rf)
## [1] 1662
#View(price)

Data processing (computing the corresponding returns):

### Converting prices into returns

stocks = CalculateReturns(price)
#View(stocks)

# the first row will be dropped because no return can be computed for the first day

stocks <- na.omit(stocks)

colnames(stocks) <- c("RDior", "RCarrefour", "RCac40", "rf")

summary(stocks)
##      Index                RDior             RCarrefour        
##  Min.   :2019-01-03   Min.   :-0.048003   Min.   :-0.0401642  
##  1st Qu.:2019-04-03   1st Qu.:-0.005473   1st Qu.:-0.0073925  
##  Median :2019-07-08   Median : 0.001799   Median :-0.0002896  
##  Mean   :2019-07-06   Mean   : 0.001365   Mean   : 0.0002132  
##  3rd Qu.:2019-10-07   3rd Qu.: 0.008149   3rd Qu.: 0.0068091  
##  Max.   :2020-01-09   Max.   : 0.048159   Max.   : 0.0691766  
##      RCac40                rf            
##  Min.   :-0.071399   Min.   :-0.1781116  
##  1st Qu.:-0.005035   1st Qu.:-0.0242726  
##  Median : 0.003185   Median :-0.0038785  
##  Mean   : 0.001971   Mean   :-0.0009204  
##  3rd Qu.: 0.011191   3rd Qu.: 0.0179682  
##  Max.   : 0.046126   Max.   : 0.3062099
max(stocks$rf)
## [1] 0.3062099
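
CalculateReturns uses simple (discrete) returns by default, so each value is R_t = P_t / P_{t-1} - 1; a quick manual cross-check on the first computable Dior observation:

# arithmetic return for 2019-01-03, computed by hand; it should equal stocks$RDior[1]
as.numeric(price$Dior[2]) / as.numeric(price$Dior[1]) - 1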

Analysing the evolution of the assets over 2019 with chart.RollingPerformance:

#### Step 1: analyse the evolution of the assets


chart.RollingPerformance(stocks[,-4], main = "Performanta pe 12 luni pentru Dior, Carrefour si CAC40",
                         colorset = c("red", "blue", "green"),
                         legend.loc = "topleft")

Analysing the drawdowns over 2019 with chart.Drawdown:

#### Step 2: drawdown analysis - which stock has the largest losses?


chart.Drawdown(stocks[,-4], colorset = c("red", "blue", "green"), 
               main = "Valori negative pentru rentabilitatea Dior, Carrefour si CAC40 in 2019",
               legend.loc = "bottomleft")

Relative evolution of the assets against the market index, via chart.RelativePerformance:

#### Step 3: evolution of an asset relative to the market index

chart.RelativePerformance(stocks[,1:2], stocks[,3], main = "Performanta relativa a activelor fata de CAC40",
                          legend.loc = "bottomright",
                          colorset = c("red", "blue"))

Computing the mean return of each asset:

#### Step 4: statistics for the three return series

# compute the mean returns (we are looking for the asset with the highest gain)

rentab_medie <- colMeans(stocks)
rentab_medie
##         RDior    RCarrefour        RCac40            rf 
##  0.0013645120  0.0002131729  0.0019705527 -0.0009203597
max(rentab_medie) # which asset has the highest mean return
## [1] 0.001970553
# => the market index

# both stocks have mean returns below the market index

Building the covariance and correlation matrices

### Covariance and correlation matrices

covariance_matrix <- cov(stocks)
covariance_matrix
##                    RDior   RCarrefour        RCac40           rf
## RDior       1.730940e-04 3.891986e-05 -1.388818e-05 8.802778e-05
## RCarrefour  3.891986e-05 1.675725e-04  2.434325e-07 1.898371e-04
## RCac40     -1.388818e-05 2.434325e-07  2.585866e-04 9.247012e-06
## rf          8.802778e-05 1.898371e-04  9.247012e-06 2.859471e-03
correlation_matrix <- cor(stocks)
correlation_matrix
##                  RDior RCarrefour      RCac40         rf
## RDior       1.00000000 0.22852241 -0.06564497 0.12512263
## RCarrefour  0.22852241 1.00000000  0.00116943 0.27424373
## RCac40     -0.06564497 0.00116943  1.00000000 0.01075364
## rf          0.12512263 0.27424373  0.01075364 1.00000000

Other descriptive statistics for the returns:

table.Stats(stocks, ci=0.95, digits = 4)

The Sharpe ratio

# Sharpe ratio
# compute the Sharpe ratio and compare, using SharpeRatio

SharpeRatio(stocks[,1:3], rf = 0, p = 0.95, FUN = "StdDev")
##                                   RDior RCarrefour   RCac40
## StdDev Sharpe (Rf=0%, p=95%): 0.1037137 0.01646761 0.122542
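
# Manual check: with Rf = 0, the StdDev Sharpe ratio above is simply the mean return
# divided by the standard deviation of each series.
colMeans(stocks[, 1:3]) / apply(stocks[, 1:3], 2, sd)
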
t(table.CalendarReturns(stocks, digits = 4)) # t() transposes the table
##               2019   2020
## ian.       -0.2162 0.3886
## feb.        0.3342     NA
## mar.        1.3846     NA
## apr.        0.5925     NA
## mai        -0.6478     NA
## iun.        1.5866     NA
## iul.        0.1705     NA
## aug.       -0.2227     NA
## sept.       0.7425     NA
## oct.       -0.4500     NA
## nov.        0.1252     NA
## dec.       -0.4793     NA
## RDior       2.9300 0.3886
## RCarrefour  2.3102 0.0989
## RCac40      6.3016 0.1581
## rf         -8.2997 0.3429

Plot of the performance of the stocks against the CAC40 market index

charts.PerformanceSummary(stocks[,1:2], stocks[,3], methods = "StdDev", 
                          main = "Performanta actiunilor fata de CAC40",
                          colorset = c("blue", "green"),
                          legend.loc = "topleft")

Plot of the performance of the stocks against the risk-free asset

charts.PerformanceSummary(stocks[,1:2], stocks[,4], methods = "StdDev", 
                          main = "Performanta actiunilor fata de Activul fara risc",
                          colorset = c("blue", "green"),
                          legend.loc = "topleft")

Analysis with the PerformanceAnalytics package

Histograms and box plots for each of the four variables analysed

For Dior:

#step 1: histograms + box plots

chart.Histogram(stocks[,1], main="Histograma Dior", methods=c("add.density", "add.normal"), colorset= tim8equal)

chart.Boxplot(stocks[,1], main="Boxplot Dior")

For Carrefour:

chart.Histogram(stocks[,2], main="Histograma Carrefour", methods=c("add.density", "add.normal"), colorset= tim8equal)

chart.Boxplot(stocks[,2], main="Boxplot Carrefour")

For the CAC40 market index:

chart.Histogram(stocks[,3], main="Histograma Cac40", methods=c("add.density", "add.normal"), colorset= tim8equal)

chart.Boxplot(stocks[,3], main="Boxplot Cac40")

For the risk-free asset:

chart.Histogram(stocks[,4], main="Histograma Activ fara risc", methods=c("add.density", "add.normal"), colorset= tim8equal)

chart.Boxplot(stocks[,4], main="Boxplot Activ fara risc")

The correlation matrix:

# step 2: the correlation matrix

chart.Correlation(stocks, histogram = TRUE )

The Sharpe ratio

# step 3: Sharpe ratio

SharpeRatio(stocks[,1:2], FUN = "StdDev")
##                                   RDior RCarrefour
## StdDev Sharpe (Rf=0%, p=95%): 0.1037137 0.01646761

Testing for seasonality:

#a) test whether the time series are stationary and detect seasonality

#a1) detect seasonality
#windows()
monthplot(stocks[,1], col="blue", main="Sezonalitate lunara pentru Dior")

monthplot(stocks[,2], col="blue", main="Sezonalitate lunara pentru Carrefour")

#no seasonality -> do the same for the other series
monthplot(stocks[,3], col="blue", main="Sezonalitate lunara pentru Cac40")

# no seasonality
monthplot(stocks[,4], col="blue", main="Sezonalitate lunara pentru Activul fara risc")

# no seasonality

Testing for stationarity:

#a2) test for stationarity

adf.test(stocks[,1])
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 15.3    0.99
## [2,]   1 21.8    0.99
## [3,]   2 27.8    0.99
## [4,]   3 32.3    0.99
## [5,]   4 36.7    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 15.5    0.99
## [2,]   1 22.2    0.99
## [3,]   2 28.4    0.99
## [4,]   3 33.2    0.99
## [5,]   4 37.9    0.99
## Type 3: with drift and trend 
##      lag  ADF p.value
## [1,]   0 15.5    0.99
## [2,]   1 22.5    0.99
## [3,]   2 29.0    0.99
## [4,]   3 34.1    0.99
## [5,]   4 39.1    0.99
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# note: this ADF output reports p = 0.99 (the unit root is not rejected), but the PP test below rejects it (p <= 0.01), so the Dior return series is treated as stationary
pp.test(stocks[,1])
## Phillips-Perron Unit Root Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##  lag Z_rho p.value
##    5  -234    0.01
## ----- 
##  Type 2: with drift no trend 
##  lag Z_rho p.value
##    5  -232    0.01
## ----- 
##  Type 3: with drift and trend 
##  lag Z_rho p.value
##    5  -230    0.01
## --------------- 
## Note: p-value = 0.01 means p.value <= 0.01
# p-value < 0.05, so the Dior return series is stationary

adf.test(stocks[,2])
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 16.2    0.99
## [2,]   1 23.5    0.99
## [3,]   2 27.6    0.99
## [4,]   3 30.9    0.99
## [5,]   4 33.9    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 16.2    0.99
## [2,]   1 23.5    0.99
## [3,]   2 27.5    0.99
## [4,]   3 30.8    0.99
## [5,]   4 33.8    0.99
## Type 3: with drift and trend 
##      lag  ADF p.value
## [1,]   0 16.3    0.99
## [2,]   1 23.8    0.99
## [3,]   2 28.0    0.99
## [4,]   3 31.4    0.99
## [5,]   4 34.6    0.99
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# the ADF p-values are again 0.99; stationarity is supported by the PP test below
pp.test(stocks[,2])
## Phillips-Perron Unit Root Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##  lag Z_rho p.value
##    5  -271    0.01
## ----- 
##  Type 2: with drift no trend 
##  lag Z_rho p.value
##    5  -271    0.01
## ----- 
##  Type 3: with drift and trend 
##  lag Z_rho p.value
##    5  -271    0.01
## --------------- 
## Note: p-value = 0.01 means p.value <= 0.01
# stationary (p <= 0.01)

adf.test(stocks[,3])
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 14.8    0.99
## [2,]   1 21.7    0.99
## [3,]   2 27.1    0.99
## [4,]   3 31.2    0.99
## [5,]   4 35.2    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 14.9    0.99
## [2,]   1 22.2    0.99
## [3,]   2 27.8    0.99
## [4,]   3 32.2    0.99
## [5,]   4 36.6    0.99
## Type 3: with drift and trend 
##      lag  ADF p.value
## [1,]   0 14.9    0.99
## [2,]   1 22.2    0.99
## [3,]   2 27.8    0.99
## [4,]   3 32.3    0.99
## [5,]   4 36.7    0.99
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# the ADF p-values are again 0.99; stationarity is supported by the PP test below
pp.test(stocks[,3])
## Phillips-Perron Unit Root Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##  lag Z_rho p.value
##    5  -222    0.01
## ----- 
##  Type 2: with drift no trend 
##  lag Z_rho p.value
##    5  -220    0.01
## ----- 
##  Type 3: with drift and trend 
##  lag Z_rho p.value
##    5  -220    0.01
## --------------- 
## Note: p-value = 0.01 means p.value <= 0.01
# stationary (p <= 0.01)

adf.test(stocks[,4])
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag  ADF p.value
## [1,]   0 16.9    0.99
## [2,]   1 23.5    0.99
## [3,]   2 28.1    0.99
## [4,]   3 32.1    0.99
## [5,]   4 35.7    0.99
## Type 2: with drift no trend 
##      lag  ADF p.value
## [1,]   0 16.8    0.99
## [2,]   1 23.5    0.99
## [3,]   2 28.0    0.99
## [4,]   3 32.1    0.99
## [5,]   4 35.7    0.99
## Type 3: with drift and trend 
##      lag  ADF p.value
## [1,]   0 16.9    0.99
## [2,]   1 23.6    0.99
## [3,]   2 28.2    0.99
## [4,]   3 32.3    0.99
## [5,]   4 36.1    0.99
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# the ADF p-values are again 0.99; stationarity is supported by the PP test below
pp.test(stocks[,4])
## Phillips-Perron Unit Root Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##  lag Z_rho p.value
##    5  -280    0.01
## ----- 
##  Type 2: with drift no trend 
##  lag Z_rho p.value
##    5  -280    0.01
## ----- 
##  Type 3: with drift and trend 
##  lag Z_rho p.value
##    5  -279    0.01
## --------------- 
## Note: p-value = 0.01 means p.value <= 0.01
# stationary (p <= 0.01)
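
Because aTSA masks tseries::adf.test (see the package conflicts reported when the libraries were loaded), the tseries implementation can still be called explicitly as a cross-check; a minimal sketch for the Dior series:

# single ADF statistic and p-value from the masked tseries implementation
tseries::adf.test(as.numeric(stocks[, 1]))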

Linear regression, the single-factor model (SFM):

#b) regress the Dior return on the market-index return, applying the SFM -> formula 3, p. 3, lecture 3
# y = Dior, x = CAC40
# classic approach - variant 1
sfmDior<- lm(stocks[,1]~stocks[,3])
summary(sfmDior)
## 
## Call:
## lm(formula = stocks[, 1] ~ stocks[, 3])
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.049237 -0.006391  0.000675  0.006821  0.046280 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  0.0014703  0.0008219   1.789   0.0748 .
## stocks[, 3] -0.0537081  0.0508265  -1.057   0.2916  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01315 on 258 degrees of freedom
## Multiple R-squared:  0.004309,   Adjusted R-squared:  0.00045 
## F-statistic: 1.117 on 1 and 258 DF,  p-value: 0.2916
# the estimated slope (beta) is about -0.05 and not statistically significant (p = 0.29), so the raw Dior return barely co-moves with the index

plot.ts(stocks[,3],stocks[,1], col="green", main="SFM Dior")
abline(sfmDior)

# variant 2 - with PerformanceAnalytics
chart.Regression(Ra=stocks[,1], Rb=stocks[,3], Rf=0, fit=c("linear"), col="blue", main="SFM pt Dior")

table.SFM(Ra=stocks[,1], Rb=stocks[,3], Rf=0, digits=4)

Computing the excess returns:

# c) apply the index model for Dior, taking the risk-free asset into account
# compute Ri - rf = the asset's excess return
# compute Rm - rf = the market risk premium

# equation 4 on page 3 of lecture 3 is applied
RDrf <- stocks[,1]-stocks[,4] # excess return of the Dior asset
RCrf <- stocks[,2]-stocks[,4] # excess return of the Carrefour asset
Rmrf<- stocks[,3]-stocks[,4] # excess return of CAC40 (the market risk premium)
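
A minimal sketch of the regression implied by these excess-return series (the object name is illustrative; the report itself relies on chart.Regression and table.SFM below):

# index model on excess returns: the intercept is Jensen's alpha, the slope is beta
index_model_Dior <- lm(RDrf ~ Rmrf)
summary(index_model_Dior)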

Index Model:

# Fit the index model
chart.Regression(Ra=stocks[,1, drop=F], Rb=stocks[,3, drop=F], Rf=stocks[,4, drop=F], fit=c("linear"), col="blue", main="SFM pt Dior")

table.SFM(Ra=stocks[,1], Rb=stocks[,3], Rf=stocks[,4], digits=4)
#the stock tends toward a neutral security, with beta between 0 and 1
# by adjusting the series for the risk-free rate, the volatility coefficient falls and the stock tracks the market index -> stable market, stable stock
# Dior comes close to the behaviour of a neutral security that follows the CAC40

chart.RollingPerformance(stocks, legend.loc ="topleft")

summary(stocks)
##      Index                RDior             RCarrefour        
##  Min.   :2019-01-03   Min.   :-0.048003   Min.   :-0.0401642  
##  1st Qu.:2019-04-03   1st Qu.:-0.005473   1st Qu.:-0.0073925  
##  Median :2019-07-08   Median : 0.001799   Median :-0.0002896  
##  Mean   :2019-07-06   Mean   : 0.001365   Mean   : 0.0002132  
##  3rd Qu.:2019-10-07   3rd Qu.: 0.008149   3rd Qu.: 0.0068091  
##  Max.   :2020-01-09   Max.   : 0.048159   Max.   : 0.0691766  
##      RCac40                rf            
##  Min.   :-0.071399   Min.   :-0.1781116  
##  1st Qu.:-0.005035   1st Qu.:-0.0242726  
##  Median : 0.003185   Median :-0.0038785  
##  Mean   : 0.001971   Mean   :-0.0009204  
##  3rd Qu.: 0.011191   3rd Qu.: 0.0179682  
##  Max.   : 0.046126   Max.   : 0.3062099
table.Stats(stocks, digits=4)

Applying the CAPM to the Dior return:

#d) use the index model to compute an asset's expected return with the CAPM
#the CAPM explains how your stock behaves as a function of the market index

#?table.CAPM
table.CAPM(Ra=stocks[,1], Rb=stocks[,3], Rf=stocks[,4], scale = 232, digits=4)
#same model as the index model; beta is used further on to make a forecast

#extract beta (the volatility coefficient) separately from the CAPM
CAPM.beta(Ra=stocks[,1], Rb=stocks[,3], Rf=stocks[,4])
## [1] 0.8866758
#compute the expected future return using formula 11' from lecture 3
#for Dior
DiorCAPM = mean(stocks[,4]) + CAPM.beta(Ra=stocks[,1], Rb=stocks[,3], Rf=stocks[,4])*mean(stocks[,3]-stocks[,4])
DiorCAPM
## [1] 0.001642942
DiorCAPM1 = mean(stocks[1:231,4]) + CAPM.beta(Ra=stocks[1:231,1], Rb=stocks[1:231,3], Rf=stocks[1:231,4])*mean(stocks[1:231,3]-stocks[1:231,4])
DiorCAPM1
## [1] 0.001577414
stocks$RDior[260]
##                 RDior
## 2020-01-09 0.00388601
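
CAPM.beta is equivalent to the covariance between the asset's excess return and the market's excess return divided by the variance of the market's excess return; a quick manual check using the series computed earlier:

# manual beta for Dior; it should match the CAPM.beta value above (about 0.89)
as.numeric(cov(RDrf, Rmrf) / var(Rmrf))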

Applying the CAPM to the Carrefour return:

table.CAPM(Ra=stocks[,2], Rb=stocks[,3], Rf=stocks[,4], scale = 232, digits=4)
CarrefourCAPM = mean(stocks[,4]) + CAPM.beta(Ra=stocks[,2], Rb=stocks[,3], Rf=stocks[,4])*mean(stocks[,3]-stocks[,4])
CarrefourCAPM
## [1] 0.001561167
CarrefourCAPM1 = mean(stocks[1:231,4]) + CAPM.beta(Ra=stocks[1:231,2], Rb=stocks[1:231,3], Rf=stocks[1:231,4])*mean(stocks[1:231,3]-stocks[1:231,4])
CarrefourCAPM1
## [1] 0.001440834
stocks$RCarrefour[260]
##              RCarrefour
## 2020-01-09 0.0009887937

Computing the prospect value and the value function.

### 2.3:

a = 0.88
b = 0.88
lambda = 2.25

val_RDIOR = ifelse(stocks[,1] >= 0, (stocks[,1])^a, (-lambda)*((-stocks[,1])^b) )
#View(val_RDIOR)
colnames(val_RDIOR) = c("Val_RDIOR")
mean(val_RDIOR) # mean of the value function
## [1] -0.006040419
mean(stocks[,1]) #mean return
## [1] 0.001364512
val_RCarrefour = ifelse(stocks[,2] >= 0, (stocks[,2])^a, (-lambda)*((-stocks[,2])^b) )
#View(val_RCarrefour)
colnames(val_RCarrefour) = c("Val_RCarrefour")
mean(val_RCarrefour) # mean of the value function
## [1] -0.009696051
mean(stocks[,2]) #mean return
## [1] 0.0002131729
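
The two ifelse calls implement the Tversky-Kahneman value function, v(x) = x^a for gains and v(x) = -lambda * (-x)^b for losses, with a = b = 0.88 and lambda = 2.25; a small reusable helper makes this explicit (the function name is only illustrative):

# prospect-theory value function applied above; value_fn(stocks[,1]) reproduces val_RDIOR
value_fn <- function(x, a = 0.88, b = 0.88, lambda = 2.25) {
  ifelse(x >= 0, x^a, -lambda * (-x)^b)
}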

Merging the realised Dior return with the expected return (the perceived value of the return)

Dior = merge.zoo(stocks[,1], val_RDIOR)
#View(Dior)
colnames(Dior) <- c("RDior", "val_RDior")

Carrefour = merge.zoo(stocks[,2], val_RCarrefour)
#View(Carrefour)
colnames(Carrefour) = c("RCarrefour", "val_RCarrefour") 

Descriptive statistics for the two types of returns (realised and perceived):

table.Stats(Dior, digits = 4)
table.Stats(Carrefour, digits = 4)

Monthly rolling performance for the Dior return and the value of the return:

chart.RollingPerformance(Dior, main = "PERFORMANTA LUNARA RELATIVA PT Dior SI VALOAREA RENTABILITATII", legend.loc = "topright")

Monthly rolling performance for the Carrefour return and the value of the return:

chart.RollingPerformance(Carrefour, main = "PERFORMANTA LUNARA RELATIVA PT Carrefour SI VALOAREA RENTABILITATII", legend.loc = "topright")

Plot of the Dior return against its value function:

plot(x = Dior[,1], y= Dior[,2], xlab = "Rentabilitatea Dior", ylab = "Valoare Dior", main = "Functia valoare pentru Dior", col = "pink")

Plot of the Carrefour return against its value function:

plot(x = Carrefour[,1], y= Carrefour[,2], xlab = "Rentabilitatea Carrefour", ylab = "Valoare Carrefour", main = "Functia valoare pentru Carrefour", col = "purple")

Converting the zoo object into a data.frame and ordering by the value function:

# convert the zoo object to a data.frame and order by the value function

obiect1 = as.data.frame(Dior)
obiect1 = transform(obiect1, Dates = as.Date(rownames(obiect1)))

Ordering by the first column:

ord1 = obiect1[order(obiect1[,1]), ]
ord1 = ord1[,-3]
#View(ord1)

Plot of the value function for the Dior return

#windows()
plot(ord1, type = "l", main = "Functie valoare pentru Dior", col = "pink")

library(ggplot2)

Another type of plot:

#windows()
ggplot() +geom_line(data = ord1, aes(x = ord1[,1], y = ord1[,2]), color = "pink") + ggtitle ("Repr functie valore actiuni Dior")

#windows()
autoplot.zoo(Dior, colour = 2)

Analysis of the initial capital-market investment and the time value of money

The return is computed using several methods:

# - e.g. an investment of 5000 euro -

# compute using several methods:
#a) amount based on the mean return

val1_D = mean(Dior[,1]) * 5000
val1_D # as if you earn 6.82 EUR on 5000 EUR - the time value of money is not taken into account
## [1] 6.82256
#b) compute the time value of money with the geometric mean, using Return.cumulative
Return.cumulative(Dior[,1], geometric = T) # used to find the value of the investment at the end
##                           x
## Cumulative Return 0.3938849
#computes the same thing
val2_D = 5000 * (Return.cumulative(Dior[,1], geometric = T))
val2_D #this is the present gain, with the time value of money
##                          x
## Cumulative Return 1969.424
tvm_dior = 5000+val2_D
tvm_dior #adjusting this for inflation would give the real value of the money initially invested
##                          x
## Cumulative Return 6969.424
### raise to the power 0.88 - the perception of this amount:
v_tvm_dior = tvm_dior^0.88
v_tvm_dior
##                          x
## Cumulative Return 2409.979
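
Return.cumulative with geometric = TRUE compounds the daily returns, i.e. it computes prod(1 + R_t) - 1; a quick manual check:

# should match the Cumulative Return reported above for Dior (about 0.394)
prod(1 + as.numeric(Dior[, 1])) - 1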

Similarly, for Carrefour:

val1_C = mean(Carrefour[,1]) * 5000
val1_C # as if you earn about 1.07 EUR on 5000 EUR - the time value of money is not taken into account
## [1] 1.065865
#b) compute the time value of money with the geometric mean, using Return.cumulative
Return.cumulative(Carrefour[,1], geometric = T) # used to find the value of the investment at the end
##                            x
## Cumulative Return 0.03440054
#computes the same thing
val2_C = 5000 * (Return.cumulative(Carrefour[,1], geometric = T))
val2_C #this is the present gain, with the time value of money
##                          x
## Cumulative Return 172.0027
tvm_carrefour = 5000+val2_C
tvm_carrefour #adjusting this for inflation would give the real value of the money initially invested
##                          x
## Cumulative Return 5172.003
### raise to the power 0.88 - the perception of this amount
v_tvm_carrefour = tvm_carrefour^0.88
v_tvm_carrefour
##                          x
## Cumulative Return 1853.616

Part 3 - Sentiment analysis: analysing the personalities described by the respondents via sentiment analysis, together with the detailed descriptions given by the tracked respondents for the two brands, Dior and Carrefour

Loading the data and the required packages:

# Sentiment Analysis ------------------------------------------------------

#install.packages(c("tm", "SnowballC", "wordcloud", "RColorBrewer", "RCurl", "XML"))
library("tm")                 
library("SnowballC")                 
library("wordcloud")                 
library("RColorBrewer")                 
library("RCurl")                 
## Loading required package: bitops
## 
## Attaching package: 'RCurl'
## The following object is masked from 'package:tidyr':
## 
##     complete
library("XML")                 

get_sentiments("afinn")
get_sentiments("bing")
get_sentiments("nrc")
# data

Personality <- tibble(ID = date_1[["ID"]], text = as.character(date_1[["Personality"]]))
#View(Personality)

Splitting the dataset into words with unnest_tokens:

# split the dataset into individual words

Personality <- Personality %>%
  unnest_tokens(word, text)
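
As a first pass, the bing lexicon loaded above can be joined to the tokenised personality words to count positive versus negative terms (a minimal sketch; the analysis below uses the nrc lexicon instead):

# overall positive / negative word counts in the personality descriptions
Personality %>%
  inner_join(get_sentiments("bing"), by = "word") %>%
  dplyr::count(sentiment)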

Plot of the most frequently used words in the respondents' personality descriptions:

g_pers <-Personality  %>%
  dplyr::count(word, sort = T) %>%
  filter(n>2) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", fill = "aquamarine3") +
  labs(x = "Nr de repetitii",
       y = "Cuvant",
       title = "Cel mai des folosite cuvinte pentru Personalitate") +
  coord_flip()

g_pers

Generating a word-cloud plot:

Personality %>% 
  dplyr::count(word) %>% 
  with(wordcloud(word,n,max.words = 100, color = brewer.pal(8,"Dark2")))

Generating a radar chart:

library(radarchart)
## Warning: package 'radarchart' was built under R version 3.6.2
nrc <- get_sentiments("nrc")

# compute the intensity of each sentiment in the personality descriptions

scores_nrc <- Personality %>%
  inner_join(nrc, by = c("word" = "word")) %>%
  filter(!grepl("positive|negative", sentiment)) %>%
  dplyr::count(word, sentiment) %>%
  spread(word, n)
#scores_nrc

# radar chart:

#windows()
chartJSRadar(scores_nrc)

Analysis of the detailed descriptions given by BAM, RR and IRI (the tracked respondents)

Reading and processing the data:

### b - review analysis

reviews <- readxl::read_xlsx("reviews.xlsx", col_names = TRUE)

text_Dior <- tibble(ID = reviews[["ID"]], text = as.character(reviews[["Review_Dior"]]))

text_Carrefour <- tibble(ID = reviews[["ID"]], text = as.character(reviews[["Review_Carrefour"]]))

text_Dior["brand"] <- c("Dior")

text_Carrefour["brand"] <- c("Carrefour")

text_Dior <- text_Dior %>%
  unnest_tokens(word, text)

text_Carrefour <- text_Carrefour %>%
  unnest_tokens(word, text)

tokens_brands <- bind_rows(text_Dior,text_Carrefour)

Removing words that do not convey sentiment, via lexicons and manual processing:

# remove the words that do not express sentiments

clean_tokens <- tokens_brands %>%
  anti_join(stop_words)
## Joining, by = "word"
tokens_brands_clean <- tokens_brands %>%
  filter(!word %in% c("and","on","their","etc","are","of","the","but","about","all",
                      "more","then","than","a","some","very","so","my","it","is",
                      "comes","to","for","tend","be","i","although","like","has",
                      "in","over","maybe","we","can't","can","say","i'm","thinking",
                      "most","seems","quite","fact","reason","with","not","don't",
                      "at","by","too","when","me","they","have","or","that",
                      "everytime","them","without","this","ranked","if","even",
                      "6","up","as","well","buy","8","think","both","also","make","birthday",
                      "21st","the","are","i","and","men","things","kills","6", "gg", "brand", 
                      "10", "8", "these", "look", "products", "associate", "i'm", "brand", "ones", "brands",
                      "because", "don't", "many", "from", "since", "little", "overall", "know", "much",
                      "general", "what", "offer", "any", "dior", "product", "talk", "experience", "one", "sounds",
                      "name", "something", "only", "from", "were", "lot", "it's", "give", "since", "from", "go",
                      "there", "other", "would", "seen", "had", "only", "still", "remains", "because", "go", "lot",
                      "you", "else", "Carrefour", "carrefour", "shopping"))

The most frequently used words in the Dior descriptions:

#Dior
tokens_brands_clean  %>%
  filter(brand == "Dior") %>%
  dplyr::count(word, sort = TRUE) 

The most frequently used words in the Carrefour descriptions:

#Carrefour
tokens_brands_clean  %>%
  filter(brand == "Carrefour") %>%
  dplyr::count(word, sort = TRUE)

Bar chart of the most frequently used words describing Dior:

# plots of the most frequently used words

#Dior
g_Dior <- tokens_brands_clean  %>%
  filter(brand == "Dior") %>%
  dplyr::count(word, sort = TRUE) %>%
  filter(n>=2) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", fill = "aquamarine2") +
  labs(x = "Nr de repetitii",
       y = "Cuvant",
       title = "Cel mai des folosite cuvinte pentru Dior") +
  coord_flip()

g_Dior

Bar chart of the most frequently used words describing Carrefour:

#Carrefour
g_Carrefour <- tokens_brands_clean  %>%
  filter(brand == "Carrefour") %>%
  dplyr::count(word, sort = TRUE) %>%
  filter(n>=2) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n)) +
  geom_bar(stat = "identity", fill = "aquamarine2") +
  labs(x = "Nr de repetitii",
       y = "Cuvant",
       title = "Cel mai des folosite cuvinte pentru Carrefour") +
  coord_flip()

g_Carrefour

Word cloud for the Dior brand:

## word clouds

tokens_brands_clean %>% 
  filter(brand == "Dior") %>%
  dplyr::count(word) %>% 
  with(wordcloud(word,n,max.words = 150, color = brewer.pal(8,"Dark2")))

Word cloud for the Carrefour brand:

tokens_brands_clean %>% 
  filter(brand == "Carrefour") %>%
  dplyr::count(word) %>% 
  with(wordcloud(word,n,max.words = 150, color = brewer.pal(8,"Dark2")))

Radar chart for the two brands:

## radar chart

scores_nrc2 <- tokens_brands_clean %>%
  inner_join(nrc, by = c("word" = "word")) %>%
  filter(!grepl("positive|negative", sentiment)) %>%
  dplyr::count(brand, sentiment) %>%
  spread(brand, n)
scores_nrc2
chartJSRadar(scores_nrc2)