library(ggplot2)
library(dplyr)
library(pastecs)
library(fpc)
library(FactoMineR)
library(readxl)
library(xlsx)

# Load the raw survey responses from the workbook in the working directory.
df1 <- read_excel("df.xlsx")

# Remove respondents who do not know the product.
# `%in%` is used instead of `==` so that a missing answer (NA) keeps the
# respondent's real row; with `==` an NA answer produces an NA row index,
# which turns the whole row into NAs.
df <- df1[!(df1$`1.Do you know the product?` %in% "No"), ]

# Next, use strsplit() to break the multi-answer channel column apart:
# string split
# Split each respondent's comma-separated "3. Channel" answer into tokens.
s <- strsplit(df$`3. Channel`, split = ",")
# Strip surrounding white space BEFORE dropping empty tokens, so that
# whitespace-only pieces (e.g. from "a, ,b" or a trailing ", ") do not
# survive as blank categories.
s <- lapply(s, function(x) {
  x <- gsub("^\\s+|\\s+$", "", x)
  x[nzchar(x)]
})
# Long format: one row per (respondent ID, channel) pair. lengths() always
# returns an integer vector, unlike sapply(s, length), which returns a list
# when `s` is empty.
s2 <- data.frame(V1 = rep(df$ID, lengths(s)), V2 = unlist(s))

# Splitting can leave white space before and after the text. The trim helpers
# defined next remove it; re-applying trim() to V2 afterwards is a harmless
# no-op for tokens that are already trimmed.
# Whitespace-trimming helpers ----

#' Strip white space from the start of each string.
#'
#' @param x A vector of strings.
#' @return `x` with any leading run of white space removed from each element.
trim.leading <- function(x) {
  sub(pattern = "^\\s+", replacement = "", x = x)
}

#' Strip white space from the end of each string.
#'
#' @param x A vector of strings.
#' @return `x` with any trailing run of white space removed from each element.
trim.trailing <- function(x) {
  sub(pattern = "\\s+$", replacement = "", x = x)
}

#' Strip white space from both ends of each string.
#'
#' @param x A vector of strings.
#' @return `x` with leading and trailing white space removed from each
#'   element; interior white space is left untouched.
trim <- function(x) {
  # gsub (not sub) so that both alternatives -- the leading run and the
  # trailing run -- are replaced within the same string.
  gsub(pattern = "^\\s+|\\s+$", replacement = "", x = x)
}
# Application
# Remove leading/trailing white space from every channel label.
s2$V2 <- trim(s2$V2)

# Export the long-format channel table to the "Channel" sheet of
# "myworkbook.xlsx" (append = FALSE, so the sheet is not added to an
# existing workbook).
write.xlsx(s2,
  file = "myworkbook.xlsx",
  sheetName = "Channel", append = FALSE
)

# Use str_replace() to conditionally replace values in a column:
library(stringr)
# Recode the Vietnamese channel labels in s2$V2 to their English names.
# Each pair is c(pattern, replacement); the pairs are applied one after the
# other, in the order listed, and each str_replace() call rewrites the first
# match of its pattern within every value.
s2$V2 <- Reduce(
  function(labels, pair) str_replace(labels, pair[[1]], pair[[2]]),
  list(
    c("Ny giới thiệu", "WoM"),
    c("Show âm nhạc", "Music Show"),
    c("Quảng cáo tại rạp phim", "Advertisting at movie theater"),
    c("Truyền miệng", "WoM"),
    c("Báo chí", "PR Com"),
    c("Quang cáo", "Others")
  ),
  s2$V2
)
# basic plot
# Quick look: one bar per channel label, bar height = number of rows in s2.
ggplot(data = s2, mapping = aes(x = V2)) +
  geom_bar()
# try another one
library(tidyverse)
library(scales)
# Share of mentions per channel: count rows per label, convert the counts to
# proportions of the total, and draw the bars from largest to smallest share
# with a percentage label above each bar.
p <- s2 %>%
  count(V2) %>%
  mutate(prop = n / sum(n)) %>%
  ggplot(mapping = aes(x = reorder(V2, -prop), y = prop)) + # largest share first
  geom_col(fill = "#FF7F24") +
  geom_text(mapping = aes(label = percent(prop)), vjust = -1) +
  scale_y_continuous(labels = percent_format()) +
  # clip = "off" lets the label above the tallest bar extend past the panel
  coord_cartesian(clip = "off") +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1),
    # extra top margin leaves room for the labels drawn outside the panel
    plot.margin = margin(20, 10, 10, 10)
  )
p