1. Load and Clean Data

# Import the review workbook and clean up its column names
data <- clean_names(
  read_excel("D:/R analysis/customer_opinions (1).xlsx")
)

# Preview the first rows
data %>% head()
## # A tibble: 6 × 9
##      id company review_source username           review_date         review_text
##   <dbl> <chr>   <chr>         <chr>              <dttm>              <chr>      
## 1     1 Amazon  trustpilot    Deborah Stevenson… 2025-07-02 00:00:00 "Absolutel…
## 2     2 Amazon  trustpilot    Rui Oliveira       2025-07-02 00:00:00 "Don’t w…
## 3     3 Amazon  trustpilot    chris schoepf      2025-06-27 00:00:00 "Amazon co…
## 4     4 Amazon  trustpilot    Mokshya Priyadars… 2025-07-04 00:00:00 "Amazon Pr…
## 5     5 Amazon  trustpilot    SG                 2025-07-04 00:00:00 "Customer …
## 6     6 Amazon  trustpilot    customer           2025-07-03 00:00:00 "Don’t r…
## # ℹ 3 more variables: star_rating <dbl>, sentiment <chr>, category <chr>

2. Data Overview

# Comprehensive statistical summary of the data set
skim(data)
Data summary
Name data
Number of rows 22
Number of columns 9
_______________________
Column type frequency:
character 6
numeric 2
POSIXct 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
company 0 1.00 6 6 0 1 0
review_source 0 1.00 10 10 0 1 0
username 0 1.00 2 27 0 22 0
review_text 0 1.00 54 1339 0 22 0
sentiment 0 1.00 8 13 0 3 0
category 1 0.95 13 28 0 12 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
id 0 1 11.50 6.49 1 6.25 11.5 16.75 22 ▇▆▆▆▇
star_rating 0 1 1.36 0.90 1 1.00 1.0 1.00 5 ▇▂▁▁▁

Variable type: POSIXct

skim_variable n_missing complete_rate min max median n_unique
review_date 0 1 2025-06-27 2025-07-06 2025-07-03 12:00:00 6

3. Star Rating Analysis

# Bar chart of the number of reviews per star rating
data %>%
  ggplot(mapping = aes(x = star_rating)) +
  geom_bar(fill = "steelblue") +
  labs(
    title = "Star Rating Distribution",
    x = "Stars",
    y = "Number of Reviews"
  )

4. Sentiment Analysis

# Number of reviews per sentiment label, most frequent first
count(data, sentiment, sort = TRUE)
## # A tibble: 3 × 2
##   sentiment         n
##   <chr>         <int>
## 1 Very Negative    17
## 2 Negative          4
## 3 Very Positive     1
# Bar chart of the number of reviews per sentiment label
data %>%
  ggplot(mapping = aes(x = sentiment)) +
  geom_bar(fill = "darkgreen") +
  labs(
    title = "Sentiment Distribution",
    x = "Sentiment",
    y = "Number of Reviews"
  )

5. Category Analysis

# Horizontal bar chart of review counts per category, bars ordered by count
count(data, category, sort = TRUE) %>%
  ggplot(mapping = aes(x = reorder(category, n), y = n)) +
  geom_col(fill = "purple") +  # counts are precomputed, so plot them as-is
  coord_flip() +
  labs(
    title = "Review Count by Category",
    x = "Category",
    y = "Number of Reviews"
  )

6. Text Analysis

Most Frequent Words

# Extract the most frequent words from the review texts.
#
# The review texts contain typographic punctuation that is (or is treated as)
# mis-decoded UTF-8, e.g. the curly apostrophe showing up as the three
# characters "\u00e2\u20ac\u2122". Left alone, the tokenizer splits on it and
# yields junk tokens such as "â" and "donâ" instead of "don't". Normalise the
# punctuation to ASCII first so contractions tokenise correctly and are then
# removed by the stop-word list.
words <- data %>%
  select(review_text) %>%
  mutate(
    # Curly apostrophes / single quotes (mis-decoded or genuine) -> "'"
    review_text = gsub(
      "\u00e2\u20ac[\u2122\u02dc]|[\u2018\u2019]", "'", review_text
    ),
    # Remaining curly double quotes, dashes and ellipses (mis-decoded or
    # genuine) carry no lexical content -> replace with a space
    review_text = gsub(
      "\u00e2\u20ac[\u0153\u009d\u201c\u201d\u00a6]?|[\u201c\u201d\u2013\u2014\u2026]",
      " ", review_text
    )
  ) %>%
  unnest_tokens(word, review_text) %>%
  # Explicit join key: avoids the "Joining with `by = ...`" message and guards
  # against an accidental join on any other shared column name
  anti_join(stop_words, by = "word") %>%
  count(word, sort = TRUE)

# Show the 10 most frequent words
words %>% head(n = 10)
## # A tibble: 10 × 2
##    word           n
##    <chr>      <int>
##  1 amazon        24
##  2 customer      16
##  3 â             16
##  4 service       14
##  5 delivery      11
##  6 prime          9
##  7 time           9
##  8 donâ           7
##  9 experience     7
## 10 delivered      6

Word Cloud

# Draw the word cloud.
# Word placement is randomised even with random.order = FALSE (that option
# only controls the plotting order), so fix the RNG seed to get the same
# figure on every render of the report.
set.seed(123)
wordcloud(
  words = words$word,
  freq = words$n,
  min.freq = 5,
  max.words = 100,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)

7. Conclusions & Recommendations