library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
orders <- read.csv("C:/Users/HARDEEP DESHWAL/Downloads/archive (3)/List of Orders.csv")


details <- read.csv("C:/Users/HARDEEP DESHWAL/Downloads/archive (3)/Order Details.csv")

sales_target <- read.csv("C:/Users/HARDEEP DESHWAL/Downloads/archive (3)/Sales target.csv")
View(orders)
View(details)
str(orders)
## 'data.frame':    560 obs. of  5 variables:
##  $ Order.ID    : chr  "B-25601" "B-25602" "B-25603" "B-25604" ...
##  $ Order.Date  : chr  "01-04-2018" "01-04-2018" "03-04-2018" "03-04-2018" ...
##  $ CustomerName: chr  "Bharat" "Pearl" "Jahan" "Divsha" ...
##  $ State       : chr  "Gujarat" "Maharashtra" "Madhya Pradesh" "Rajasthan" ...
##  $ City        : chr  "Ahmedabad" "Pune" "Bhopal" "Jaipur" ...
str(details)
## 'data.frame':    1500 obs. of  6 variables:
##  $ Order.ID    : chr  "B-25601" "B-25601" "B-25601" "B-25601" ...
##  $ Amount      : num  1275 66 8 80 168 ...
##  $ Profit      : num  -1148 -12 -2 -56 -111 ...
##  $ Quantity    : int  7 5 3 4 2 5 4 3 8 5 ...
##  $ Category    : chr  "Furniture" "Clothing" "Clothing" "Electronics" ...
##  $ Sub.Category: chr  "Bookcases" "Stole" "Hankerchief" "Electronic Games" ...
names(orders)
## [1] "Order.ID"     "Order.Date"   "CustomerName" "State"        "City"
names(details)
## [1] "Order.ID"     "Amount"       "Profit"       "Quantity"     "Category"    
## [6] "Sub.Category"
head(orders)
##   Order.ID Order.Date CustomerName          State      City
## 1  B-25601 01-04-2018       Bharat        Gujarat Ahmedabad
## 2  B-25602 01-04-2018        Pearl    Maharashtra      Pune
## 3  B-25603 03-04-2018        Jahan Madhya Pradesh    Bhopal
## 4  B-25604 03-04-2018       Divsha      Rajasthan    Jaipur
## 5  B-25605 05-04-2018      Kasheen    West Bengal   Kolkata
## 6  B-25606 06-04-2018        Hazel      Karnataka Bangalore
head(details)
##   Order.ID Amount Profit Quantity    Category     Sub.Category
## 1  B-25601   1275  -1148        7   Furniture        Bookcases
## 2  B-25601     66    -12        5    Clothing            Stole
## 3  B-25601      8     -2        3    Clothing      Hankerchief
## 4  B-25601     80    -56        4 Electronics Electronic Games
## 5  B-25602    168   -111        2 Electronics           Phones
## 6  B-25602    424   -272        5 Electronics           Phones
tail(orders)
##     Order.ID Order.Date CustomerName State City
## 555                                            
## 556                                            
## 557                                            
## 558                                            
## 559                                            
## 560
tail(details)
##      Order.ID Amount Profit Quantity    Category Sub.Category
## 1495  B-26099    207     37        4    Clothing  Hankerchief
## 1496  B-26099    835    267        5 Electronics       Phones
## 1497  B-26099   2366    552        5    Clothing     Trousers
## 1498  B-26100    828    230        2   Furniture       Chairs
## 1499  B-26100     34     10        2    Clothing      T-shirt
## 1500  B-26100     72     16        2    Clothing        Shirt
summary(orders)
##    Order.ID          Order.Date        CustomerName          State          
##  Length:560         Length:560         Length:560         Length:560        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##      City          
##  Length:560        
##  Class :character  
##  Mode  :character
summary(details)
##    Order.ID             Amount           Profit            Quantity     
##  Length:1500        Min.   :   4.0   Min.   :-1981.00   Min.   : 1.000  
##  Class :character   1st Qu.:  45.0   1st Qu.:   -9.25   1st Qu.: 2.000  
##  Mode  :character   Median : 118.0   Median :    9.00   Median : 3.000  
##                     Mean   : 287.7   Mean   :   15.97   Mean   : 3.743  
##                     3rd Qu.: 322.0   3rd Qu.:   38.00   3rd Qu.: 5.000  
##                     Max.   :5729.0   Max.   : 1698.00   Max.   :14.000  
##    Category         Sub.Category      
##  Length:1500        Length:1500       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
dim(orders)
## [1] 560   5
dim(details)
## [1] 1500    6
colSums(is.na(orders))
##     Order.ID   Order.Date CustomerName        State         City 
##            0            0            0            0            0
colSums(is.na(details))
##     Order.ID       Amount       Profit     Quantity     Category Sub.Category 
##            0            0            0            0            0            0
ecommerce <- merge(orders, details, by = "Order.ID")
View(ecommerce)

# Structure of merged data
str(ecommerce)
## 'data.frame':    1500 obs. of  10 variables:
##  $ Order.ID    : chr  "B-25601" "B-25601" "B-25601" "B-25601" ...
##  $ Order.Date  : chr  "01-04-2018" "01-04-2018" "01-04-2018" "01-04-2018" ...
##  $ CustomerName: chr  "Bharat" "Bharat" "Bharat" "Bharat" ...
##  $ State       : chr  "Gujarat" "Gujarat" "Gujarat" "Gujarat" ...
##  $ City        : chr  "Ahmedabad" "Ahmedabad" "Ahmedabad" "Ahmedabad" ...
##  $ Amount      : num  1275 66 8 80 168 ...
##  $ Profit      : num  -1148 -12 -2 -56 -111 ...
##  $ Quantity    : int  7 5 3 4 2 5 4 3 8 5 ...
##  $ Category    : chr  "Furniture" "Clothing" "Clothing" "Electronics" ...
##  $ Sub.Category: chr  "Bookcases" "Stole" "Hankerchief" "Electronic Games" ...
ecommerce$Amount <- as.numeric(ecommerce$Amount)
ecommerce$Quantity <- as.numeric(ecommerce$Quantity)
ecommerce$Profit <- as.numeric(ecommerce$Profit)
high_sales <- ecommerce %>%
  filter(Amount > 1000) %>%
  select(Order.ID, Amount)

high_sales
##    Order.ID Amount
## 1   B-25601   1275
## 2   B-25602   2617
## 3   B-25603   1355
## 4   B-25608   1364
## 5   B-25610   1076
## 6   B-25613   1603
## 7   B-25626   1103
## 8   B-25629   1560
## 9   B-25639   1629
## 10  B-25643   1061
## 11  B-25653   1279
## 12  B-25653   1327
## 13  B-25656   1389
## 14  B-25657   1021
## 15  B-25667   1030
## 16  B-25676   1263
## 17  B-25681   1625
## 18  B-25681   1096
## 19  B-25686   1829
## 20  B-25697   1300
## 21  B-25728   1055
## 22  B-25729   1549
## 23  B-25730   1145
## 24  B-25738   1069
## 25  B-25749   1052
## 26  B-25752   1361
## 27  B-25755   1709
## 28  B-25757   3151
## 29  B-25761   2188
## 30  B-25762   1316
## 31  B-25768   1582
## 32  B-25772   1183
## 33  B-25777   1076
## 34  B-25778   1506
## 35  B-25779   1361
## 36  B-25786   1854
## 37  B-25793   1402
## 38  B-25797   1630
## 39  B-25798   2830
## 40  B-25810   1120
## 41  B-25823   2103
## 42  B-25830   1063
## 43  B-25830   1954
## 44  B-25836   1298
## 45  B-25839   1250
## 46  B-25842   1543
## 47  B-25853   2093
## 48  B-25855   1027
## 49  B-25855   1319
## 50  B-25858   2457
## 51  B-25862   2061
## 52  B-25868   1118
## 53  B-25873   1275
## 54  B-25877   1137
## 55  B-25881   1351
## 56  B-25881   2244
## 57  B-25881   2115
## 58  B-25887   2125
## 59  B-25894   1246
## 60  B-25902   1700
## 61  B-25909   1622
## 62  B-25910   1622
## 63  B-25919   1599
## 64  B-25923   3873
## 65  B-25925   1228
## 66  B-25929   1308
## 67  B-25935   1657
## 68  B-25937   1101
## 69  B-25943   1547
## 70  B-25950   1622
## 71  B-25953   1218
## 72  B-25955   1716
## 73  B-25955   2927
## 74  B-25957   1157
## 75  B-25964   1270
## 76  B-25969   2452
## 77  B-25973   4141
## 78  B-25978   1063
## 79  B-25993   4363
## 80  B-25995   1314
## 81  B-25997   2292
## 82  B-26003   1745
## 83  B-26006   1301
## 84  B-26022   1824
## 85  B-26023   1117
## 86  B-26028   1272
## 87  B-26048   1461
## 88  B-26048   1104
## 89  B-26051   1337
## 90  B-26055   5729
## 91  B-26055   1218
## 92  B-26067   1120
## 93  B-26067   1137
## 94  B-26073   1514
## 95  B-26085   1487
## 96  B-26093   2847
## 97  B-26099   2366
head(high_sales)
##   Order.ID Amount
## 1  B-25601   1275
## 2  B-25602   2617
## 3  B-25603   1355
## 4  B-25608   1364
## 5  B-25610   1076
## 6  B-25613   1603
top_orders <- ecommerce %>%
  arrange(desc(Amount)) %>%
  select(Order.ID, Amount) %>%
  head(10)

top_orders
##    Order.ID Amount
## 1   B-26055   5729
## 2   B-25993   4363
## 3   B-25973   4141
## 4   B-25923   3873
## 5   B-25757   3151
## 6   B-25955   2927
## 7   B-26093   2847
## 8   B-25798   2830
## 9   B-25602   2617
## 10  B-25858   2457
rank_orders <- ecommerce %>%
  arrange(desc(Amount)) %>%
  mutate(rank = row_number())

head(rank_orders)
##   Order.ID Order.Date CustomerName          State      City Amount Profit
## 1  B-26055 10-03-2019       Yaanvi Madhya Pradesh    Indore   5729     64
## 2  B-25993 03-02-2019      Harshal          Delhi     Delhi   4363    305
## 3  B-25973 24-01-2019        Seema  Uttar Pradesh Allahabad   4141   1698
## 4  B-25923 27-12-2018     Vishakha    Maharashtra    Mumbai   3873    891
## 5  B-25757 21-08-2018        Mohit Madhya Pradesh    Indore   3151    -35
## 6  B-25955 16-01-2019       Soumya    Maharashtra      Pune   2927    146
##   Quantity    Category Sub.Category rank
## 1       14   Furniture       Chairs    1
## 2        5   Furniture       Tables    2
## 3       13 Electronics     Printers    3
## 4        6 Electronics       Phones    4
## 5        7    Clothing     Trousers    5
## 6        8   Furniture    Bookcases    6
topper <- rank_orders %>%
  filter(rank == 1) %>%
  select(Order.ID, Amount, rank)

topper
##   Order.ID Amount rank
## 1  B-26055   5729    1
avg_top10 <- top_orders %>%
  summarise(avg_sales = mean(Amount, na.rm = TRUE))

avg_top10
##   avg_sales
## 1    3493.5
high_profit <- ecommerce %>%
  arrange(desc(Profit)) %>%
  select(Order.ID, Profit, Quantity)

head(high_profit, 10)
##    Order.ID Profit Quantity
## 1   B-25973   1698       13
## 2   B-25602   1151        4
## 3   B-25761   1050        5
## 4   B-25923    891        6
## 5   B-25830    782        3
## 6   B-26073    742        4
## 7   B-25853    721        5
## 8   B-26093    712        8
## 9   B-25862    701        5
## 10  B-25656    680        7
low_profit <- ecommerce %>%
  arrange(Profit) %>%
  select(Order.ID, Profit, Quantity)

head(low_profit, 10)
##    Order.ID Profit Quantity
## 1   B-25798  -1981       13
## 2   B-25608  -1864        5
## 3   B-26022  -1303        8
## 4   B-25601  -1148        7
## 5   B-25779   -980        3
## 6   B-25666   -916        7
## 7   B-25797   -802        5
## 8   B-25730   -706        3
## 9   B-25681   -658        7
## 10  B-25653   -640        8
ggplot(ecommerce, aes(x = Amount)) +
  geom_histogram(
    binwidth = 500,
    fill = "red",
    color = "black"
  ) +
  labs(
    title = "Distribution of Sales Amount",
    x = "Amount",
    y = "Count"
  )

ecommerce$sales_group <- ifelse(
  ecommerce$Amount >= 1000,
  "High",
  ifelse(
    ecommerce$Amount >= 500,
    "Medium",
    "Low"
  )
)

ggplot(ecommerce,
       aes(x = sales_group,
           fill = sales_group)) +
  geom_bar() +
  labs(
    title = "Sales Categories",
    x = "Category",
    y = "Count"
  )

ggplot(ecommerce,
       aes(x = Quantity,
           y = Amount)) +
  geom_point(
    color = "blue",
    na.rm = TRUE
  ) +
  labs(
    title = "Quantity vs Amount",
    x = "Quantity",
    y = "Amount"
  )

ggplot(ecommerce,
       aes(x = Profit,
           y = Amount)) +
  geom_point(
    color = "darkblue",
    na.rm = TRUE
  ) +
  labs(
    title = "Profit vs Amount",
    x = "Profit",
    y = "Amount"
  )

ggplot(ecommerce,
       aes(x = Quantity)) +
  geom_histogram(
    binwidth = 1,
    fill = "green",
    color = "black"
  ) +
  labs(
    title = "Quantity Distribution",
    x = "Quantity",
    y = "Count"
  )

ggplot(top_orders,
       aes(x = reorder(Order.ID, Amount),
           y = Amount)) +
  geom_bar(
    stat = "identity",
    fill = "purple"
  ) +
  coord_flip() +
  labs(
    title = "Top 10 Orders",
    x = "Order ID",
    y = "Amount"
  )

category_sales <- ecommerce %>%
  group_by(Category) %>%
  summarise(total_sales = sum(Amount, na.rm = TRUE))

category_sales
## # A tibble: 3 × 2
##   Category    total_sales
##   <chr>             <dbl>
## 1 Clothing         139054
## 2 Electronics      165267
## 3 Furniture        127181
ggplot(category_sales,
       aes(x = Category,
           y = total_sales,
           fill = Category)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Category vs Sales",
    x = "Category",
    y = "Total Sales"
  )

state_sales <- ecommerce %>%
  group_by(State) %>%
  summarise(total_sales = sum(Amount, na.rm = TRUE))

state_sales
## # A tibble: 19 × 2
##    State               total_sales
##    <chr>                     <dbl>
##  1 "Andhra Pradesh"          13256
##  2 "Bihar"                   12943
##  3 "Delhi"                   22531
##  4 "Goa"                      6705
##  5 "Gujarat"                 21058
##  6 "Haryana"                  8863
##  7 "Himachal Pradesh"         8666
##  8 "Jammu and Kashmir"       10829
##  9 "Karnataka"               15058
## 10 "Kerala "                 13459
## 11 "Madhya Pradesh"         105140
## 12 "Maharashtra"             95348
## 13 "Nagaland"                11903
## 14 "Punjab"                  16786
## 15 "Rajasthan"               21149
## 16 "Sikkim"                   5276
## 17 "Tamil Nadu"               6087
## 18 "Uttar Pradesh"           22359
## 19 "West Bengal"             14086
ggplot(state_sales,
       aes(x = reorder(State, total_sales),
           y = total_sales)) +
  geom_bar(
    stat = "identity",
    fill = "orange"
  ) +
  coord_flip() +
  labs(
    title = "State vs Sales",
    x = "State",
    y = "Total Sales"
  )

plot(
  ecdf(ecommerce$Amount),
  main = "CDF of Sales Amount",
  xlab = "Amount",
  ylab = "Cumulative Probability",
  col = "blue"
)

plot(
  ecdf(ecommerce$Profit),
  main = "CDF of Profit",
  xlab = "Profit",
  ylab = "Cumulative Probability",
  col = "darkgreen"
)

ggplot(ecommerce,
       aes(x = Category,
           y = Amount,
           fill = Category)) +
  geom_boxplot() +
  labs(
    title = "Box Plot of Amount by Category",
    x = "Category",
    y = "Amount"
  )

ggplot(ecommerce,
       aes(x = Category,
           y = Profit,
           fill = Category)) +
  geom_boxplot() +
  labs(
    title = "Box Plot of Profit by Category",
    x = "Category",
    y = "Profit"
  )

ggplot(ecommerce,
       aes(y = Quantity)) +
  geom_boxplot(fill = "skyblue") +
  labs(
    title = "Box Plot of Quantity",
    y = "Quantity"
  )

anova_amount <- aov(
  Amount ~ Category,
  data = ecommerce
)

summary(anova_amount)
##               Df    Sum Sq  Mean Sq F value Pr(>F)    
## Category       2  51488804 25744402   144.3 <2e-16 ***
## Residuals   1497 267149957   178457                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova_profit <- aov(
  Profit ~ Category,
  data = ecommerce
)

summary(anova_profit)
##               Df   Sum Sq Mean Sq F value Pr(>F)
## Category       2   128025   64013   2.241  0.107
## Residuals   1497 42756162   28561
cor(
  ecommerce$Amount,
  ecommerce$Profit,
  use = "complete.obs"
)
## [1] 0.2420446
cor(
  ecommerce$Quantity,
  ecommerce$Amount,
  use = "complete.obs"
)
## [1] 0.3520518
numeric_data <- ecommerce %>%
  select(Amount, Profit, Quantity)

cor(numeric_data)
##             Amount     Profit   Quantity
## Amount   1.0000000 0.24204460 0.35205176
## Profit   0.2420446 1.00000000 0.00244136
## Quantity 0.3520518 0.00244136 1.00000000
single_reg <- lm(
  Amount ~ Quantity,
  data = ecommerce
)

summary(single_reg)
## 
## Call:
## lm(formula = Amount ~ Quantity, data = ecommerce)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -859.0 -201.4 -103.2   57.3 4679.4 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    9.586     22.116   0.433    0.665    
## Quantity      74.287      5.103  14.558   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 431.7 on 1498 degrees of freedom
## Multiple R-squared:  0.1239, Adjusted R-squared:  0.1234 
## F-statistic: 211.9 on 1 and 1498 DF,  p-value: < 2.2e-16
ggplot(ecommerce,
       aes(x = Quantity,
           y = Amount)) +
  geom_point(color = "blue") +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Single Regression Plot",
    x = "Quantity",
    y = "Amount"
  )
## `geom_smooth()` using formula = 'y ~ x'

single_reg2 <- lm(
  Profit ~ Quantity,
  data = ecommerce
)

summary(single_reg2)
## 
## Call:
## lm(formula = Profit ~ Quantity, data = ecommerce)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1998.72   -25.46    -6.92    22.18  1680.28 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)   15.262      8.668   1.761   0.0785 .
## Quantity       0.189      2.000   0.094   0.9247  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 169.2 on 1498 degrees of freedom
## Multiple R-squared:  5.96e-06,   Adjusted R-squared:  -0.0006616 
## F-statistic: 0.008928 on 1 and 1498 DF,  p-value: 0.9247
multiple_reg <- lm(
  Amount ~ Quantity + Profit,
  data = ecommerce
)

summary(multiple_reg)
## 
## Call:
## lm(formula = Amount ~ Quantity + Profit, data = ecommerce)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -837.6 -196.2  -98.7   76.9 4649.1 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.44862   21.39834  -0.021    0.983    
## Quantity    74.16314    4.93226  15.036   <2e-16 ***
## Profit       0.65744    0.06371  10.318   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 417.2 on 1497 degrees of freedom
## Multiple R-squared:  0.1821, Adjusted R-squared:  0.181 
## F-statistic: 166.7 on 2 and 1497 DF,  p-value: < 2.2e-16
predicted_values <- predict(multiple_reg)

head(predicted_values)
##          1          2          3          4          5          6 
## -236.04371  362.47785  220.72593  259.38750   74.90222  191.54437
ggplot(ecommerce,
       aes(x = Profit,
           y = Amount)) +
  geom_point(color = "darkblue") +
  geom_smooth(
    method = "lm",
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Profit vs Amount Regression",
    x = "Profit",
    y = "Amount"
  )
## `geom_smooth()` using formula = 'y ~ x'

poly_reg <- lm(
  Amount ~ poly(Quantity, 2),
  data = ecommerce
)

summary(poly_reg)
## 
## Call:
## lm(formula = Amount ~ poly(Quantity, 2), data = ecommerce)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1175.4  -188.3  -114.6    45.9  4340.6 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          287.67      11.12  25.878   <2e-16 ***
## poly(Quantity, 2)1  6284.28     430.53  14.597   <2e-16 ***
## poly(Quantity, 2)2  1293.71     430.53   3.005   0.0027 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 430.5 on 1497 degrees of freedom
## Multiple R-squared:  0.1292, Adjusted R-squared:  0.128 
## F-statistic:   111 on 2 and 1497 DF,  p-value: < 2.2e-16
ggplot(ecommerce,
       aes(x = Quantity,
           y = Amount)) +
  geom_point(color = "purple") +
  stat_smooth(
    method = "lm",
    formula = y ~ poly(x, 2),
    se = FALSE,
    color = "red"
  ) +
  labs(
    title = "Polynomial Regression Plot",
    x = "Quantity",
    y = "Amount"
  )

poly_reg3 <- lm(
  Profit ~ poly(Quantity, 3),
  data = ecommerce
)

summary(poly_reg3)
## 
## Call:
## lm(formula = Profit ~ poly(Quantity, 3), data = ecommerce)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1984.87   -25.56    -6.81    22.15  1694.13 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          15.970      4.371   3.653 0.000268 ***
## poly(Quantity, 3)1   15.987    169.299   0.094 0.924778    
## poly(Quantity, 3)2   -3.944    169.299  -0.023 0.981416    
## poly(Quantity, 3)3  -72.659    169.299  -0.429 0.667856    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 169.3 on 1496 degrees of freedom
## Multiple R-squared:  0.0001294,  Adjusted R-squared:  -0.001876 
## F-statistic: 0.06455 on 3 and 1496 DF,  p-value: 0.9786
plot(
  single_reg$fitted.values,
  single_reg$residuals,
  main = "Residual Plot",
  xlab = "Fitted Values",
  ylab = "Residuals",
  col = "blue"
)

abline(h = 0, col = "red")

actual_predicted <- data.frame(
  Actual = ecommerce$Amount,
  Predicted = predicted_values
)

ggplot(actual_predicted,
       aes(x = Actual,
           y = Predicted)) +
  geom_point(color = "darkgreen") +
  labs(
    title = "Actual vs Predicted Values",
    x = "Actual Amount",
    y = "Predicted Amount"
  )

boxplot(
  ecommerce$Amount,
  main = "Outlier Detection",
  col = "pink"
)

qqnorm(ecommerce$Amount)

qqline(
  ecommerce$Amount,
  col = "red"
)

cov(
  ecommerce$Amount,
  ecommerce$Profit,
  use = "complete.obs"
)
## [1] 18875.2
ggplot(ecommerce,
       aes(x = Amount)) +
  geom_density(fill = "lightblue") +
  labs(
    title = "Density Plot of Amount",
    x = "Amount",
    y = "Density"
  )