Book Sales

Analysis of best-selling book sales (source: Kaggle; date: 17-10-2023).

# Read the best-selling-books CSV into a data frame and show its structure.
data <- read.csv(file = "C:/Users/abhishek/Downloads/best-selling-books.csv")
str(object = data)
## 'data.frame':    174 obs. of  6 variables:
##  $ Book                         : chr  "A Tale of Two Cities" "The Little Prince (Le Petit Prince)" "Harry Potter and the Philosopher's Stone" "And Then There Were None" ...
##  $ Author.s.                    : chr  "Charles Dickens" "Antoine de Saint-Exupéry" "J. K. Rowling" "Agatha Christie" ...
##  $ Original.language            : chr  "English" "French" "English" "English" ...
##  $ First.published              : int  1859 1943 1997 1939 1791 1937 1950 1887 1992 2003 ...
##  $ Approximate.sales.in.millions: num  200 200 120 100 100 100 85 83 80 80 ...
##  $ Genre                        : chr  "Historical fiction" "Novella" "Fantasy" "Mystery" ...
# Per-column summary statistics of the data frame.
summary(object = data)
##      Book            Author.s.         Original.language  First.published
##  Length:174         Length:174         Length:174         Min.   :1304   
##  Class :character   Class :character   Class :character   1st Qu.:1947   
##  Mode  :character   Mode  :character   Mode  :character   Median :1974   
##                                                           Mean   :1963   
##                                                           3rd Qu.:1995   
##                                                           Max.   :2018   
##  Approximate.sales.in.millions    Genre          
##  Min.   : 10.0                 Length:174        
##  1st Qu.: 14.0                 Class :character  
##  Median : 20.0                 Mode  :character  
##  Mean   : 30.1                                   
##  3rd Qu.: 36.3                                   
##  Max.   :200.0
# Show the first six rows.
head(data, n = 6L)
##                                       Book                Author.s.
## 1                     A Tale of Two Cities          Charles Dickens
## 2      The Little Prince (Le Petit Prince) Antoine de Saint-Exupéry
## 3 Harry Potter and the Philosopher's Stone            J. K. Rowling
## 4                 And Then There Were None          Agatha Christie
## 5        Dream of the Red Chamber (紅樓夢)               Cao Xueqin
## 6                               The Hobbit         J. R. R. Tolkien
##   Original.language First.published Approximate.sales.in.millions
## 1           English            1859                           200
## 2            French            1943                           200
## 3           English            1997                           120
## 4           English            1939                           100
## 5           Chinese            1791                           100
## 6           English            1937                           100
##                Genre
## 1 Historical fiction
## 2            Novella
## 3            Fantasy
## 4            Mystery
## 5        Family saga
## 6            Fantasy
# Show the last six rows.
tail(data, n = 6L)
##                                                                  Book
## 169                                                  The Front Runner
## 170                                                          The Goal
## 171                                                    Fahrenheit 451
## 172                                                    Angela's Ashes
## 173 The Story of My Experiments with Truth (સત્યના પ્રયોગો અથવા આત્મકથા)
## 174                                             Bridget Jones's Diary
##                      Author.s. Original.language First.published
## 169       Patricia Nell Warren           English            1974
## 170        Eliyahu M. Goldratt           English            1984
## 171               Ray Bradbury           English            1953
## 172              Frank McCourt           English            1996
## 173 Mohandas Karamchand Gandhi          Gujarati            1929
## 174             Helen Fielding           English            1996
##     Approximate.sales.in.millions Genre
## 169                            10      
## 170                            10      
## 171                            10      
## 172                            10      
## 173                            10      
## 174                            10
library(ggplot2)
# Bar plot of sales against year of first publication.
# Bar heights are taken directly from the sales column; books sharing a
# publication year are stacked on the same bar.
ggplot(
  data = data,
  mapping = aes(x = First.published, y = Approximate.sales.in.millions)
) +
  geom_col(fill = "blue") +
  labs(x = "Published year", y = "Sales", title = "Book vs sale")

Books first published between 1900 and 2000 account for most of the sales.

# Histogram of sales (in millions), using bins 3 units wide.
ggplot(data = data) +
  geom_histogram(
    mapping = aes(x = Approximate.sales.in.millions),
    binwidth = 3,
    colour = "red",
    fill = "green"
  ) +
  labs(x = "Sales", y = "Frequency", title = "Book Sales")

Most books sold fewer than 50 million copies. No book has sales of around 150 million copies.

# Scatter plot of sales against year of first publication (one point per book).
ggplot(
  data = data,
  mapping = aes(x = First.published, y = Approximate.sales.in.millions)
) +
  geom_point(colour = "red") +
  ggtitle("Year vs sales") +
  xlab("Published year") +
  ylab("Sales")

Books first published between 1900 and 2000 account for most of the sales.

# Tabulate the books by original language and keep the language names.
a <- table(data[["Original.language"]])
b <- names(a)
a  # displaying no of books in each language
## 
##    Chinese      Czech      Dutch    English     French     German   Gujarati 
##          4          1          1        131          5          5          1 
##      Hindi    Italian   Japanese  Norwegian Portuguese    Russian    Spanish 
##          2          4          5          2          1          6          3 
##    Swedish    Yiddish 
##          2          1
# Percentage of books per original language, rounded to whole percents.
share <- round(a / sum(a) * 100)
# `a` is reused for the printable "NN%" slice labels.
a <- paste0(share, "%")
# Data frame with the data to be plotted.
#   category - language name
#   value    - text label ("NN%") drawn on each slice
#   share    - numeric percentage that sizes each slice
# The slices must be sized by the numeric `share`: mapping the character
# labels to `y` makes the scale discrete, so slice angles would not reflect
# each language's share. `as.numeric()` drops the table class so that
# data.frame() stores a plain numeric column.
c <- data.frame(category = b, value = a, share = as.numeric(share))

ggplot(c, aes(x = "", y = share, fill = category)) +
  geom_bar(stat = "identity", width = 1) +
  # Polar coordinates on y turn the single stacked bar into a pie chart.
  coord_polar(theta = "y") +
  # Place each label at the middle of its slice.
  geom_text(aes(label = value), position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = rainbow(length(b))) +
  labs(title = "No.of books in a Language")

Most of the books were originally published in English.

# Box plot of the sales distribution (in millions).
ggplot(data = data, mapping = aes(x = Approximate.sales.in.millions)) +
  geom_boxplot(colour = "black", fill = "orange") +
  labs(x = "Sales", title = "Sales")