Analysis of book sales (source: Kaggle, date: 17-10-2023)
# Load the best-selling-books CSV into `data` and show its structure.
data <- read.csv(
  file = "C:/Users/abhishek/Downloads/best-selling-books.csv"
)
str(object = data)
## 'data.frame': 174 obs. of 6 variables:
## $ Book : chr "A Tale of Two Cities" "The Little Prince (Le Petit Prince)" "Harry Potter and the Philosopher's Stone" "And Then There Were None" ...
## $ Author.s. : chr "Charles Dickens" "Antoine de Saint-Exupéry" "J. K. Rowling" "Agatha Christie" ...
## $ Original.language : chr "English" "French" "English" "English" ...
## $ First.published : int 1859 1943 1997 1939 1791 1937 1950 1887 1992 2003 ...
## $ Approximate.sales.in.millions: num 200 200 120 100 100 100 85 83 80 80 ...
## $ Genre : chr "Historical fiction" "Novella" "Fantasy" "Mystery" ...
# Per-column summary of the data set.
summary(object = data)
## Book Author.s. Original.language First.published
## Length:174 Length:174 Length:174 Min. :1304
## Class :character Class :character Class :character 1st Qu.:1947
## Mode :character Mode :character Mode :character Median :1974
## Mean :1963
## 3rd Qu.:1995
## Max. :2018
## Approximate.sales.in.millions Genre
## Min. : 10.0 Length:174
## 1st Qu.: 14.0 Class :character
## Median : 20.0 Mode :character
## Mean : 30.1
## 3rd Qu.: 36.3
## Max. :200.0
# First six rows of the data set.
head(x = data, n = 6L)
## Book Author.s.
## 1 A Tale of Two Cities Charles Dickens
## 2 The Little Prince (Le Petit Prince) Antoine de Saint-Exupéry
## 3 Harry Potter and the Philosopher's Stone J. K. Rowling
## 4 And Then There Were None Agatha Christie
## 5 Dream of the Red Chamber (紅樓夢) Cao Xueqin
## 6 The Hobbit J. R. R. Tolkien
## Original.language First.published Approximate.sales.in.millions
## 1 English 1859 200
## 2 French 1943 200
## 3 English 1997 120
## 4 English 1939 100
## 5 Chinese 1791 100
## 6 English 1937 100
## Genre
## 1 Historical fiction
## 2 Novella
## 3 Fantasy
## 4 Mystery
## 5 Family saga
## 6 Fantasy
# Last six rows of the data set.
tail(x = data, n = 6L)
## Book
## 169 The Front Runner
## 170 The Goal
## 171 Fahrenheit 451
## 172 Angela's Ashes
## 173 The Story of My Experiments with Truth (સત્યના પ્રયોગો અથવા આત્મકથા)
## 174 Bridget Jones's Diary
## Author.s. Original.language First.published
## 169 Patricia Nell Warren English 1974
## 170 Eliyahu M. Goldratt English 1984
## 171 Ray Bradbury English 1953
## 172 Frank McCourt English 1996
## 173 Mohandas Karamchand Gandhi Gujarati 1929
## 174 Helen Fielding English 1996
## Approximate.sales.in.millions Genre
## 169 10
## 170 10
## 171 10
## 172 10
## 173 10
## 174 10
library(ggplot2)
# Bar chart: sales (in millions) against the year of first publication.
ggplot(
  data = data,
  mapping = aes(x = First.published, y = Approximate.sales.in.millions)
) +
  geom_col(fill = "blue") +
  labs(title = "Book vs sale", x = "Published year", y = "Sales")
Books first published between 1900 and 2000 account for most of the sales.
# Histogram of sales (in millions), using bins 3 units wide.
ggplot(data) +
  geom_histogram(
    aes(x = Approximate.sales.in.millions),
    binwidth = 3,
    fill = "green",
    colour = "red"
  ) +
  labs(title = "Book Sales", x = "Sales", y = "Frequency")
Most books sold fewer than 50 million copies. No book has sales of around 150 million copies.
# Scatter plot: one point per book, sales (in millions) against the year of
# first publication.
ggplot(data, aes(First.published, Approximate.sales.in.millions)) +
  geom_point(colour = "red") +
  ggtitle("Year vs sales") +
  xlab("Published year") +
  ylab("Sales")
Books first published between 1900 and 2000 account for most of the sales.
# Count the books per original language; `b` holds the language names.
a <- table(data[["Original.language"]])
b <- names(a)
# Display the number of books in each language.
print(a)
##
## Chinese Czech Dutch English French German Gujarati
## 4 1 1 131 5 5 1
## Hindi Italian Japanese Norwegian Portuguese Russian Spanish
## 2 4 5 2 1 6 3
## Swedish Yiddish
## 2 1
# Pie chart of the share of books per original language.
# Reads `a` (table of book counts per language) and `b` (the language names)
# created by the table() step earlier in the script.
share <- round(a / sum(a) * 100)

# Data frame with the data to be plotted. The numeric counts drive the slice
# sizes and the "NN%" text is kept in a separate column used only as a label.
# Mapping the pasted percentage strings to `y` would make `y` a discrete
# variable, so the slices would no longer be proportional to the counts.
lang_share <- data.frame(
  category = b,
  value = as.vector(a),
  label = paste0(share, "%")
)

ggplot(lang_share, aes(x = "", y = value, fill = category)) +
  geom_bar(stat = "identity", width = 1) +
  # Polar coordinates on y turn the single stacked bar into a pie.
  coord_polar(theta = "y") +
  # Centre each percentage label inside its own slice.
  geom_text(aes(label = label), position = position_stack(vjust = 0.5)) +
  scale_fill_manual(values = rainbow(length(b))) +
  labs(title = "No.of books in a Language")
Most of the books were originally published in English.
# Box plot of sales (in millions), drawn along the x axis.
ggplot(data) +
  geom_boxplot(
    aes(x = Approximate.sales.in.millions),
    fill = "orange",
    colour = "black"
  ) +
  ggtitle("Sales") +
  xlab("Sales")