# Descriptive statistics for CigarettesSW: an overall summary followed by the
# mean, median and quantiles of the income and taxs columns. Lines starting
# with "## " are the console output recorded for the statement above them.
summary(CigarettesSW)
## X state year cpi
## Min. : 1.00 Length:96 Min. :1985 Min. :1.076
## 1st Qu.:24.75 Class :character 1st Qu.:1985 1st Qu.:1.076
## Median :48.50 Mode :character Median :1990 Median :1.300
## Mean :48.50 Mean :1990 Mean :1.300
## 3rd Qu.:72.25 3rd Qu.:1995 3rd Qu.:1.524
## Max. :96.00 Max. :1995 Max. :1.524
## population packs income tax
## Min. : 478447 Min. : 49.27 Min. : 6887097 Min. :18.00
## 1st Qu.: 1622606 1st Qu.: 92.45 1st Qu.: 25520384 1st Qu.:31.00
## Median : 3697472 Median :110.16 Median : 61661644 Median :37.00
## Mean : 5168866 Mean :109.18 Mean : 99878736 Mean :42.68
## 3rd Qu.: 5901500 3rd Qu.:123.52 3rd Qu.:127313964 3rd Qu.:50.88
## Max. :31493524 Max. :197.99 Max. :771470144 Max. :99.00
## price taxs
## Min. : 84.97 Min. : 21.27
## 1st Qu.:102.71 1st Qu.: 34.77
## Median :137.72 Median : 41.05
## Mean :143.45 Mean : 48.33
## 3rd Qu.:176.15 3rd Qu.: 59.48
## Max. :240.85 Max. :112.63
# Mean of income. The recorded result below had no statement producing it;
# the value matches the income mean reported by summary() above.
mean(CigarettesSW$income)
## [1] 99878736
mean(CigarettesSW$taxs)
## [1] 48.32615
median(CigarettesSW$income)
## [1] 61661644
median(CigarettesSW$taxs)
## [1] 41.04875
# quantile() with default probabilities returns the 0/25/50/75/100% points.
quantile(CigarettesSW$income)
## 0% 25% 50% 75% 100%
## 6887097 25520384 61661644 127313964 771470144
quantile(CigarettesSW$taxs)
## 0% 25% 50% 75% 100%
## 21.26800 34.77013 41.04875 59.47698 112.63300
From the analysis above we can ascertain the following. The means of tax (the `taxs` column) and income are 48.32615 and 99878736, respectively. The medians of income and tax are 61661644 and 41.04875, respectively. The quantiles are, for income: 0% = 6887097, 25% = 25520384, 50% = 61661644, 75% = 127313964, 100% = 771470144; and for tax: 0% = 21.26800, 25% = 34.77013, 50% = 41.04875, 75% = 59.47698, 100% = 112.63300.
# Data wrangling: keep income, tax and price for the rows whose taxs exceeds
# 48.5, give the columns descriptive names, then overwrite three specific
# income figures with rounded values. The frame is displayed after each step.
# which() drops rows where the comparison is NA, as subset() would.
CigarettesSW_Wrangling <- CigarettesSW[
  which(CigarettesSW$taxs > 48.5),
  c("income", "tax", "price")
]
CigarettesSW_Wrangling

library(plyr)
# plyr::rename() takes a named vector of the form c(old_name = "new_name").
column_labels <- c(
  income = "Gross_income",
  tax = "Total_tax",
  price = "General_price"
)
CigarettesSW_Wrangling <- rename(CigarettesSW_Wrangling, replace = column_labels)
CigarettesSW_Wrangling

# Apply the three overrides in the same order as before, on a working copy
# of the column that is written back once all of them are done.
gross <- CigarettesSW_Wrangling$Gross_income
gross <- replace(gross, gross == 60063368, 70000000)
gross <- replace(gross, gross == 45995496, 50000000)
gross <- replace(gross, gross == 88870496, 90000000)
CigarettesSW_Wrangling$Gross_income <- gross
CigarettesSW_Wrangling
# Base-graphics overview of the data: a scatter plot of tax against income,
# a box plot of packs grouped by price, and a histogram of population.
# NOTE(review): the summary() output recorded in this file lists tax from 18
# to 99, so ylim = c(42, 74) leaves part of the data off the plot - confirm
# that the clipping is intended.
with(
  CigarettesSW,
  plot(
    x = income,
    y = tax,
    main = "income vs. tax",
    xlab = "income",
    ylab = "tax",
    xlim = c(2e7, 8e8),
    ylim = c(42, 74),
    col = "darkred"
  )
)
# NOTE(review): price is numeric, so every distinct price becomes its own
# box - confirm whether price was meant to be binned first.
boxplot(
  packs ~ price,
  data = CigarettesSW,
  main = "price vs. packs",
  xlab = "price",
  ylab = "packs",
  col = "orange"
)
hist(
  CigarettesSW$population,
  breaks = 6,
  col = "green",
  main = "Population",
  xlab = "population"
)
library(gridExtra)
library(ggplot2)
library(plyr)
# ggplot2 scatter of tax against income (cross-shaped markers, shape 3) with
# a fitted linear-model trend line; the titled plot is displayed, while `x`
# itself keeps the untitled version.
x <- ggplot(data = CigarettesSW, mapping = aes(x = income, y = tax)) +
  geom_point(shape = 3) +
  geom_smooth(method = lm)
x + ggtitle("income vs. tax")
## `geom_smooth()` using formula 'y ~ x'
# Panel p1: dodged bar chart of tax against income, filled by year, with the
# survey years relabelled 1985 -> 1995, 1990 -> 2000 and 1995 -> 2005.
# NOTE(review): summary() reports `state` as a character column, so
# `state > 4` is a string comparison - confirm the intended row filter.
CigarettesSW_4 <- subset(
  CigarettesSW,
  state > 4,
  c("income", "tax", "year", "price")
)

# Relabel every year in a single pass, looking each row up by its ORIGINAL
# value. Recoding one value at a time moved the 1985 rows to 1995 and then
# matched them again in the 1995 -> 2005 step, collapsing two distinct years
# into 2005.
year_from <- c(1985, 1990, 1995)
year_to <- c(1995, 2000, 2005)
year_idx <- match(CigarettesSW_4$year, year_from)
year_hit <- !is.na(year_idx) # rows with any other year (or NA) stay as-is
CigarettesSW_4$year[year_hit] <- year_to[year_idx[year_hit]]

x <- ggplot(data = CigarettesSW_4, aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p1 <- x + ggtitle("Income vs. tax")
# Panel p2: dodged bar chart of tax against income, filled by year, with the
# years relabelled 1985 -> 2000, 1990 -> 2005 and 1995 -> 2010.
# NOTE(review): summary() reports `state` as a character column, so
# `state > 8` is a string comparison - confirm the intended row filter.
CigarettesSW_5 <- subset(
  CigarettesSW,
  subset = state > 8,
  select = c("income", "tax", "year", "price")
)

# Relabel on a working copy, one year at a time in the original order, and
# write the column back once all three steps are done.
yr <- CigarettesSW_5$year
yr <- replace(yr, yr == 1985, 2000)
yr <- replace(yr, yr == 1990, 2005)
yr <- replace(yr, yr == 1995, 2010)
CigarettesSW_5$year <- yr

x <- ggplot(data = CigarettesSW_5, mapping = aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p2 <- x + ggtitle("Income vs. tax")
# Panel p3: dodged bar chart of tax against income, filled by year, with the
# years relabelled 1985 -> 2005, 1990 -> 2010 and 1995 -> 2015. The three
# panels p1, p2 and p3 are then laid out on a two-row grid.
# NOTE(review): summary() reports `state` as a character column, so
# `state > 10` is a string comparison - confirm the intended row filter.
CigarettesSW_6 <- subset(
  CigarettesSW,
  subset = state > 10,
  select = c("income", "tax", "year", "price")
)

# Each pair is c(old_year, new_year); pairs are applied in the listed order.
year_relabels <- list(c(1985, 2005), c(1990, 2010), c(1995, 2015))
for (relabel in year_relabels) {
  CigarettesSW_6$year[CigarettesSW_6$year == relabel[1]] <- relabel[2]
}

x <- ggplot(data = CigarettesSW_6, mapping = aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p3 <- x + ggtitle("Income vs. tax")

grid.arrange(p1, p2, p3, nrow = 2)
# Panel p4: dodged bar chart of tax against income, filled by year, with the
# years relabelled 1985 -> 2010, 1990 -> 2015 and 1995 -> 2020 and the income
# column passed through plyr::mapvalues() with three groups of codes.
# NOTE(review): summary() reports `state` as a character column, so
# `state > 0` is a string comparison - confirm the intended row filter.
CigarettesSW_7 <- subset(
  CigarettesSW,
  subset = state > 0,
  select = c("income", "tax", "year", "price")
)

# Each pair is c(old_year, new_year); pairs are applied in the listed order.
year_relabels <- list(c(1985, 2010), c(1990, 2015), c(1995, 2020))
for (relabel in year_relabels) {
  CigarettesSW_7$year[CigarettesSW_7$year == relabel[1]] <- relabel[2]
}

# Map codes 1-4, 5-8 and 9-12 of income to a percentage label, one group per
# mapvalues() call.
# NOTE(review): the messages recorded below report that none of the codes
# 1-12 occur in income, so no value is relabelled - confirm which column or
# coding this step was meant for.
income_bins <- list(
  list(codes = c(1, 2, 3, 4), label = "increase 5%"),
  list(codes = c(5, 6, 7, 8), label = "increase 10%"),
  list(codes = c(9, 10, 11, 12), label = "increase 15%")
)
for (bin in income_bins) {
  CigarettesSW_7$income <- mapvalues(
    CigarettesSW_7$income,
    from = bin$codes,
    to = rep(bin$label, length(bin$codes))
  )
}
## The following `from` values were not present in `x`: 1, 2, 3, 4
## The following `from` values were not present in `x`: 5, 6, 7, 8
## The following `from` values were not present in `x`: 9, 10, 11, 12

x <- ggplot(data = CigarettesSW_7, mapping = aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p4 <- x + ggtitle("Income vs. tax")
p4
What kind of impact do the taxes have on gross income and price?
The main takeaway is that as taxes increase in percentage terms, the actual income is lower and the price is higher. We can see this by examining the graphs in which tax is plotted against income. For instance, where gross income is 771470144, the total tax is 61.00000; on the other hand, a gross income of 88870496 comes with a tax of 65.33333, which negatively affects the price: it is 210.5047 in the first scenario and 198.6075 in the second. In conclusion, the size of the tax correlates negatively with income and price, meaning that the higher the tax, the weaker the positive relationship we will observe between gross income and price.
# Read the CigarettesSW CSV from its raw GitHub URL with read.csv() defaults
# and display the resulting data frame.
CigarettesSW_giturl <- "https://raw.githubusercontent.com/IvanGrozny88/Ivan/main/CigarettesSW.csv"
go_CigarettesSW <- read.csv(CigarettesSW_giturl)
go_CigarettesSW