# Column-wise overview of the data set. Lines starting with `##` are the
# console output recorded when the document was knitted.
summary(CigarettesSW)
##        X            state                year           cpi       
##  Min.   : 1.00   Length:96          Min.   :1985   Min.   :1.076  
##  1st Qu.:24.75   Class :character   1st Qu.:1985   1st Qu.:1.076  
##  Median :48.50   Mode  :character   Median :1990   Median :1.300  
##  Mean   :48.50                      Mean   :1990   Mean   :1.300  
##  3rd Qu.:72.25                      3rd Qu.:1995   3rd Qu.:1.524  
##  Max.   :96.00                      Max.   :1995   Max.   :1.524  
##    population           packs            income               tax       
##  Min.   :  478447   Min.   : 49.27   Min.   :  6887097   Min.   :18.00  
##  1st Qu.: 1622606   1st Qu.: 92.45   1st Qu.: 25520384   1st Qu.:31.00  
##  Median : 3697472   Median :110.16   Median : 61661644   Median :37.00  
##  Mean   : 5168866   Mean   :109.18   Mean   : 99878736   Mean   :42.68  
##  3rd Qu.: 5901500   3rd Qu.:123.52   3rd Qu.:127313964   3rd Qu.:50.88  
##  Max.   :31493524   Max.   :197.99   Max.   :771470144   Max.   :99.00  
##      price             taxs       
##  Min.   : 84.97   Min.   : 21.27  
##  1st Qu.:102.71   1st Qu.: 34.77  
##  Median :137.72   Median : 41.05  
##  Mean   :143.45   Mean   : 48.33  
##  3rd Qu.:176.15   3rd Qu.: 59.48  
##  Max.   :240.85   Max.   :112.63
# Means of the `income` and `taxs` columns. The income call was missing from
# the listing even though its output was present; the value matches the income
# mean reported by summary() above.
mean(CigarettesSW$income)
## [1] 99878736
mean(CigarettesSW$taxs)
## [1] 48.32615
# Medians of the same two columns.
median(CigarettesSW$income)
## [1] 61661644
median(CigarettesSW$taxs)
## [1] 41.04875
# quantile() with default arguments reports the 0/25/50/75/100% points.
quantile(CigarettesSW$income)
##        0%       25%       50%       75%      100% 
##   6887097  25520384  61661644 127313964 771470144
quantile(CigarettesSW$taxs)
##        0%       25%       50%       75%      100% 
##  21.26800  34.77013  41.04875  59.47698 112.63300

From the analysis above we can ascertain the following. The means of tax and income are 48.32615 and 99878736, respectively; the medians of income and tax are 61661644 and 41.04875, respectively. The quantile values are: income (0%: 6887097, 25%: 25520384, 50%: 61661644, 75%: 127313964, 100%: 771470144) and tax (0%: 21.26800, 25%: 34.77013, 50%: 41.04875, 75%: 59.47698, 100%: 112.63300).

# Keep only the rows whose `taxs` exceeds 48.5, restricted to three columns.
CigarettesSW_Wrangling <- subset(
  CigarettesSW,
  subset = taxs > 48.5,
  select = c("income", "tax", "price")
)
CigarettesSW_Wrangling
library(plyr)
# plyr::rename() takes a named vector of the form old_name = "new_name".
CigarettesSW_Wrangling <- rename(
  CigarettesSW_Wrangling,
  replace = c(
    income = "Gross_income",
    tax = "Total_tax",
    price = "General_price"
  )
)
CigarettesSW_Wrangling
# Overwrite three specific income figures with replacement values; rows that
# do not hold exactly these figures are left untouched.
CigarettesSW_Wrangling$Gross_income <- replace(
  CigarettesSW_Wrangling$Gross_income,
  CigarettesSW_Wrangling$Gross_income == 60063368,
  70000000
)
CigarettesSW_Wrangling$Gross_income <- replace(
  CigarettesSW_Wrangling$Gross_income,
  CigarettesSW_Wrangling$Gross_income == 45995496,
  50000000
)
CigarettesSW_Wrangling$Gross_income <- replace(
  CigarettesSW_Wrangling$Gross_income,
  CigarettesSW_Wrangling$Gross_income == 88870496,
  90000000
)
CigarettesSW_Wrangling
# Base-graphics scatter of tax against income. The axis limits zoom in on
# income 2e7-8e8 and tax 42-74, so observations outside that window (the
# summary reports tax from 18 to 99) are not drawn.
with(
  CigarettesSW,
  plot(
    x = income, y = tax,
    xlab = "income", ylab = "tax",
    xlim = c(20000000, 800000000), ylim = c(42.00000, 74.00000),
    main = "income vs. tax", col = "darkred"
  )
)

# Distribution of packs within each price quartile. `price` is a continuous
# variable, so using it directly as the grouping term draws one box per
# distinct price value; binning it into quartile bands first gives four
# groups that can actually be compared.
boxplot(
  packs ~ cut(
    price,
    breaks = quantile(price, probs = seq(0, 1, by = 0.25), na.rm = TRUE),
    include.lowest = TRUE,  # keep the minimum price inside the first band
    dig.lab = 4
  ),
  data = CigarettesSW,
  xlab = "price", ylab = "packs",
  main = "price vs. packs", col = "orange"
)

# Histogram of the population column; `breaks = 6` is a suggested bin count.
with(
  CigarettesSW,
  hist(
    population,
    breaks = 6,
    col = "green",
    main = "Population",
    xlab = "population"
  )
)

library(gridExtra)
library(ggplot2)
library(plyr)
# ggplot2 scatter of tax against income (cross-shaped markers) with a fitted
# linear-model smoother; the title is added when the plot is printed.
x <- ggplot(data = CigarettesSW, mapping = aes(x = income, y = tax)) +
  geom_point(shape = 3) +
  geom_smooth(method = lm)
x + ggtitle("income vs. tax")
## `geom_smooth()` using formula 'y ~ x'

# Data for the first panel (p1).
# NOTE(review): summary() reports `state` as a character column, so
# `state > 4` is a string comparison rather than a filter on a numeric
# code - confirm which rows this subset is meant to keep.
CigarettesSW_4 <- subset(
  CigarettesSW,
  subset = state > 4,
  select = c("income", "tax", "year", "price")
)
# Move 1985/1990/1995 forward to 1995/2000/2005 in a single pass. Recoding
# with three sequential assignments re-maps the rows that were just set to
# 1995 on to 2005, collapsing 1985 and 1995 into the same year.
CigarettesSW_4$year <- ifelse(
  CigarettesSW_4$year %in% c(1985, 1990, 1995),
  CigarettesSW_4$year + 10,
  CigarettesSW_4$year
)
# Dodged bars of tax by income, filled by the shifted year.
x <- ggplot(data = CigarettesSW_4, aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p1 <- x + ggtitle("Income vs. tax")

# Data for the second panel (p2).
# NOTE(review): summary() reports `state` as a character column, so
# `state > 8` is a string comparison - confirm the intended filter.
CigarettesSW_5 <- subset(
  CigarettesSW,
  subset = state > 8,
  select = c("income", "tax", "year", "price")
)
# Recode the years 1985/1990/1995 to 2000/2005/2010.
CigarettesSW_5 <- within(CigarettesSW_5, {
  year[year == 1985] <- 2000
  year[year == 1990] <- 2005
  year[year == 1995] <- 2010
})
# Dodged bars of tax by income, filled by the recoded year.
x <- ggplot(data = CigarettesSW_5, aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p2 <- x + ggtitle("Income vs. tax")

# Data for the third panel (p3).
# NOTE(review): summary() reports `state` as a character column, so
# `state > 10` is a string comparison - confirm the intended filter.
CigarettesSW_6 <- subset(
  CigarettesSW,
  subset = state > 10,
  select = c("income", "tax", "year", "price")
)
# Recode the years 1985/1990/1995 to 2005/2010/2015.
CigarettesSW_6 <- within(CigarettesSW_6, {
  year[year == 1985] <- 2005
  year[year == 1990] <- 2010
  year[year == 1995] <- 2015
})
# Dodged bars of tax by income, filled by the recoded year.
x <- ggplot(data = CigarettesSW_6, aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p3 <- x + ggtitle("Income vs. tax")
# Lay the three panels out on a grid with two rows.
grid.arrange(p1, p2, p3, nrow = 2)

# Data for the stand-alone plot p4.
# NOTE(review): summary() reports `state` as a character column, so
# `state > 0` is a string comparison - confirm the intended filter.
CigarettesSW_7 <- subset(
  CigarettesSW,
  subset = state > 0,
  select = c("income", "tax", "year", "price")
)
# Recode the years 1985/1990/1995 to 2010/2015/2020.
CigarettesSW_7 <- within(CigarettesSW_7, {
  year[year == 1985] <- 2010
  year[year == 1990] <- 2015
  year[year == 1995] <- 2020
})

# Attempt to relabel income codes 1-12 as percentage-increase bands.
# NOTE(review): the messages recorded after each call show that none of the
# `from` values occur in `income`, so no value is relabelled. Assigning
# character labels may still change the column's type - confirm the
# intended bucketing.
CigarettesSW_7$income <- mapvalues(
  CigarettesSW_7$income,
  from = c(1, 2, 3, 4),
  to = rep("increase 5%", 4)
)
## The following `from` values were not present in `x`: 1, 2, 3, 4
CigarettesSW_7$income <- mapvalues(
  CigarettesSW_7$income,
  from = c(5, 6, 7, 8),
  to = rep("increase 10%", 4)
)
## The following `from` values were not present in `x`: 5, 6, 7, 8
CigarettesSW_7$income <- mapvalues(
  CigarettesSW_7$income,
  from = c(9, 10, 11, 12),
  to = rep("increase 15%", 4)
)
## The following `from` values were not present in `x`: 9, 10, 11, 12
# Dodged bars of tax by income, filled by the recoded year.
x <- ggplot(data = CigarettesSW_7, aes(x = income, y = tax, fill = year)) +
  geom_bar(stat = "identity", position = position_dodge(), color = "black")
p4 <- x + ggtitle("Income vs. tax")
p4

What kind of impact do the taxes have on gross income and price?

The main takeaway is that as taxes increase in percentage terms, actual income is lower and the price is higher. We can see this by examining the graphs that plot tax against income. For instance, where gross income is 771470144, the total tax is 61.00000, whereas a gross income of 88870496 comes with a tax of 65.33333; the price is also affected negatively, being 210.5047 in the first case and 198.6075 in the second. In conclusion, the size of the tax correlates negatively with income and price: the higher the tax, the weaker the positive correlation we observe between gross income and price.

# Location of the raw CSV copy of the data set on GitHub.
CigarettesSW_giturl <-
  "https://raw.githubusercontent.com/IvanGrozny88/Ivan/main/CigarettesSW.csv"

# Download and parse the CSV, then print the resulting data frame.
go_CigarettesSW <- read.csv(CigarettesSW_giturl)
go_CigarettesSW