r_week3

# Load the data set from the CSV file hosted in the author's GitHub repository.
path <- "https://raw.githubusercontent.com/karmaggyatso/CUNY_SPS/main/week3/CigarettesB.csv"
cigData <- read.csv(path)

# 1. Data Exploration: This should include summary statistics, means, medians, quartiles, or any other relevant information about the data set. Please include some conclusions in the R Markdown text.
# Per-column overview: min, quartiles, median, mean and max for the numeric
# columns; the X column is read as character, so only its length/class show.
summary(cigData)

##       X                 packs           price             income     
##  Length:46          Min.   :4.409   Min.   :-0.0326   Min.   :4.529  
##  Class :character   1st Qu.:4.712   1st Qu.: 0.1405   1st Qu.:4.679  
##  Mode  :character   Median :4.815   Median : 0.2002   Median :4.759  
##                     Mean   :4.848   Mean   : 0.2055   Mean   :4.775  
##                     3rd Qu.:4.984   3rd Qu.: 0.2735   3rd Qu.:4.853  
##                     Max.   :5.379   Max.   : 0.3640   Max.   :5.103

# Descriptive statistics for the packs column ----

# mean of packs
mean(cigData[["packs"]])

## [1] 4.847844

# median of packs
median(cigData[["packs"]])

## [1] 4.81495

# standard deviation of packs
sd(cigData[["packs"]])

## [1] 0.1914581

# range (minimum, maximum) of packs
range(cigData[["packs"]])

## [1] 4.40859 5.37906

# Descriptive statistics for the price column ----

# mean of price
mean(cigData[["price"]])

## [1] 0.2055087

# median of price
median(cigData[["price"]])

## [1] 0.200205

# standard deviation of price
sd(cigData[["price"]])

## [1] 0.08623038

# range (minimum, maximum) of price; note the minimum is negative
range(cigData[["price"]])

## [1] -0.03260  0.36399

# Descriptive statistics for the income column ----

# mean of income
mean(cigData[["income"]])

## [1] 4.775455

# median of income
median(cigData[["income"]])

## [1] 4.758505

# standard deviation of income
sd(cigData[["income"]])

## [1] 0.1422612

# range (minimum, maximum) of income
range(cigData[["income"]])

## [1] 4.52938 5.10268

2. Data wrangling: Please perform some basic transformations. They will need to make sense but could include column renaming, creating a subset of the data, replacing values, or creating new columns with derived data (for example – if it makes sense you could sum two columns together)

# Build a working data frame from the three numeric columns only (the
# character column X is left out). The explicit names are the same ones
# data.frame() derives for unnamed `cigData$...` arguments.
dataDf <- data.frame(
  cigData.packs = cigData[["packs"]],
  cigData.price = cigData[["price"]],
  cigData.income = cigData[["income"]]
)
# Print the full data frame.
dataDf

##    cigData.packs cigData.price cigData.income
## 1        4.96213       0.20487        4.64039
## 2        4.66312       0.16640        4.68389
## 3        5.10709       0.23406        4.59435
## 4        4.50449       0.36399        4.88147
## 5        4.66983       0.32149        5.09472
## 6        5.04705       0.21929        4.87087
## 7        4.65637       0.28946        5.05960
## 8        4.80081       0.28733        4.81155
## 9        4.97974       0.12826        4.73299
## 10       4.74902       0.17541        4.64307
## 11       4.81445       0.24806        4.90387
## 12       5.11129       0.08992        4.72916
## 13       4.80857       0.24081        4.74211
## 14       4.79263       0.21642        4.79613
## 15       5.37906      -0.03260        4.64937
## 16       4.98602       0.23856        4.61461
## 17       4.98722       0.29106        4.75501
## 18       4.77751       0.12575        4.94692
## 19       4.73877       0.22613        4.99998
## 20       4.94744       0.23067        4.80620
## 21       4.69589       0.34297        4.81207
## 22       4.93990       0.13638        4.52938
## 23       5.06430       0.08731        4.78189
## 24       4.73313       0.15303        4.70417
## 25       4.77558       0.18907        4.79671
## 26       4.96642       0.32304        4.83816
## 27       5.10990       0.15852        5.00319
## 28       4.70633       0.30901        5.10268
## 29       4.58107       0.16458        4.58202
## 30       4.66496       0.34701        4.96075
## 31       4.58237       0.18197        4.69163
## 32       4.97952       0.12889        4.75875
## 33       4.72720       0.19554        4.62730
## 34       4.80363       0.22784        4.83516
## 35       4.84693       0.30324        4.84670
## 36       5.07801       0.07944        4.62549
## 37       4.81545       0.13139        4.67747
## 38       5.04939       0.15547        4.72525
## 39       4.65398       0.28196        4.73437
## 40       4.40859       0.19260        4.55586
## 41       5.08799       0.18018        4.77578
## 42       4.93065       0.11818        4.85490
## 43       4.66134       0.35053        4.85645
## 44       4.82454       0.12008        4.56859
## 45       4.83026       0.22954        4.75826
## 46       5.00087       0.10029        4.71169

# Give the columns descriptive names. Later plotting and correlation code
# refers to these exact names, so they must not change.
colnames(dataDf) <- c("total_number_of_cig_packs", "price_per_pack", "grossIncome")

# Derived column: packs multiplied by price per pack.
# NOTE(review): price has a negative minimum (-0.0326) and all three columns
# sit in a narrow band, which suggests the values may be log-transformed.
# If so, this product is not a literal cost -- confirm against the data set
# documentation before interpreting it.
dataDf$totalCost <- dataDf$total_number_of_cig_packs * dataDf$price_per_pack

# Derived column: gross income minus total cost. The signed difference is
# kept on purpose; wrapping it in abs() would report a shortfall as a surplus.
dataDf$netIncome <- dataDf$grossIncome - dataDf$totalCost

# Print the data frame with the renamed and derived columns.
dataDf

##    total_number_of_cig_packs price_per_pack grossIncome  totalCost netIncome
## 1                    4.96213        0.20487     4.64039  1.0165916  3.623798
## 2                    4.66312        0.16640     4.68389  0.7759432  3.907947
## 3                    5.10709        0.23406     4.59435  1.1953655  3.398985
## 4                    4.50449        0.36399     4.88147  1.6395893  3.241881
## 5                    4.66983        0.32149     5.09472  1.5013036  3.593416
## 6                    5.04705        0.21929     4.87087  1.1067676  3.764102
## 7                    4.65637        0.28946     5.05960  1.3478329  3.711767
## 8                    4.80081        0.28733     4.81155  1.3794167  3.432133
## 9                    4.97974        0.12826     4.73299  0.6387015  4.094289
## 10                   4.74902        0.17541     4.64307  0.8330256  3.810044
## 11                   4.81445        0.24806     4.90387  1.1942725  3.709598
## 12                   5.11129        0.08992     4.72916  0.4596072  4.269553
## 13                   4.80857        0.24081     4.74211  1.1579517  3.584158
## 14                   4.79263        0.21642     4.79613  1.0372210  3.758909
## 15                   5.37906       -0.03260     4.64937 -0.1753574  4.824727
## 16                   4.98602        0.23856     4.61461  1.1894649  3.425145
## 17                   4.98722        0.29106     4.75501  1.4515803  3.303430
## 18                   4.77751        0.12575     4.94692  0.6007719  4.346148
## 19                   4.73877        0.22613     4.99998  1.0715781  3.928402
## 20                   4.94744        0.23067     4.80620  1.1412260  3.664974
## 21                   4.69589        0.34297     4.81207  1.6105494  3.201521
## 22                   4.93990        0.13638     4.52938  0.6737036  3.855676
## 23                   5.06430        0.08731     4.78189  0.4421640  4.339726
## 24                   4.73313        0.15303     4.70417  0.7243109  3.979859
## 25                   4.77558        0.18907     4.79671  0.9029189  3.893791
## 26                   4.96642        0.32304     4.83816  1.6043523  3.233808
## 27                   5.10990        0.15852     5.00319  0.8100213  4.193169
## 28                   4.70633        0.30901     5.10268  1.4543030  3.648377
## 29                   4.58107        0.16458     4.58202  0.7539525  3.828067
## 30                   4.66496        0.34701     4.96075  1.6187878  3.341962
## 31                   4.58237        0.18197     4.69163  0.8338539  3.857776
## 32                   4.97952        0.12889     4.75875  0.6418103  4.116940
## 33                   4.72720        0.19554     4.62730  0.9243567  3.702943
## 34                   4.80363        0.22784     4.83516  1.0944591  3.740701
## 35                   4.84693        0.30324     4.84670  1.4697831  3.376917
## 36                   5.07801        0.07944     4.62549  0.4033971  4.222093
## 37                   4.81545        0.13139     4.67747  0.6327020  4.044768
## 38                   5.04939        0.15547     4.72525  0.7850287  3.940221
## 39                   4.65398        0.28196     4.73437  1.3122362  3.422134
## 40                   4.40859        0.19260     4.55586  0.8490944  3.706766
## 41                   5.08799        0.18018     4.77578  0.9167540  3.859026
## 42                   4.93065        0.11818     4.85490  0.5827042  4.272196
## 43                   4.66134        0.35053     4.85645  1.6339395  3.222510
## 44                   4.82454        0.12008     4.56859  0.5793308  3.989259
## 45                   4.83026        0.22954     4.75826  1.1087379  3.649522
## 46                   5.00087        0.10029     4.71169  0.5015373  4.210153

3. Graphics: Please make sure to display at least one scatter plot, box plot and histogram. Don’t be limited to this. Please explore the many other options in R packages such as ggplot2

# scatterplot: totalCost against grossIncome, with explicit labels instead of
# the default deparsed expressions (e.g. "dataDf$grossIncome")
plot(
  x = dataDf$grossIncome,
  y = dataDf$totalCost,
  xlab = "grossIncome",
  ylab = "totalCost",
  main = "totalCost vs. grossIncome"
)

# box plot: the section prompt asks for one and none was drawn; shows the
# spread of price_per_pack
boxplot(
  dataDf$price_per_pack,
  ylab = "price_per_pack",
  main = "Distribution of price_per_pack"
)

# histogram: distribution of total_number_of_cig_packs
hist(
  dataDf$total_number_of_cig_packs,
  xlab = "total_number_of_cig_packs",
  main = "Distribution of total_number_of_cig_packs"
)

4. Meaningful question for analysis: Please state at the beginning a meaningful question for analysis. Use the first three steps and anything else that would be helpful to answer the question you are posing from the data set you chose. Please write a brief conclusion paragraph in R markdown at the end.

# Correlation coefficient between grossIncome and totalCost
# (cor() with its default method).
cor(
  x = dataDf[["grossIncome"]],
  y = dataDf[["totalCost"]]
)

## [1] 0.4878801

r_week3_hw

karmaGyatso

2022-08-01

2. Data wrangling: Please perform some basic transformations. They will need to make sense but could include column renaming, creating a subset of the data, replacing values, or creating new columns with derived data (for example – if it makes sense you could sum two columns together)

3. Graphics: Please make sure to display at least one scatter plot, box plot and histogram. Don’t be limited to this. Please explore the many other options in R packages such as ggplot2