“Impact of Age on Income with respect to the Education and Gender”

Now, to read tha data,

data=read.csv(file.choose(),header=T,sep=",")
data
##     Age income edu Gender
## 1    54  85000  18      1
## 2    24  20000  16      1
## 3    23  19000  16      1
## 4    16  12000  12      0
## 5    18   7000  12      0
## 6    23  15000  12      0
## 7    50  80000  18      1
## 8    23  10000  14      1
## 9    22  25000  16      1
## 10   29  30000  18      1
## 11   26  30000  16      0
## 12   25  20000  16      0
## 13   26  15000  16      0
## 14   24  10000  16      1
## 15   24  10000  16      0
## 16   24  10000  16      0
## 17   25  20000  16      0
## 18   23  50000  18      1
## 19   34  25000  16      1
## 20   24  40000  16      1
## 21   18  50000  14      1
## 22   21  25000  16      0
## 23   24  60000  16      1
## 24   26  20000  16      1
## 25   20  20000  12      1
## 26   21  20000  14      0
## 27   21  50000  16      1
## 28   23  40000  16      1
## 29   60  50000  18      1
## 30   53  50000  12      1
## 31   60  80000  16      1
## 32   36 150000  18      1
## 33   53 200000  18      1
## 34   32  50000  16      1
## 35   28  15000  12      1
## 36   23  15000  12      1
## 37   21  50000  16      1
## 38   26  30000  14      1
## 39   21  20000  12      1
## 40   22  50000  16      1
## 41   62  80000  16      1
## 42   31  40000  16      0
## 43   35  35000  14      1
## 44   35  40000  14      1
## 45   30  20000  16      1
## 46   25  14000  16      1
## 47   21  10000  14      1
## 48   20  15000  14      1
## 49   29 120000  18      1
## 50   26  12000  14      1
## 51   32  80000  16      1
## 52   27  15000  14      1
## 53   24  20000  12      1
## 54   36  30000  16      1
## 55   25 200000  16      1
## 56   18  15000  12      1
## 57   24  40000  16      1
## 58   21  12000  12      1
## 59   20  40000  14      1
## 60   22  30000  16      1
## 61   39  50000  18      1
## 62   22  12000  12      0
## 63   19  13000  14      0
## 64   26  40000  16      1
## 65   38 100000  16      1
## 66   26  20000  14      1
## 67   25  14000  16      1
## 68   22  15000  16      1
## 69   25  48000  16      1
## 70   25  25000  16      0
## 71   22  35000  16      1
## 72   24  20000  16      1
## 73   25  25000  16      1
## 74   22  25000  14      1
## 75   30  30000  16      1
## 76   24 250000  16      1
## 77   39 100000  16      1
## 78   28  25000  16      0
## 79   44  30000  16      1
## 80   24  15000  16      1
## 81   26  20000  16      0
## 82   25  20000  16      1
## 83   26  20000  16      0
## 84   23  25000  14      0
## 85   23  12500  16      1
## 86   23  10000  14      1
## 87   24  10000  14      1
## 88   25  15000  12      0
## 89   25  20000  18      1
## 90   21  10000  16      1
## 91   24  18000  16      0
## 92   22  15000  16      1
## 93   24  15000  16      0
## 94   23  12000  16      1
## 95   30  50000  14      1
## 96   24  70000  18      1
## 97   23  50000  16      1
## 98   24  50000  16      1
## 99   22  20000  16      1
## 100  32 150000  18      1
Gender=as.factor(data$Gender)
Gender
##   [1] 1 1 1 0 0 0 1 1 1 1 0 0 0 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 1
##  [36] 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 0
##  [71] 1 1 1 1 1 1 1 0 1 1 0 1 0 0 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1
## Levels: 0 1
edu=as.factor(data$edu)
edu
##   [1] 18 16 16 12 12 12 18 14 16 18 16 16 16 16 16 16 16 18 16 16 14 16 16
##  [24] 16 12 14 16 16 18 12 16 18 18 16 12 12 16 14 12 16 16 16 14 14 16 16
##  [47] 14 14 18 14 16 14 12 16 16 12 16 12 14 16 18 12 14 16 16 14 16 16 16
##  [70] 16 16 16 16 14 16 16 16 16 16 16 16 16 16 14 16 14 14 12 18 16 16 16
##  [93] 16 16 14 18 16 16 16 18
## Levels: 12 14 16 18
str(data)
## 'data.frame':    100 obs. of  4 variables:
##  $ Age   : int  54 24 23 16 18 23 50 23 22 29 ...
##  $ income: int  85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
##  $ edu   : int  18 16 16 12 12 12 18 14 16 18 ...
##  $ Gender: int  1 1 1 0 0 0 1 1 1 1 ...
income=data[,2]
edu=data[,3]
Age=data[,1]
Gender=data[,4]
data=data.frame(Age, income, edu, Gender)
data
##     Age income edu Gender
## 1    54  85000  18      1
## 2    24  20000  16      1
## 3    23  19000  16      1
## 4    16  12000  12      0
## 5    18   7000  12      0
## 6    23  15000  12      0
## 7    50  80000  18      1
## 8    23  10000  14      1
## 9    22  25000  16      1
## 10   29  30000  18      1
## 11   26  30000  16      0
## 12   25  20000  16      0
## 13   26  15000  16      0
## 14   24  10000  16      1
## 15   24  10000  16      0
## 16   24  10000  16      0
## 17   25  20000  16      0
## 18   23  50000  18      1
## 19   34  25000  16      1
## 20   24  40000  16      1
## 21   18  50000  14      1
## 22   21  25000  16      0
## 23   24  60000  16      1
## 24   26  20000  16      1
## 25   20  20000  12      1
## 26   21  20000  14      0
## 27   21  50000  16      1
## 28   23  40000  16      1
## 29   60  50000  18      1
## 30   53  50000  12      1
## 31   60  80000  16      1
## 32   36 150000  18      1
## 33   53 200000  18      1
## 34   32  50000  16      1
## 35   28  15000  12      1
## 36   23  15000  12      1
## 37   21  50000  16      1
## 38   26  30000  14      1
## 39   21  20000  12      1
## 40   22  50000  16      1
## 41   62  80000  16      1
## 42   31  40000  16      0
## 43   35  35000  14      1
## 44   35  40000  14      1
## 45   30  20000  16      1
## 46   25  14000  16      1
## 47   21  10000  14      1
## 48   20  15000  14      1
## 49   29 120000  18      1
## 50   26  12000  14      1
## 51   32  80000  16      1
## 52   27  15000  14      1
## 53   24  20000  12      1
## 54   36  30000  16      1
## 55   25 200000  16      1
## 56   18  15000  12      1
## 57   24  40000  16      1
## 58   21  12000  12      1
## 59   20  40000  14      1
## 60   22  30000  16      1
## 61   39  50000  18      1
## 62   22  12000  12      0
## 63   19  13000  14      0
## 64   26  40000  16      1
## 65   38 100000  16      1
## 66   26  20000  14      1
## 67   25  14000  16      1
## 68   22  15000  16      1
## 69   25  48000  16      1
## 70   25  25000  16      0
## 71   22  35000  16      1
## 72   24  20000  16      1
## 73   25  25000  16      1
## 74   22  25000  14      1
## 75   30  30000  16      1
## 76   24 250000  16      1
## 77   39 100000  16      1
## 78   28  25000  16      0
## 79   44  30000  16      1
## 80   24  15000  16      1
## 81   26  20000  16      0
## 82   25  20000  16      1
## 83   26  20000  16      0
## 84   23  25000  14      0
## 85   23  12500  16      1
## 86   23  10000  14      1
## 87   24  10000  14      1
## 88   25  15000  12      0
## 89   25  20000  18      1
## 90   21  10000  16      1
## 91   24  18000  16      0
## 92   22  15000  16      1
## 93   24  15000  16      0
## 94   23  12000  16      1
## 95   30  50000  14      1
## 96   24  70000  18      1
## 97   23  50000  16      1
## 98   24  50000  16      1
## 99   22  20000  16      1
## 100  32 150000  18      1
str(data)
## 'data.frame':    100 obs. of  4 variables:
##  $ Age   : int  54 24 23 16 18 23 50 23 22 29 ...
##  $ income: int  85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
##  $ edu   : int  18 16 16 12 12 12 18 14 16 18 ...
##  $ Gender: int  1 1 1 0 0 0 1 1 1 1 ...
nrow(data)
## [1] 100

to change the data in Columns and Rows

ncol(data)
## [1] 4

Now, to recall the library of ggplot2

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3

Plots

plot(data$Age, data$income)

The above Scatter plot shows the relationship of Age with respect to Income

qplot(data$Age, data$income)

The above qplot shows the samerelationship with more effective view that is age with circles and income with triangles.

qplot(Age, income,data=data, geom=c("line", "point"))

Now, in this scatterplot, the point of age and income are joined with the lines.

Equivalent

ggplot(data, aes(x=Age, y=income)) + geom_point()

Here, just the comparison of age and income is shown through ggplot.

Scatter plot

data[, c("Age", "income")]
##     Age income
## 1    54  85000
## 2    24  20000
## 3    23  19000
## 4    16  12000
## 5    18   7000
## 6    23  15000
## 7    50  80000
## 8    23  10000
## 9    22  25000
## 10   29  30000
## 11   26  30000
## 12   25  20000
## 13   26  15000
## 14   24  10000
## 15   24  10000
## 16   24  10000
## 17   25  20000
## 18   23  50000
## 19   34  25000
## 20   24  40000
## 21   18  50000
## 22   21  25000
## 23   24  60000
## 24   26  20000
## 25   20  20000
## 26   21  20000
## 27   21  50000
## 28   23  40000
## 29   60  50000
## 30   53  50000
## 31   60  80000
## 32   36 150000
## 33   53 200000
## 34   32  50000
## 35   28  15000
## 36   23  15000
## 37   21  50000
## 38   26  30000
## 39   21  20000
## 40   22  50000
## 41   62  80000
## 42   31  40000
## 43   35  35000
## 44   35  40000
## 45   30  20000
## 46   25  14000
## 47   21  10000
## 48   20  15000
## 49   29 120000
## 50   26  12000
## 51   32  80000
## 52   27  15000
## 53   24  20000
## 54   36  30000
## 55   25 200000
## 56   18  15000
## 57   24  40000
## 58   21  12000
## 59   20  40000
## 60   22  30000
## 61   39  50000
## 62   22  12000
## 63   19  13000
## 64   26  40000
## 65   38 100000
## 66   26  20000
## 67   25  14000
## 68   22  15000
## 69   25  48000
## 70   25  25000
## 71   22  35000
## 72   24  20000
## 73   25  25000
## 74   22  25000
## 75   30  30000
## 76   24 250000
## 77   39 100000
## 78   28  25000
## 79   44  30000
## 80   24  15000
## 81   26  20000
## 82   25  20000
## 83   26  20000
## 84   23  25000
## 85   23  12500
## 86   23  10000
## 87   24  10000
## 88   25  15000
## 89   25  20000
## 90   21  10000
## 91   24  18000
## 92   22  15000
## 93   24  15000
## 94   23  12000
## 95   30  50000
## 96   24  70000
## 97   23  50000
## 98   24  50000
## 99   22  20000
## 100  32 150000
ggplot(data, aes(x=Age, y=income)) + geom_point()

ggplot(data, aes(x=Age, y=income))  + geom_point(shape=21)

ggplot(data, aes(x=Age, y=income))  + geom_point(size=1.5)

In the above diagrams, we just still comparing the age vs income with different shape sizes using ggplot.

data[, c("Gender", "Age", "income")]
##     Gender Age income
## 1        1  54  85000
## 2        1  24  20000
## 3        1  23  19000
## 4        0  16  12000
## 5        0  18   7000
## 6        0  23  15000
## 7        1  50  80000
## 8        1  23  10000
## 9        1  22  25000
## 10       1  29  30000
## 11       0  26  30000
## 12       0  25  20000
## 13       0  26  15000
## 14       1  24  10000
## 15       0  24  10000
## 16       0  24  10000
## 17       0  25  20000
## 18       1  23  50000
## 19       1  34  25000
## 20       1  24  40000
## 21       1  18  50000
## 22       0  21  25000
## 23       1  24  60000
## 24       1  26  20000
## 25       1  20  20000
## 26       0  21  20000
## 27       1  21  50000
## 28       1  23  40000
## 29       1  60  50000
## 30       1  53  50000
## 31       1  60  80000
## 32       1  36 150000
## 33       1  53 200000
## 34       1  32  50000
## 35       1  28  15000
## 36       1  23  15000
## 37       1  21  50000
## 38       1  26  30000
## 39       1  21  20000
## 40       1  22  50000
## 41       1  62  80000
## 42       0  31  40000
## 43       1  35  35000
## 44       1  35  40000
## 45       1  30  20000
## 46       1  25  14000
## 47       1  21  10000
## 48       1  20  15000
## 49       1  29 120000
## 50       1  26  12000
## 51       1  32  80000
## 52       1  27  15000
## 53       1  24  20000
## 54       1  36  30000
## 55       1  25 200000
## 56       1  18  15000
## 57       1  24  40000
## 58       1  21  12000
## 59       1  20  40000
## 60       1  22  30000
## 61       1  39  50000
## 62       0  22  12000
## 63       0  19  13000
## 64       1  26  40000
## 65       1  38 100000
## 66       1  26  20000
## 67       1  25  14000
## 68       1  22  15000
## 69       1  25  48000
## 70       0  25  25000
## 71       1  22  35000
## 72       1  24  20000
## 73       1  25  25000
## 74       1  22  25000
## 75       1  30  30000
## 76       1  24 250000
## 77       1  39 100000
## 78       0  28  25000
## 79       1  44  30000
## 80       1  24  15000
## 81       0  26  20000
## 82       1  25  20000
## 83       0  26  20000
## 84       0  23  25000
## 85       1  23  12500
## 86       1  23  10000
## 87       1  24  10000
## 88       0  25  15000
## 89       1  25  20000
## 90       1  21  10000
## 91       0  24  18000
## 92       1  22  15000
## 93       0  24  15000
## 94       1  23  12000
## 95       1  30  50000
## 96       1  24  70000
## 97       1  23  50000
## 98       1  24  50000
## 99       1  22  20000
## 100      1  32 150000
str(data)
## 'data.frame':    100 obs. of  4 variables:
##  $ Age   : int  54 24 23 16 18 23 50 23 22 29 ...
##  $ income: int  85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
##  $ edu   : int  18 16 16 12 12 12 18 14 16 18 ...
##  $ Gender: int  1 1 1 0 0 0 1 1 1 1 ...
Gender=as.factor(data$Gender)
edu=as.factor(data$edu)
data=data.frame(Age, income, edu, Gender)
str(data)
## 'data.frame':    100 obs. of  4 variables:
##  $ Age   : int  54 24 23 16 18 23 50 23 22 29 ...
##  $ income: int  85000 20000 19000 12000 7000 15000 80000 10000 25000 30000 ...
##  $ edu   : Factor w/ 4 levels "12","14","16",..: 4 3 3 1 1 1 4 2 3 4 ...
##  $ Gender: Factor w/ 2 levels "0","1": 2 2 2 1 1 1 2 2 2 2 ...

Now, we added Gender with Age and Income. the Graphs are as follows:

ggplot(data, aes(x=Age, y=income, colour=Gender))   + geom_point()

In the above graphs, Age vs Income is compared with respect to the gender’

ps <- ggplot(data, aes(x=Age, y=income))
ps + geom_point()

data[, c("Gender", "Age", "income")]
##     Gender Age income
## 1        1  54  85000
## 2        1  24  20000
## 3        1  23  19000
## 4        0  16  12000
## 5        0  18   7000
## 6        0  23  15000
## 7        1  50  80000
## 8        1  23  10000
## 9        1  22  25000
## 10       1  29  30000
## 11       0  26  30000
## 12       0  25  20000
## 13       0  26  15000
## 14       1  24  10000
## 15       0  24  10000
## 16       0  24  10000
## 17       0  25  20000
## 18       1  23  50000
## 19       1  34  25000
## 20       1  24  40000
## 21       1  18  50000
## 22       0  21  25000
## 23       1  24  60000
## 24       1  26  20000
## 25       1  20  20000
## 26       0  21  20000
## 27       1  21  50000
## 28       1  23  40000
## 29       1  60  50000
## 30       1  53  50000
## 31       1  60  80000
## 32       1  36 150000
## 33       1  53 200000
## 34       1  32  50000
## 35       1  28  15000
## 36       1  23  15000
## 37       1  21  50000
## 38       1  26  30000
## 39       1  21  20000
## 40       1  22  50000
## 41       1  62  80000
## 42       0  31  40000
## 43       1  35  35000
## 44       1  35  40000
## 45       1  30  20000
## 46       1  25  14000
## 47       1  21  10000
## 48       1  20  15000
## 49       1  29 120000
## 50       1  26  12000
## 51       1  32  80000
## 52       1  27  15000
## 53       1  24  20000
## 54       1  36  30000
## 55       1  25 200000
## 56       1  18  15000
## 57       1  24  40000
## 58       1  21  12000
## 59       1  20  40000
## 60       1  22  30000
## 61       1  39  50000
## 62       0  22  12000
## 63       0  19  13000
## 64       1  26  40000
## 65       1  38 100000
## 66       1  26  20000
## 67       1  25  14000
## 68       1  22  15000
## 69       1  25  48000
## 70       0  25  25000
## 71       1  22  35000
## 72       1  24  20000
## 73       1  25  25000
## 74       1  22  25000
## 75       1  30  30000
## 76       1  24 250000
## 77       1  39 100000
## 78       0  28  25000
## 79       1  44  30000
## 80       1  24  15000
## 81       0  26  20000
## 82       1  25  20000
## 83       0  26  20000
## 84       0  23  25000
## 85       1  23  12500
## 86       1  23  10000
## 87       1  24  10000
## 88       0  25  15000
## 89       1  25  20000
## 90       1  21  10000
## 91       0  24  18000
## 92       1  22  15000
## 93       0  24  15000
## 94       1  23  12000
## 95       1  30  50000
## 96       1  24  70000
## 97       1  23  50000
## 98       1  24  50000
## 99       1  22  20000
## 100      1  32 150000
ggplot(data, aes(x=Age, y=income, colour=Gender))   + geom_point()

ggplot(data, aes(x=Age, y=income, colour=Gender, shape=Gender)) + geom_point()

ggplot(data, aes(x=Age, y=income, colour=edu))   + geom_point()

ggplot(data, aes(x=Age, y=income, colour=edu, shape=edu)) + geom_point()

All above graphs show the relationship of age, income and gender with respect to the colour and shape

ps <- ggplot(data, aes(x=Age, y=income))
ps + geom_point()

ps + geom_point(alpha=.1)

ps + geom_point(alpha=.01)

Still age vs income comparison in above graphs.

library(hexbin)
## Warning: package 'hexbin' was built under R version 3.5.3
ps + stat_binhex() +scale_fill_gradient(low="lightblue", high="red",  limits=c(0, 8000))

Now, we are going to draw the Boxplot.

library(plyr)
## Warning: package 'plyr' was built under R version 3.5.3
boxplot(income~Gender,data=data)

boxplot(income~edu, data=data)

boxplot(income~Gender+edu, data=data)

qplot(Gender, income, geom="boxplot")

qplot(interaction(Gender, edu), income, geom="boxplot") 

ggplot(data,  aes(x=interaction(Gender, edu), income))+geom_boxplot()

library(gcookbook)
## Warning: package 'gcookbook' was built under R version 3.5.2

Now, we are going to draw the Bar plots.

ggplot(data, aes(edu, income))+geom_bar(stat="identity")

qplot(factor(edu), data=data)

ggplot(data, aes(Gender, income))+geom_bar(stat="identity",fill="lightblue", col="blue") 

ggplot(data, aes(x=edu, y=income, fill=Gender))+geom_bar(position="dodge", stat="identity", col="blue")

ggplot(data, aes(edu, income, cultivar=Gender))+geom_bar(position="dodge", stat="identity",col="blue")+scale_fill_brewer(palette="pastel1")
## Warning in pal_name(palette, type): Unknown palette pastel1

ggplot(data, aes(edu))+geom_bar()

ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity", width=0.5, position="dodge")

ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity", width=0.5, position=position_dodge(0.7))

ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity") +guides(fill=guide_legend(reverse=TRUE))

ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity", colour="black") +guides(fill=guide_legend(reverse=TRUE)) +scale_fill_brewer(palette="Pastel1")

data=data[1:6,]
ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity", position="dodge") +geom_text(aes(label=income), vjust=1.3, colour="black",position=position_dodge(.9), size=3)

ggplot(data, aes(x=edu, y=income, fill=Gender)) +geom_bar(stat="identity", position="dodge") +geom_text(aes(label=income), vjust=-4, colour="black",position=position_dodge(.9), size=3)

In the above grphs, we compare the age and income with respect to the education level and gender. which realy depicts the difference between level of education with respect to the age and income. Also, gender differnces the huge difference between age and income.

Now, we draw the histogram

hist(data$income)

hist(data$Age)

hist(data$income, breaks=10)

qplot(income, data=data, binwidth=10)

ggplot(data, aes(x=Age)) + geom_histogram(binwidth=4)

ggplot(data, aes(x=Age)) +geom_histogram(binwidth=5, fill="white", colour="black")

ggplot(data, aes(x=Age)) + geom_histogram(fill="white", colour="black") +facet_grid(Gender ~ .)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggplot(data, aes(x=Age, fill=Gender)) +geom_histogram(position="identity", alpha=0.4)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

In the above Histograms, we come to know the same difference of age vs income considering gender as well as education level with diffenrent badwidths.

Now, we are going to draw the Density graphs.

ggplot(data, aes(x=income)) + geom_density()

ggplot(data, aes(x=income)) + geom_line(stat="density") +expand_limits(y=0)

ggplot(data, aes(x=income)) +geom_line(stat="density", adjust=.25, colour="red") +geom_line(stat="density") +geom_line(stat="density", adjust=2, colour="blue")

ggplot(data, aes(x=Age, y=..density..)) +geom_histogram(fill="cornsilk", colour="grey60", size=.2) +geom_density() +xlim(35, 105)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 5 rows containing non-finite values (stat_bin).
## Warning: Removed 5 rows containing non-finite values (stat_density).
## Warning: Groups with fewer than two data points have been dropped.
## Warning: Removed 1 rows containing missing values (geom_bar).

ggplot(data, aes(x=income, fill=Gender)) + geom_density(alpha=.3)

### Here, in the density graphs, different colors and lines shows the comparisons of age vs income with respect to the age and gender.

Now, we are going to draw the violen and Dot plots.

p <- ggplot(data, aes(x=Gender, y=income))
p + geom_violin()

ggplot(data, aes(x=edu, y=income)) + geom_boxplot() +stat_summary(fun.y="mean", geom="point", shape=23, size=3, fill="white")

### By looking at the violen graphs, we come to know that males earn more as compared to the females. Also, if we consider the education level, higher the education, higher level of income you will recieve.

ggplot(data, aes(x=Gender, y=Age)) +geom_dotplot(binaxis="y", binwidth=.5, stackdir="center")

ggplot(data, aes(x=Gender, y=Age))+geom_boxplot(outlier.colour=NA, width=.4) +
  geom_dotplot(binaxis="y", binwidth=.5, stackdir="center", fill=NA)

In density graphs, Box plots represents that males on average earn more than females.

Now, we are going to draw the two dimensional density graphs. Which are as follows:

p <- ggplot(data, aes(x=Age, y=income))
p + geom_point() + stat_density2d()

p + stat_density2d(aes(colour=..level..))

p + stat_density2d(aes(fill=..density..), geom="raster", contour=FALSE)

p + geom_point() + stat_density2d(aes(alpha=..density..), geom="tile", contour=FALSE)

Annotation

p <- ggplot(data.frame(x=c(-3,3)), aes(x=x)) + stat_function(fun = dnorm)
p + annotate("text", x=2, y=0.3, parse=TRUE,label="frac(1, sqrt(2 * pi)) * e ^ {-x^2 / 2}")

p + annotate("text", x=0, y=0.05, parse=TRUE, size=4, label="'Function: ' * y==frac(1, sqrt(2*pi)) * e^{-x^2/2}")

p <- ggplot(data, aes(x=Age, y=income)) + geom_point()
p <- ggplot(data, aes(x=Age, y=income, colour=Gender)) + geom_point()
p + geom_hline(yintercept=40000) + geom_vline(xintercept=25)

p + geom_abline(intercept=120000, slope=1.75)

p + annotate("text", x=3, y=20000, label="Group 1") +annotate("text", x=3, y=66, label="Group 2")

p + annotate("text", x=-Inf, y=Inf, label="Upper left", hjust=-.2, vjust=2) +annotate("text", x=mean(range(data$Age)), y=-Inf, vjust=-0.4,label="Bottom middle")

hw_means <- ddply(data, "Gender", summarise, income=mean(income))
hw_means
##   Gender   income
## 1      0 11333.33
## 2      1 41333.33
p + geom_hline(aes(yintercept=income, colour=Gender), data=hw_means,linetype="dashed", size=1)

p <- ggplot(subset(data, Gender=="Gender"), aes(x=Age, y=income)) +
  geom_line()
p + annotate("segment", x=18, xend=62, y=25588, yend=200000)

Title of a Graph, means that How to add the titles in the graphs. Which are as follows:

p <- ggplot(data, aes(x=Age, y=income)) + geom_point()
p + ggtitle("Age and income of Visitors")

p + ggtitle("Age and income of Visitors") +theme(plot.title=element_text(vjust = -8))

p + annotate("text", x=mean(range(heightweight$ageYear)), y=Inf,label="Age and income of Visitors", vjust=1.5, size=4)

p + annotate("text", x=mean(range(data$income)), y=Inf,label="Age and income of Visitors", vjust=1.5, size=6)

p + annotate("text", x=25, y=53, label="Some text", size = 7, family="Times",fontface="bold.italic", colour="red")
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

p + theme_grey(base_size=16, base_family="Times")
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database
## Warning in grid.Call.graphics(C_text, as.graphicsAnnot(x$label), x$x,
## x$y, : font family not found in Windows font database

p <- ggplot(data, aes(x=Age, y=income, colour=Gender)) + geom_point()
p <- ggplot(data, aes(x=Age, y=income)) + geom_point()
p + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())
p + theme(panel.grid.major.y = element_blank(),panel.grid.minor.y = element_blank())

p + theme_grey()

p <- ggplot(data, aes(x=Age, y=income, colour=Gender)) + geom_point()
p <- ggplot(data, aes(x=Age, y=income)) + geom_point()
p + theme(panel.grid.major = element_blank(),panel.grid.minor = element_blank())

p + theme(panel.grid.major.y = element_blank(),panel.grid.minor.y = element_blank())

Legend

p<- ggplot(data, aes(x=edu, y=income, fill=edu)) + geom_boxplot()
p

p + theme(legend.position="none")

p <- ggplot(data, aes(x=edu, y=income, fill=edu)) + geom_boxplot() +scale_fill_brewer(palette="Pastel2")
p + theme(legend.position="top")

p + theme(legend.position=c(1,0), legend.justification=c(1,0))

p + scale_fill_discrete(limits=c("trt1", "trt2", "ctrl"))
## Scale for 'fill' is already present. Adding another scale for 'fill',
## which will replace the existing scale.

hw <- ggplot(data, aes(x=Age, y=income, colour=Gender)) +geom_point(aes(size=Age)) + scale_size_continuous(range=c(1,4))
hw

hw1 <- ggplot(data, aes(x=Age, y=income, shape=Gender, colour=Gender)) +geom_point()
hw1

p <- ggplot(data, aes(x=Gender, y=income, fill=Gender)) + geom_boxplot()
p + theme(legend.text=element_text(face="italic", family="Times", colour="red",size=14))
## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

## Warning in grid.Call(C_textBounds, as.graphicsAnnot(x$label), x$x, x$y, :
## font family not found in Windows font database

Regression Results

Means that we run the regression and find out the following results, which are as follows:

model <- lm(income ~ Age + I(Age^2), data=data)
summary(model)
## 
## Call:
## lm(formula = income ~ Age + I(Age^2), data = data)
## 
## Residuals:
##        1        2        3        4        5        6 
##   -44.87  1485.54  1858.14  2939.74 -4096.69 -2141.86 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   619.97   15610.89   0.040    0.971
## Age            91.35    1053.90   0.087    0.936
## I(Age^2)       27.26      14.42   1.891    0.155
## 
## Residual standard error: 3448 on 3 degrees of freedom
## Multiple R-squared:  0.9916, Adjusted R-squared:  0.986 
## F-statistic: 176.9 on 2 and 3 DF,  p-value: 0.0007708
xmin <- min(data$Age)
xmax <- max(data$Age)
predicted <- data.frame(Age=seq(xmin, xmax, length.out=100))
predicted$income <- predict(model, predicted)
predicted
##          Age    income
## 1   16.00000  9060.265
## 2   16.38384  9434.182
## 3   16.76768  9816.132
## 4   17.15152 10206.115
## 5   17.53535 10604.130
## 6   17.91919 11010.178
## 7   18.30303 11424.259
## 8   18.68687 11846.373
## 9   19.07071 12276.519
## 10  19.45455 12714.698
## 11  19.83838 13160.910
## 12  20.22222 13615.155
## 13  20.60606 14077.432
## 14  20.98990 14547.742
## 15  21.37374 15026.085
## 16  21.75758 15512.461
## 17  22.14141 16006.869
## 18  22.52525 16509.310
## 19  22.90909 17019.784
## 20  23.29293 17538.290
## 21  23.67677 18064.829
## 22  24.06061 18599.401
## 23  24.44444 19142.006
## 24  24.82828 19692.643
## 25  25.21212 20251.313
## 26  25.59596 20818.016
## 27  25.97980 21392.752
## 28  26.36364 21975.520
## 29  26.74747 22566.321
## 30  27.13131 23165.155
## 31  27.51515 23772.022
## 32  27.89899 24386.921
## 33  28.28283 25009.853
## 34  28.66667 25640.818
## 35  29.05051 26279.815
## 36  29.43434 26926.845
## 37  29.81818 27581.908
## 38  30.20202 28245.004
## 39  30.58586 28916.132
## 40  30.96970 29595.293
## 41  31.35354 30282.487
## 42  31.73737 30977.714
## 43  32.12121 31680.973
## 44  32.50505 32392.265
## 45  32.88889 33111.590
## 46  33.27273 33838.948
## 47  33.65657 34574.338
## 48  34.04040 35317.761
## 49  34.42424 36069.217
## 50  34.80808 36828.705
## 51  35.19192 37596.226
## 52  35.57576 38371.780
## 53  35.95960 39155.367
## 54  36.34343 39946.986
## 55  36.72727 40746.639
## 56  37.11111 41554.324
## 57  37.49495 42370.041
## 58  37.87879 43193.791
## 59  38.26263 44025.575
## 60  38.64646 44865.390
## 61  39.03030 45713.239
## 62  39.41414 46569.120
## 63  39.79798 47433.034
## 64  40.18182 48304.981
## 65  40.56566 49184.961
## 66  40.94949 50072.973
## 67  41.33333 50969.018
## 68  41.71717 51873.095
## 69  42.10101 52785.206
## 70  42.48485 53705.349
## 71  42.86869 54633.525
## 72  43.25253 55569.734
## 73  43.63636 56513.975
## 74  44.02020 57466.249
## 75  44.40404 58426.556
## 76  44.78788 59394.895
## 77  45.17172 60371.268
## 78  45.55556 61355.673
## 79  45.93939 62348.110
## 80  46.32323 63348.581
## 81  46.70707 64357.084
## 82  47.09091 65373.620
## 83  47.47475 66398.189
## 84  47.85859 67430.790
## 85  48.24242 68471.425
## 86  48.62626 69520.091
## 87  49.01010 70576.791
## 88  49.39394 71641.523
## 89  49.77778 72714.289
## 90  50.16162 73795.086
## 91  50.54545 74883.917
## 92  50.92929 75980.780
## 93  51.31313 77085.676
## 94  51.69697 78198.605
## 95  52.08081 79319.567
## 96  52.46465 80448.561
## 97  52.84848 81585.588
## 98  53.23232 82730.648
## 99  53.61616 83883.740
## 100 54.00000 85044.865
sp <- ggplot(data, aes(x=Age, y=income)) +geom_point(colour="grey40")
sp + geom_line(data=predicted, size=1)

modlinear <- lm(income ~ Age, data)
modloess <- loess(income ~ Age, data)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 15.81
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 7.19
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 972.82
summary(modloess)
## Call:
## loess(formula = income ~ Age, data = data)
## 
## Number of Observations: 6 
## Equivalent Number of Parameters: 4.52 
## Residual Standard Error: 3920 
## Trace of smoother matrix: 5  (exact)
## 
## Control settings:
##   span     :  0.75 
##   degree   :  2 
##   family   :  gaussian
##   surface  :  interpolate      cell = 0.2
##   normalize:  TRUE
##  parametric:  FALSE
## drop.square:  FALSE