library(readr)
# Load the survey responses from CSV into `data`.
# NOTE(review): the path is absolute and specific to one machine; the script
# will not run elsewhere until it is pointed at a local copy of the file.
data <- readr::read_csv(
  file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv"
)
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only when a person is driving the session:
# View() is an interactive tool and should not be called when the script is
# run non-interactively (for example via Rscript or a batch render).
if (interactive()) {
  View(data)
}
# List the column names of the loaded data.
colnames(data)
## [1] "Age"
## [2] "City"
## [3] "Current Status"
## [4] "Do you own multiple profiles on Instagram?"
## [5] "Gender"
## [6] "Highest Education"
## [7] "Location (City Airport Code)"
## [8] "Phone OS"
## [9] "State"
## [10] "Zone"
## [11] "How many followers do you have on Instagram?"
## [12] "How many posts do you have on Instagram?"
## [13] "Latitude"
## [14] "Longitude"
## [15] "Time Spent on Facebook in last week (in minutes)"
## [16] "Time Spent on Facebook in last weekend (in minutes)"
## [17] "Time Spent on Instagram in last week (in minutes)"
## [18] "Time Spent on Instagram in last weekend (in minutes)"
## [19] "Time Spent on WhatsApp in last week (in minutes)"
## [20] "Time Spent on WhatsApp in last weekend (in minutes)"
## [21] "Total Facebook Usage"
## [22] "Total Instagram Usage"
## [23] "Total Social Media Usage"
## [24] "Total Week Usage"
## [25] "Total Weekend Usage"
## [26] "Total WhatsApp Usage"
## [27] "How many subscriber do you have on youtube"
## [28] "Income from YouTube (rs,month)"
## [29] "Internet Speed (Mbps)"
## [30] "Total YouTube Usage (minutes)"
## [31] "Weekly YouTube Usage (minutes)"
## [32] "Profession/Activity"
## [33] "Likes on Instagram"
## [34] "Shares on Instagram"
## [35] "Hobby"
## [36] "Verified Account on instagram"
## [37] "Date of Birth"
## [38] "Time Spent on Twitter per Week (minutes)"
## [39] "Total Time Spent on Twitter (minutes)"
## [40] "Groups Joined on Instagram"
# Compact overview of every column: its type and first few values.
utils::str(object = data)
## spc_tbl_ [1,628 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:1628] 24 39 22 26 50 25 52 45 25 27 ...
## $ City : chr [1:1628] "Delhi" "Delhi" "Mumbai" "Bengaluru" ...
## $ Current Status : chr [1:1628] "Working professional" "Working professional" "Working professional" "Sabbatical" ...
## $ Do you own multiple profiles on Instagram? : chr [1:1628] "No" "No" "No" "Yes" ...
## $ Gender : chr [1:1628] "Female" "Female" "Male" "Female" ...
## $ Highest Education : chr [1:1628] "Graduation" "Post graduation" "Graduation" "Graduation" ...
## $ Location (City Airport Code) : chr [1:1628] "DEL" "DEL" "BOM" "BLR" ...
## $ Phone OS : chr [1:1628] "iOs" "iOs" "Android" "Android" ...
## $ State : chr [1:1628] "Delhi" "Delhi" "Maharashtra" "Karnataka" ...
## $ Zone : chr [1:1628] "Northern" "Northern" "Western" "Southern" ...
## $ How many followers do you have on Instagram? : num [1:1628] 456 0 400 485 0 ...
## $ How many posts do you have on Instagram? : num [1:1628] 20 0 6 16 0 220 0 0 340 37 ...
## $ Latitude : num [1:1628] 28.7 28.7 19 13 28.7 ...
## $ Longitude : num [1:1628] 77.2 77.2 72.8 77.6 77.2 ...
## $ Time Spent on Facebook in last week (in minutes) : num [1:1628] 0 6000 500 1500 1500 1000 300 983 1160 480 ...
## $ Time Spent on Facebook in last weekend (in minutes) : num [1:1628] 0 2160 2000 1500 1500 1200 900 873 870 840 ...
## $ Time Spent on Instagram in last week (in minutes) : num [1:1628] 770 0 1000 2000 0 3000 0 0 1240 720 ...
## $ Time Spent on Instagram in last weekend (in minutes): num [1:1628] 400 0 1000 2000 0 840 215 0 340 300 ...
## $ Time Spent on WhatsApp in last week (in minutes) : num [1:1628] 900 5000 7000 1680 2400 2100 1800 583 1760 3000 ...
## $ Time Spent on WhatsApp in last weekend (in minutes) : num [1:1628] 120 2000 2000 1680 1300 600 1500 834 450 600 ...
## $ Total Facebook Usage : num [1:1628] 0 8160 2500 3000 3000 ...
## $ Total Instagram Usage : num [1:1628] 1170 0 2000 4000 0 3840 215 0 1580 1020 ...
## $ Total Social Media Usage : num [1:1628] 2190 15160 13500 10360 6700 ...
## $ Total Week Usage : num [1:1628] 1670 11000 8500 5180 3900 ...
## $ Total Weekend Usage : num [1:1628] 520 4160 5000 5180 2800 ...
## $ Total WhatsApp Usage : num [1:1628] 1020 7000 9000 3360 3700 ...
## $ How many subscriber do you have on youtube : num [1:1628] 33356 25394 34603 13645 49876 ...
## $ Income from YouTube (rs,month) : num [1:1628] 88447 64764 4387 99695 81297 ...
## $ Internet Speed (Mbps) : num [1:1628] 46.6 83.5 50.5 99.6 22.1 ...
## $ Total YouTube Usage (minutes) : num [1:1628] 272543 220056 2629 154271 178485 ...
## $ Weekly YouTube Usage (minutes) : num [1:1628] 305 2090 528 1545 2836 ...
## $ Profession/Activity : chr [1:1628] "Photographer" "Singer" "Content Creator" "Blogger" ...
## $ Likes on Instagram : chr [1:1628] "679k" "400k" "447k" "443k" ...
## $ Shares on Instagram : chr [1:1628] "16K" "28K" "86K" "39K" ...
## $ Hobby : chr [1:1628] "Reading" "Reading" "Playing Musical Instruments" "Cooking/Baking" ...
## $ Verified Account on instagram : chr [1:1628] "Yes" "Yes" "No" "No" ...
## $ Date of Birth : chr [1:1628] "8/31/1982" "12/17/1992" "4/16/2002" "12/10/1965" ...
## $ Time Spent on Twitter per Week (minutes) : num [1:1628] 356 1774 1848 199 1210 ...
## $ Total Time Spent on Twitter (minutes) : num [1:1628] 2492 12418 12936 1393 8470 ...
## $ Groups Joined on Instagram : num [1:1628] 4 5 2 4 3 9 2 3 2 6 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. City = col_character(),
## .. `Current Status` = col_character(),
## .. `Do you own multiple profiles on Instagram?` = col_character(),
## .. Gender = col_character(),
## .. `Highest Education` = col_character(),
## .. `Location (City Airport Code)` = col_character(),
## .. `Phone OS` = col_character(),
## .. State = col_character(),
## .. Zone = col_character(),
## .. `How many followers do you have on Instagram?` = col_number(),
## .. `How many posts do you have on Instagram?` = col_number(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. `Time Spent on Facebook in last week (in minutes)` = col_number(),
## .. `Time Spent on Facebook in last weekend (in minutes)` = col_number(),
## .. `Time Spent on Instagram in last week (in minutes)` = col_number(),
## .. `Time Spent on Instagram in last weekend (in minutes)` = col_number(),
## .. `Time Spent on WhatsApp in last week (in minutes)` = col_number(),
## .. `Time Spent on WhatsApp in last weekend (in minutes)` = col_number(),
## .. `Total Facebook Usage` = col_number(),
## .. `Total Instagram Usage` = col_number(),
## .. `Total Social Media Usage` = col_number(),
## .. `Total Week Usage` = col_number(),
## .. `Total Weekend Usage` = col_number(),
## .. `Total WhatsApp Usage` = col_number(),
## .. `How many subscriber do you have on youtube` = col_double(),
## .. `Income from YouTube (rs,month)` = col_double(),
## .. `Internet Speed (Mbps)` = col_double(),
## .. `Total YouTube Usage (minutes)` = col_double(),
## .. `Weekly YouTube Usage (minutes)` = col_double(),
## .. `Profession/Activity` = col_character(),
## .. `Likes on Instagram` = col_character(),
## .. `Shares on Instagram` = col_character(),
## .. Hobby = col_character(),
## .. `Verified Account on instagram` = col_character(),
## .. `Date of Birth` = col_character(),
## .. `Time Spent on Twitter per Week (minutes)` = col_double(),
## .. `Total Time Spent on Twitter (minutes)` = col_double(),
## .. `Groups Joined on Instagram` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary: quartiles/mean for numeric columns, length/class/mode
# for character columns.
summary(object = data)
## Age City Current Status
## Min. :13.00 Length:1628 Length:1628
## 1st Qu.:22.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :26.86
## 3rd Qu.:27.00
## Max. :74.00
## Do you own multiple profiles on Instagram? Gender
## Length:1628 Length:1628
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Highest Education Location (City Airport Code) Phone OS
## Length:1628 Length:1628 Length:1628
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## State Zone
## Length:1628 Length:1628
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## How many followers do you have on Instagram?
## Min. : 0.0
## 1st Qu.: 183.0
## Median : 370.0
## Mean : 868.1
## 3rd Qu.: 657.0
## Max. :116000.0
## How many posts do you have on Instagram? Latitude Longitude
## Min. : 0.00 Min. : 8.486 Min. :69.67
## 1st Qu.: 10.00 1st Qu.:18.988 1st Qu.:72.84
## Median : 43.50 Median :22.563 Median :77.23
## Mean : 99.08 Mean :22.760 Mean :77.89
## 3rd Qu.: 111.25 3rd Qu.:28.652 3rd Qu.:78.46
## Max. :2858.00 Max. :32.736 Max. :94.91
## Time Spent on Facebook in last week (in minutes)
## Min. : 0.0
## 1st Qu.: 2.0
## Median : 63.0
## Mean : 175.2
## 3rd Qu.: 240.0
## Max. :6000.0
## Time Spent on Facebook in last weekend (in minutes)
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 30.00
## Mean : 75.69
## 3rd Qu.: 89.00
## Max. :2160.00
## Time Spent on Instagram in last week (in minutes)
## Min. : 0.0
## 1st Qu.: 120.0
## Median : 357.0
## Mean : 505.2
## 3rd Qu.: 675.0
## Max. :6000.0
## Time Spent on Instagram in last weekend (in minutes)
## Min. : 0.0
## 1st Qu.: 48.0
## Median : 135.0
## Mean : 215.0
## 3rd Qu.: 281.5
## Max. :2560.0
## Time Spent on WhatsApp in last week (in minutes)
## Min. : 4.0
## 1st Qu.: 300.0
## Median : 600.0
## Mean : 854.9
## 3rd Qu.:1009.0
## Max. :7000.0
## Time Spent on WhatsApp in last weekend (in minutes) Total Facebook Usage
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 100.0 1st Qu.: 10.0
## Median : 200.0 Median : 101.5
## Mean : 294.9 Mean : 250.9
## 3rd Qu.: 360.0 3rd Qu.: 334.2
## Max. :2800.0 Max. :8160.0
## Total Instagram Usage Total Social Media Usage Total Week Usage
## Min. : 0.0 Min. : 12 Min. : 8
## 1st Qu.: 190.8 1st Qu.: 970 1st Qu.: 670
## Median : 522.5 Median : 1658 Median : 1170
## Mean : 720.2 Mean : 2121 Mean : 1535
## 3rd Qu.: 970.0 3rd Qu.: 2670 3rd Qu.: 1895
## Max. :8240.0 Max. :15780 Max. :12734
## Total Weekend Usage Total WhatsApp Usage
## Min. : 0.0 Min. : 9
## 1st Qu.: 243.0 1st Qu.: 450
## Median : 425.5 Median : 812
## Mean : 585.6 Mean :1150
## 3rd Qu.: 709.0 3rd Qu.:1400
## Max. :5180.0 Max. :9000
## How many subscriber do you have on youtube Income from YouTube (rs,month)
## Min. : 33 Min. : 11
## 1st Qu.:12783 1st Qu.:23870
## Median :24629 Median :47898
## Mean :24795 Mean :49166
## 3rd Qu.:36702 3rd Qu.:74162
## Max. :49939 Max. :99991
## Internet Speed (Mbps) Total YouTube Usage (minutes)
## Min. : 2.03 Min. : 224
## 1st Qu.:24.82 1st Qu.: 78476
## Median :50.47 Median :153087
## Mean :50.34 Mean :150846
## 3rd Qu.:75.83 3rd Qu.:225062
## Max. :99.93 Max. :299562
## Weekly YouTube Usage (minutes) Profession/Activity Likes on Instagram
## Min. : 2 Length:1628 Length:1628
## 1st Qu.: 782 Class :character Class :character
## Median :1456 Mode :character Mode :character
## Mean :1480
## 3rd Qu.:2226
## Max. :2998
## Shares on Instagram Hobby Verified Account on instagram
## Length:1628 Length:1628 Length:1628
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Date of Birth Time Spent on Twitter per Week (minutes)
## Length:1628 Min. : 0
## Class :character 1st Qu.: 561
## Mode :character Median :1168
## Mean :1179
## 3rd Qu.:1790
## Max. :2399
## Total Time Spent on Twitter (minutes) Groups Joined on Instagram
## Min. : 0 Min. : 0.00
## 1st Qu.: 3927 1st Qu.: 2.00
## Median : 8180 Median : 5.00
## Mean : 8255 Mean : 4.96
## 3rd Qu.:12532 3rd Qu.: 8.00
## Max. :16793 Max. :10.00
# Preview the first six rows (six is head()'s default, spelled out here).
utils::head(x = data, n = 6L)
## # A tibble: 6 × 40
## Age City `Current Status` Do you own multiple …¹ Gender `Highest Education`
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working profess… No Female Graduation
## 2 39 Delhi Working profess… No Female Post graduation
## 3 22 Mumb… Working profess… No Male Graduation
## 4 26 Beng… Sabbatical Yes Female Graduation
## 5 50 Delhi Working profess… No Male Graduation
## 6 25 Vish… Working profess… Yes Female Post graduation
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 34 more variables: `Location (City Airport Code)` <chr>, `Phone OS` <chr>,
## # State <chr>, Zone <chr>,
## # `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, `Time Spent on Facebook in last week (in minutes)` <dbl>,
## # `Time Spent on Facebook in last weekend (in minutes)` <dbl>, …
# Create a 3 x 3 matrix from the first 3 rows and 3 columns of the data.
# matrix() treats a data frame as a list of columns, so calling it directly on
# the subset produced a list-matrix whose cells each held an entire column
# (printed as "numeric,3" / "character,3"). as.matrix() converts the cell
# values themselves. Because these columns mix numeric and character data,
# the result is a character matrix that keeps the column names.
matrix_data <- as.matrix(data[1:3, 1:3])
# Print the matrix
matrix_data
## Expected output (values taken from the first three rows of the data):
##      Age  City     Current Status
## [1,] "24" "Delhi"  "Working professional"
## [2,] "39" "Delhi"  "Working professional"
## [3,] "22" "Mumbai" "Working professional"
# Mean age of all respondents
mean(data[["Age"]])
## [1] 26.85811
# Middle (median) age
median(data[["Age"]])
## [1] 24
# Oldest respondent
max(data[["Age"]])
## [1] 74
# Youngest respondent
min(data[["Age"]])
## [1] 13
# Count of rows whose Gender is exactly "Male"
sum(data[["Gender"]] == "Male")
## [1] 813
# Count of rows whose Gender is exactly "Female"
sum(data[["Gender"]] == "Female")
## [1] 813
# Instagram followers: mean
with(data, mean(`How many followers do you have on Instagram?`))
## [1] 868.1474
# Instagram followers: median
with(data, median(`How many followers do you have on Instagram?`))
## [1] 370
# Instagram followers: largest reported value
with(data, max(`How many followers do you have on Instagram?`))
## [1] 116000
# Instagram followers: smallest reported value
with(data, min(`How many followers do you have on Instagram?`))
## [1] 0
# Instagram posts: mean
mean(data[["How many posts do you have on Instagram?"]])
## [1] 99.07985
# Instagram posts: median
median(data[["How many posts do you have on Instagram?"]])
## [1] 43.5
# Instagram posts: largest reported value
max(data[["How many posts do you have on Instagram?"]])
## [1] 2858
# Instagram posts: smallest reported value
min(data[["How many posts do you have on Instagram?"]])
## [1] 0
# Minutes spent on Facebook last week, summed over all respondents
with(data, sum(`Time Spent on Facebook in last week (in minutes)`))
## [1] 285275
# Minutes spent on Instagram last week, summed over all respondents
with(data, sum(`Time Spent on Instagram in last week (in minutes)`))
## [1] 822407
# Minutes spent on WhatsApp last week, summed over all respondents
with(data, sum(`Time Spent on WhatsApp in last week (in minutes)`))
## [1] 1391726
# Total social media usage: mean
mean(data[["Total Social Media Usage"]])
## [1] 2120.885
# Total social media usage: median
median(data[["Total Social Media Usage"]])
## [1] 1658.5
# Total social media usage: largest value
max(data[["Total Social Media Usage"]])
## [1] 15780
#@**************************************************************************************************************
# Rows whose Gender is "Female". which() keeps only the TRUE positions, so
# rows with a missing Gender are left out; the outer parentheses print the
# result as well as storing it.
(females <- data[which(data[["Gender"]] == "Female"), ])
## # A tibble: 813 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 45 Delhi Working professional No Female
## 7 21 Delhi Working professional No Female
## 8 26 Delhi Working professional No Female
## 9 25 Mumbai Sabbatical No Female
## 10 22 Kolkata Student No Female
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Rows whose Gender is "Male" (missing values excluded by which());
# parenthesised so the stored tibble is also printed.
(males <- data[which(data[["Gender"]] == "Male"), ])
## # A tibble: 813 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 50 Delhi Working professional No Male
## 3 52 Jaipur Working professional No Male
## 4 25 Bengaluru Student No Male
## 5 27 Delhi Student Yes Male
## 6 27 Bengaluru Working professional No Male
## 7 22 Delhi Sabbatical Yes Male
## 8 26 Agra Working professional No Male
## 9 25 Ahmedabad Student No Male
## 10 18 Jaipur Student No Male
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Current Status is "Student"; %in% never yields NA, so
# rows with a missing status are simply not selected. Parenthesised to print.
(students <- data[data[["Current Status"]] %in% "Student", ])
## # A tibble: 637 × 40
## Age City `Current Status` Do you own multiple profiles on Ins…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 27 Delhi Student Yes Male
## 3 25 Ahmedabad Student No Male
## 4 18 Jaipur Student No Male
## 5 22 Kolkata Student No Female
## 6 26 Kolkata Student No Female
## 7 23 Delhi Student No Female
## 8 17 Mumbai Student Yes Male
## 9 23 Kolkata Student Yes Female
## 10 22 Ahmedabad Student No Male
## # ℹ 627 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Current Status is "Working professional" (missing status
# never matches with %in%). Parenthesised to print the stored result.
(working_professionals <-
  data[data[["Current Status"]] %in% "Working professional", ])
## # A tibble: 796 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 25 Vishakhapatnam Working professional Yes Female
## 6 52 Jaipur Working professional No Male
## 7 27 Bengaluru Working professional No Male
## 8 45 Delhi Working professional No Female
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 786 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents located in the Northern zone; store and print in one step.
(northern <- data[which(data[["Zone"]] == "Northern"), ])
## # A tibble: 542 × 40
## Age City `Current Status` Do you own multiple profiles on In…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 52 Jaipur Working professional No Male
## 5 27 Delhi Student Yes Male
## 6 45 Delhi Working professional No Female
## 7 22 Delhi Sabbatical Yes Male
## 8 21 Delhi Working professional No Female
## 9 26 Agra Working professional No Male
## 10 26 Delhi Working professional No Female
## # ℹ 532 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents located in the Southern zone; store and print in one step.
(southern <- data[which(data[["Zone"]] == "Southern"), ])
## # A tibble: 211 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 25 Bengaluru Student No Male
## 4 27 Bengaluru Working professional No Male
## 5 32 Bengaluru Working professional No Male
## 6 27 Chennai Working professional No Male
## 7 23 Chennai Student No Female
## 8 22 Chennai Student Yes Female
## 9 32 Bengaluru Working professional No Female
## 10 21 Chennai Student No Female
## # ℹ 201 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents located in the Eastern zone; store and print in one step.
(eastern <- data[which(data[["Zone"]] == "Eastern"), ])
## # A tibble: 271 × 40
## Age City `Current Status` Do you own multiple profiles …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 45 Durgapur Sabbatical No Female
## 2 24 Cooch-behar Working professional No Male
## 3 22 Kolkata Student No Female
## 4 26 Kolkata Student No Female
## 5 50 Kolkata Working professional No Female
## 6 23 Kolkata Student Yes Female
## 7 25 Kolkata Working professional No Male
## 8 45 Bagdogra Working professional Yes Female
## 9 25 Kolkata Student No Male
## 10 45 Kolkata Working professional No Female
## # ℹ 261 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents located in the Western zone; store and print in one step.
(western <- data[which(data[["Zone"]] == "Western"), ])
## # A tibble: 543 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 25 Ahmedabad Student No Male
## 3 25 Mumbai Sabbatical No Female
## 4 25 Ahmedabad Self Employed Yes Male
## 5 17 Mumbai Student Yes Male
## 6 22 Ahmedabad Student No Male
## 7 24 Pune Working professional No Female
## 8 21 Mumbai Sabbatical Yes Female
## 9 51 Ahmedabad Working professional No Male
## 10 22 Mumbai Sabbatical No Male
## # ℹ 533 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Phone OS is "Android" (missing OS never matches).
# Parenthesised so the stored tibble is also printed.
(android <- data[data[["Phone OS"]] %in% "Android", ])
## # A tibble: 1,115 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 26 Bengaluru Sabbatical Yes Female
## 3 25 Vishakhapatnam Working professional Yes Female
## 4 52 Jaipur Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 1,105 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Phone OS is "iOs" (the spelling used in the data file).
# Parenthesised so the stored tibble is also printed.
(ios <- data[data[["Phone OS"]] %in% "iOs", ])
## # A tibble: 508 × 40
## Age City `Current Status` Do you own multiple profiles on …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 45 Delhi Working professional No Female
## 5 22 Delhi Sabbatical Yes Male
## 6 18 Jaipur Student No Male
## 7 22 Chennai Student Yes Female
## 8 23 Guwahati Student No Female
## 9 45 Bagdogra Working professional Yes Female
## 10 28 Kolkata Sabbatical No Female
## # ℹ 498 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Highest Education is "Graduation"; store and print.
(graduation <- data[which(data[["Highest Education"]] == "Graduation"), ])
## # A tibble: 950 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 50 Delhi Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 940 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Highest Education is "Post graduation"; store and print.
(post_graduation <-
  data[which(data[["Highest Education"]] == "Post graduation"), ])
## # A tibble: 541 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 52 Jaipur Working professional No Male
## 4 22 Delhi Sabbatical Yes Male
## 5 26 Delhi Working professional No Female
## 6 25 Mumbai Sabbatical No Female
## 7 22 Kolkata Student No Female
## 8 26 Kolkata Student No Female
## 9 27 Chennai Working professional No Male
## 10 32 Bengaluru Working professional No Female
## # ℹ 531 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents whose Highest Education is "High School"; store and print.
(high_school <- data[which(data[["Highest Education"]] == "High School"), ])
## # A tibble: 137 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 18 Jaipur Student No Male
## 2 35 Delhi Working professional Yes Female
## 3 23 Delhi Student No Female
## 4 50 Kolkata Working professional No Female
## 5 17 Mumbai Student Yes Male
## 6 16 Kolkata Student No Male
## 7 16 Jaipur Student No Female
## 8 15 Chennai Student Yes Female
## 9 20 Hyderabad Student Yes Male
## 10 16 Mumbai Student No Male
## # ℹ 127 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents who answered "Yes" to owning multiple Instagram profiles
# (missing answers never match with %in%). Parenthesised to print.
(multiple_profiles <-
  data[data[["Do you own multiple profiles on Instagram?"]] %in% "Yes", ])
## # A tibble: 308 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 27 Delhi Student Yes Male
## 4 22 Delhi Sabbatical Yes Male
## 5 25 Ahmedabad Self Employed Yes Male
## 6 35 Delhi Working professional Yes Female
## 7 17 Mumbai Student Yes Male
## 8 23 Kolkata Student Yes Female
## 9 26 Kanpur Working professional Yes Male
## 10 22 Chennai Student Yes Female
## # ℹ 298 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents who answered "No" to owning multiple Instagram profiles
# (missing answers never match with %in%). Parenthesised to print.
(single_profile <-
  data[data[["Do you own multiple profiles on Instagram?"]] %in% "No", ])
## # A tibble: 1,316 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 52 Jaipur Working professional No Male
## 6 45 Durgapur Sabbatical No Female
## 7 25 Bengaluru Student No Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 1,306 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with strictly more than 500 Facebook minutes last week.
# which() drops rows where the comparison is NA; parentheses print the result.
(more_than_500_fb <- data[
  which(data[["Time Spent on Facebook in last week (in minutes)"]] > 500),
])
## # A tibble: 130 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 26 Bengaluru Sabbatical Yes Female
## 3 50 Delhi Working professional No Male
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Bengaluru Working professional No Male
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 120 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with strictly more than 500 Instagram minutes last week.
# which() drops rows where the comparison is NA; parentheses print the result.
(more_than_500_insta <- data[
  which(data[["Time Spent on Instagram in last week (in minutes)"]] > 500),
])
## # A tibble: 559 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 25 Bengaluru Student No Male
## 6 27 Delhi Student Yes Male
## 7 45 Delhi Working professional No Female
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 549 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents who spent more than 500 minutes on WhatsApp in the last week.
# which() skips rows where the comparison is NA, matching subset().
more_than_500_whatsapp <- data[which(data[["Time Spent on WhatsApp in last week (in minutes)"]] > 500), ]
more_than_500_whatsapp
## # A tibble: 913 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 26 Bengaluru Sabbatical Yes Female
## 5 50 Delhi Working professional No Male
## 6 25 Vishakhapatnam Working professional Yes Female
## 7 52 Jaipur Working professional No Male
## 8 45 Durgapur Sabbatical No Female
## 9 25 Bengaluru Student No Male
## 10 27 Delhi Student Yes Male
## # ℹ 903 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with more than 1000 Instagram followers.
# which() skips rows where the comparison is NA, matching subset().
more_than_1000_followers <- data[which(data[["How many followers do you have on Instagram?"]] > 1000), ]
more_than_1000_followers
## # A tibble: 199 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 23 Kolkata Student Yes Female
## 3 21 Chennai Student No Female
## 4 21 Chennai Student Yes Female
## 5 21 Mumbai Sabbatical Yes Female
## 6 57 Delhi Sabbatical Yes Male
## 7 22 Delhi Student No Female
## 8 35 Mumbai Working professional Yes Female
## 9 26 Ahmedabad Working professional No Male
## 10 21 Mumbai Student No Female
## # ℹ 189 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with more than 100 Instagram posts.
# which() skips rows where the comparison is NA, matching subset().
more_than_100_posts <- data[which(data[["How many posts do you have on Instagram?"]] > 100), ]
more_than_100_posts
## # A tibble: 444 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Vishakhapatnam Working professional Yes Female
## 2 25 Bengaluru Student No Male
## 3 25 Ahmedabad Self Employed Yes Male
## 4 50 Kolkata Working professional No Female
## 5 21 Chennai Student No Female
## 6 25 Kolkata Working professional No Male
## 7 45 Bagdogra Working professional Yes Female
## 8 38 Bengaluru Working professional No Female
## 9 24 Kolkata Student Yes Male
## 10 21 Chennai Student Yes Female
## # ℹ 434 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
require(ggplot2)
## Loading required package: ggplot2
# Fit a linear model of total social media usage on age, Instagram follower
# count and Facebook minutes in the last week.
model <- lm(
  `Total Social Media Usage` ~ Age +
    `How many followers do you have on Instagram?` +
    `Time Spent on Facebook in last week (in minutes)`,
  data = data
)
# Fitted value for every respondent, predicted straight from the original
# columns; rows with a missing predictor come back as NA.
predictions <- data.frame(predictions = predict(model, newdata = data))
# Pair each observed usage with its prediction. Observed usage (not Age) goes
# on the x-axis so the chart matches its "Actual vs Predicted" title; Age is
# kept as a column for reference.
plot_predict <- data.frame(
  Age = data$Age,
  Actual = data$`Total Social Media Usage`,
  Prediction = predictions$predictions
)
# Drop incomplete pairs up front so the plot layers do not warn about them.
plot_predict <- plot_predict[complete.cases(plot_predict[c("Actual", "Prediction")]), ]
# Scatter plot of actual vs predicted usage with a smoothed trend line
ggplot(plot_predict, aes(x = Actual, y = Prediction)) +
  geom_point() +
  geom_smooth() +
  labs(title = "Actual vs Predicted Total Social Media Usage", x = "Actual", y = "Predicted")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'

# Scatterplot of followers vs posts for two groups of respondents:
#   blue - more than 1000 followers
#   red  - at most 1000 followers but more than 100 posts
# local() keeps the helper vectors out of the global environment.
local({
  followers <- data$`How many followers do you have on Instagram?`
  posts <- data$`How many posts do you have on Instagram?`
  # which() drops rows where either comparison is NA
  many_followers <- which(followers > 1000)
  many_posts_only <- which(followers <= 1000 & posts > 100)
  shown <- c(many_followers, many_posts_only)
  # Size the axes from BOTH groups. plot() otherwise fits them to the blue
  # points only, and every red point (followers <= 1000) would fall outside
  # the x-range and be clipped.
  plot(
    followers[many_followers], posts[many_followers],
    col = "blue",
    xlim = range(followers[shown], na.rm = TRUE),
    ylim = range(posts[shown], na.rm = TRUE),
    xlab = "Number of followers", ylab = "Number of posts"
  )
  points(followers[many_posts_only], posts[many_posts_only], col = "red")
  legend("topright", legend = c("More than 1000 followers", "More than 100 posts"), col = c("blue", "red"), pch = 1)
})

require(ggplot2)
# Gender shares as a pie chart: a single stacked bar drawn in polar coordinates
ggplot(data = data, mapping = aes(x = "", fill = Gender)) +
  geom_bar(width = 1) +
  labs(title = "Gender Distribution", fill = "Gender") +
  coord_polar(theta = "y")

# Age histogram in 5-year bins; the fill is mapped to each distinct age value
ggplot(data = data, mapping = aes(x = Age, fill = factor(Age))) +
  geom_histogram(color = "black", binwidth = 5) +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count") +
  scale_fill_viridis_d() +
  theme_minimal()

# Dodged bar chart of respondent counts per education level, split by gender,
# with each bar's count printed above it
ggplot(data, aes(x = `Highest Education`, fill = Gender)) +
  # Bar outline thickness is set with `linewidth`; `size` is deprecated for
  # lines since ggplot2 3.4.0
  geom_bar(color = "black", linewidth = 0.5, width = 0.7, position = position_dodge()) +
  labs(title = "Education Level Distribution", x = "Education Level", y = "Count") +
  theme_minimal() +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Labels use the same dodge width as the bars so they sit above the right bar
  geom_text(aes(label = after_stat(count)), stat = "count", position = position_dodge(width = 0.7), vjust = -0.5, size = 3)

# Stacked respondent counts per phone operating system, coloured by gender
ggplot(data = data, mapping = aes(x = `Phone OS`, fill = Gender)) +
  geom_bar() +
  ggtitle("Phone Operating System Distribution") +
  xlab("Phone Operating System") +
  ylab("Count")

library(ggplot2)
# Total social media usage by zone, with one dodged bar position per gender.
# NOTE(review): every row is drawn as its own bar, so rows sharing a Zone and
# Gender appear to be drawn on top of each other at the same dodge position -
# confirm whether a per-group summary (sum or mean) was intended.
ggplot(data = data, mapping = aes(x = `Zone`, y = `Total Social Media Usage`, fill = Gender)) +
  geom_col(position = "dodge") +
  scale_fill_brewer(palette = "Set3") + # Colourful qualitative palette
  labs(title = "Gender by Zone/Area with Total Social Media Usage", x = "Zone/Area", y = "Total Social Media Usage") +
  theme_minimal()

# Phone operating system shares as a pie chart (stacked bar in polar coordinates)
ggplot(data = data, mapping = aes(x = "", fill = `Phone OS`)) +
  geom_bar(width = 1) +
  labs(title = "Phone Operating System Distribution", fill = "Phone Operating System") +
  coord_polar(theta = "y")

# Weekly Facebook minutes against weekly Instagram minutes, one point per respondent
ggplot(
  data = data,
  mapping = aes(
    x = `Time Spent on Facebook in last week (in minutes)`,
    y = `Time Spent on Instagram in last week (in minutes)`
  )
) +
  geom_point(color = "#FFC0CB") +
  ggtitle("Time Spent on Facebook vs. Time Spent on Instagram") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Time Spent on Instagram (in minutes)")

# Histogram of total social media usage in 500-minute bins, with a tinted
# background and enlarged black text
ggplot(data = data, mapping = aes(x = `Total Social Media Usage`)) +
  geom_histogram(color = "black", fill = "#FFC0CB", binwidth = 500) +
  ggtitle("Social Media Usage Distribution") +
  xlab("Total Social Media Usage (in minutes)") +
  ylab("Count") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#ADD8E6")
  )

library(ggplot2)
# Stacked bars of total social media usage per profession/activity
ggplot(
  data = data,
  mapping = aes(x = `Profession/Activity`, y = `Total Social Media Usage`, fill = `Profession/Activity`)
) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") + # Colourful qualitative palette
  labs(title = "Total Social Media Usage by Profession/Activity", x = "Profession/Activity", y = "Total Social Media Usage") +
  theme_minimal()

# Histogram of weekly WhatsApp minutes in 100-minute bins, stacked by gender
ggplot(data = data, mapping = aes(x = `Time Spent on WhatsApp in last week (in minutes)`, fill = Gender)) +
  geom_histogram(binwidth = 100) +
  ggtitle("Time Spent on WhatsApp Distribution") +
  xlab("Time Spent on WhatsApp (in minutes)") +
  ylab("Count")

# Mean total social media usage for each education level
ggplot(
  data = data,
  mapping = aes(x = `Highest Education`, y = `Total Social Media Usage`, fill = `Highest Education`)
) +
  geom_bar(stat = "summary", fun = "mean") +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90", "#FFD700")) +
  ggtitle("Education Level vs. Social Media Usage") +
  xlab("Education Level") +
  ylab("Total Social Media Usage (in minutes)") +
  theme_minimal()

# Histogram of Instagram post counts in bins of 50
ggplot(data = data, mapping = aes(x = `How many posts do you have on Instagram?`)) +
  geom_histogram(fill = "#ADD8E6", binwidth = 50) +
  ggtitle("Number of Posts on Instagram Distribution") +
  xlab("Number of Posts on Instagram") +
  ylab("Count") +
  theme_minimal()

# Age against Instagram post count, points shaded by age
ggplot(data = data, mapping = aes(x = Age, y = `How many posts do you have on Instagram?`, color = Age)) +
  geom_point() +
  ggtitle("Age vs Number of Posts on Instagram") +
  xlab("Age") +
  ylab("Number of Posts on Instagram")

# Histogram of weekly Facebook minutes in 100-minute bins
ggplot(data = data, mapping = aes(x = `Time Spent on Facebook in last week (in minutes)`)) +
  geom_histogram(fill = "pink", binwidth = 100) +
  ggtitle("Time Spent on Facebook Distribution") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Count") +
  theme_minimal()

# Age against weekly Facebook minutes, points shaded by age
ggplot(data = data, mapping = aes(x = Age, y = `Time Spent on Facebook in last week (in minutes)`, color = Age)) +
  geom_point() +
  ggtitle("Age vs Time Spent on Facebook") +
  xlab("Age") +
  ylab("Time Spent on Facebook (in minutes)")

# Age against weekly Instagram minutes, points shaded by age
ggplot(data = data, mapping = aes(x = Age, y = `Time Spent on Instagram in last week (in minutes)`, color = Age)) +
  geom_point() +
  ggtitle("Age vs Time Spent on Instagram") +
  xlab("Age") +
  ylab("Time Spent on Instagram (in minutes)")

# Histogram of total social media usage in 500-minute bins, stacked by gender
ggplot(data = data, mapping = aes(x = `Total Social Media Usage`, fill = Gender)) +
  geom_histogram(binwidth = 500) +
  scale_fill_manual(values = c("#ADD8E6", "#FFC0CB", "#FF0000")) + # Three fill colours supplied
  ggtitle("Total Social Media Usage Distribution") +
  xlab("Total Social Media Usage") +
  ylab("Count") +
  theme_minimal() +
  theme(legend.position = "bottom")

library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 4.3.2
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Outline of India taken from the "world" map database
india_map <- map_data("world", region = "India")
# Respondent locations coloured by state, with the country outline drawn on top
p <- ggplot(data, aes(x = Longitude, y = Latitude, color = State, fill = State)) +
  geom_point(size = 3) + # One fixed-size point per respondent
  geom_polygon(
    data = india_map,
    mapping = aes(x = long, y = lat, group = group),
    color = "black",
    fill = NA
  ) +
  coord_fixed(ratio = 1) + # Equal scaling of longitude and latitude units
  theme_minimal() # Plain background theme
print(p)

# Age against total social media usage
library(ggplot2)
# Scatter plot with points shaded by age
ggplot(data = data, mapping = aes(x = Age, y = `Total Social Media Usage`, color = Age)) +
  geom_point() +
  ggtitle("Age vs Total Social Media Usage") +
  xlab("Age") +
  ylab("Total Social Media Usage")

# Instagram follower count against Instagram post count
ggplot(
  data = data,
  mapping = aes(
    x = `How many followers do you have on Instagram?`,
    y = `How many posts do you have on Instagram?`
  )
) +
  geom_point() +
  ggtitle("Number of Followers vs Number of Posts on Instagram") +
  xlab("Number of Followers on Instagram") +
  ylab("Number of Posts on Instagram")

# Current status shares as a pie chart (stacked bar in polar coordinates)
ggplot(data = data, mapping = aes(x = "", fill = `Current Status`)) +
  geom_bar(width = 1) +
  labs(title = "Current Status Distribution", fill = "Current Status") +
  coord_polar(theta = "y")

#.******************************************************************************************************************************************
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the ggplot2 package
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks maps::map()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Keep only the demographic and per-platform usage columns used by the bar
# charts that follow (this overwrites `data`)
data <- data %>%
  select(
    Age,
    Gender,
    `Total WhatsApp Usage`,
    `Total Social Media Usage`,
    `Total Facebook Usage`,
    `Total Instagram Usage`,
    `Total YouTube Usage (minutes)`
  )
# Stacked bars of total Facebook usage at each age, coloured by gender
ggplot(data = data, mapping = aes(x = Age, y = `Total Facebook Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "pink")) +
  labs(title = "Total Facebook Usage by Age and Gender", x = "Age Group", y = "Total Usage (minutes)") +
  theme_minimal()

# Stacked bars of total Instagram usage at each age, coloured by gender
ggplot(data = data, mapping = aes(x = Age, y = `Total Instagram Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  labs(title = "Total Instagram Usage by Age and Gender", x = "Age Group", y = "Total Usage (minutes)") +
  theme_minimal()

# Stacked bars of total WhatsApp usage at each age, coloured by gender
ggplot(data = data, mapping = aes(x = Age, y = `Total WhatsApp Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "pink")) +
  labs(title = "Total WhatsApp Usage by Age and Gender", x = "Age Group", y = "Total Usage (minutes)") +
  theme_minimal()

# Stacked bars of total YouTube usage at each age, coloured by gender
ggplot(data = data, mapping = aes(x = Age, y = `Total YouTube Usage (minutes)`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "skyblue")) +
  labs(title = "Total YouTube Usage by Age and Gender", x = "Age Group", y = "Total Usage (minutes)") +
  theme_minimal()

# Reshape the four per-platform usage columns into Platform / Total Usage pairs
data_long <- pivot_longer(
  data,
  cols = c(
    `Total Facebook Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`,
    `Total YouTube Usage (minutes)`
  ),
  names_to = "Platform",
  values_to = "Total Usage"
)
# One panel per platform, each with its own y-axis scale
ggplot(data = data_long, mapping = aes(x = Age, y = `Total Usage`, fill = Gender)) +
  geom_col() +
  facet_wrap(~ Platform, scales = "free_y") +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  labs(title = "Social Media Usage by Age and Gender", x = "Age Group", y = "Total Usage (minutes)") +
  theme_minimal()

library(readr)
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load required library
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Restrict `data` to the two clustering features (this overwrites `data`)
data <- data[c("Age", "Income from YouTube (rs,month)")]
# Two-cluster k-means with a fixed seed so the assignment is reproducible.
# NOTE(review): the features are clustered on their raw scales, and the income
# values printed below are far larger than the ages, so income presumably
# dominates the distances - confirm whether scaling was intended.
set.seed(123)
k <- 2
kmeans_result <- kmeans(x = data, centers = k)
# Store each row's cluster label as a factor column
data[["Cluster"]] <- factor(kmeans_result$cluster)
# Show the data with its cluster assignments
print(data)
## # A tibble: 1,628 × 3
## Age `Income from YouTube (rs,month)` Cluster
## <dbl> <dbl> <fct>
## 1 24 88447 2
## 2 39 64764 2
## 3 22 4387 1
## 4 26 99695 2
## 5 50 81297 2
## 6 25 51770 2
## 7 52 38003 1
## 8 45 54216 2
## 9 25 48149 1
## 10 27 12657 1
## # ℹ 1,618 more rows
# Elbow method: total within-cluster sum of squares (WCSS) for k = 1 to 10,
# computed on the same two features as the clustering above
wcss <- vapply(
  1:10,
  function(n_clusters) {
    kmeans(data[, c("Age", "Income from YouTube (rs,month)")], centers = n_clusters)$tot.withinss
  },
  numeric(1)
)
# WCSS against k, with the chosen k marked by a dashed red line
plot(
  seq_along(wcss), wcss,
  type = "b",
  xlab = "K (Number of Clusters)",
  ylab = "WCSS (Within-Cluster Sum of Squares)"
)
abline(v = k, lty = 2, col = "red")

# Points coloured by cluster, with the fitted cluster centres overlaid as
# dark red crosses
ggplot(data = data, mapping = aes(x = Age, y = `Income from YouTube (rs,month)`, color = Cluster)) +
  geom_point() +
  geom_point(
    data = as.data.frame(kmeans_result$centers),
    mapping = aes(x = Age, y = `Income from YouTube (rs,month)`),
    shape = 4,
    size = 3,
    color = "darkred"
  ) +
  scale_color_manual(values = c("blue", "green")) +
  labs(title = "K-Means Clustering (k = 2) with Centroids", x = "Age", y = "Income from YouTube")

library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep Age and total Facebook usage, dropping rows with a missing value
data <- na.omit(select(data, Age, `Total Facebook Usage`))
# Coerce Age to numeric
data[["Age"]] <- as.numeric(data[["Age"]])
# Simple linear regression of Facebook usage on age
model <- lm(`Total Facebook Usage` ~ Age, data = data)
# Model predictions for ages 31 to 35
new_data <- data.frame(Age = seq(31, 35, by = 1))
prediction <- predict(model, newdata = new_data)
# Label each respondent as "Older" (above 35) or "Younger"
data[["Group"]] <- ifelse(data[["Age"]] > 35, "Older", "Younger")
# Scatter plot coloured by group, with a single blue regression line and a
# dashed marker at the age-35 threshold
ggplot(data = data, mapping = aes(x = Age, y = `Total Facebook Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_color_manual(values = c("Younger" = "green", "Older" = "purple")) +
  labs(title = "Age vs. Total Facebook Usage", x = "Age", y = "Total Facebook Usage")
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep Age and total Instagram usage, dropping rows with a missing value
data <- na.omit(select(data, Age, `Total Instagram Usage`))
# Coerce Age to numeric
data[["Age"]] <- as.numeric(data[["Age"]])
# Simple linear regression of Instagram usage on age
model <- lm(`Total Instagram Usage` ~ Age, data = data)
# Model predictions for ages 31 to 35
new_data <- data.frame(Age = seq(31, 35, by = 1))
prediction <- predict(model, newdata = new_data)
# Label each respondent as "Older" (above 35) or "Younger"
data[["Group"]] <- ifelse(data[["Age"]] > 35, "Older", "Younger")
# Scatter plot coloured by group, with a single blue regression line and a
# dashed marker at the age-35 threshold
ggplot(data = data, mapping = aes(x = Age, y = `Total Instagram Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_color_manual(values = c("Younger" = "maroon", "Older" = "blue")) +
  labs(title = "Age vs. Total Instagram Usage", x = "Age", y = "Total Instagram Usage")
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep Age and total WhatsApp usage, dropping rows with a missing value
data <- na.omit(select(data, Age, `Total WhatsApp Usage`))
# Coerce Age to numeric
data[["Age"]] <- as.numeric(data[["Age"]])
# Simple linear regression of WhatsApp usage on age
model <- lm(`Total WhatsApp Usage` ~ Age, data = data)
# Model predictions for ages 31 to 35
new_data <- data.frame(Age = seq(31, 35, by = 1))
prediction <- predict(model, newdata = new_data)
# Label each respondent as "Older" (above 35) or "Younger"
data[["Group"]] <- ifelse(data[["Age"]] > 35, "Older", "Younger")
# Scatter plot coloured by group, with a single blue regression line and a
# dashed marker at the age-35 threshold
ggplot(data = data, mapping = aes(x = Age, y = `Total WhatsApp Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(title = "Age vs. Total WhatsApp Usage", x = "Age", y = "Total WhatsApp Usage")
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep Age and total social media usage, dropping rows with a missing value
data <- na.omit(select(data, Age, `Total Social Media Usage`))
# Coerce Age to numeric
data[["Age"]] <- as.numeric(data[["Age"]])
# Simple linear regression of total social media usage on age
model <- lm(`Total Social Media Usage` ~ Age, data = data)
# Model predictions for ages 31 to 35
new_data <- data.frame(Age = seq(31, 35, by = 1))
prediction <- predict(model, newdata = new_data)
# Label each respondent as "Older" (above 35) or "Younger"
data[["Group"]] <- ifelse(data[["Age"]] > 35, "Older", "Younger")
# Scatter plot coloured by group, with a single blue regression line and a
# dashed marker at the age-35 threshold
ggplot(data = data, mapping = aes(x = Age, y = `Total Social Media Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(title = "Age vs. Total Social Media Usage", x = "Age", y = "Total Social Media Usage")
## `geom_smooth()` using formula = 'y ~ x'

library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep Age plus the four usage totals, dropping rows with a missing value
data <- na.omit(
  select(
    data,
    Age,
    `Total Facebook Usage`,
    `Total Social Media Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`
  )
)
# Coerce Age to numeric
data[["Age"]] <- as.numeric(data[["Age"]])
# For each usage total, regress it on age and print the model's predictions
# for ages 31 to 35
platforms <- c("Facebook", "Social Media", "Instagram", "WhatsApp")
new_data <- data.frame(Age = seq(31, 35, by = 1))
for (platform in platforms) {
  # Build the formula from the column name, e.g. `Total Facebook Usage` ~ Age
  model <- lm(as.formula(sprintf("`Total %s Usage` ~ Age", platform)), data = data)
  prediction <- predict(model, newdata = new_data)
  cat("Predicted values for Total", platform, "Usage:", prediction, "\n")
}
## Predicted values for Total Facebook Usage: 284.0396 292.0368 300.0339 308.031 316.0281
## Predicted values for Total Social Media Usage: 2002.076 1973.391 1944.706 1916.022 1887.337
## Predicted values for Total Instagram Usage: 615.9642 590.8018 565.6394 540.477 515.3145
## Predicted values for Total WhatsApp Usage: 1102.072 1090.553 1079.033 1067.514 1055.994
# Reshape every column except Age into Platform / Usage pairs
data_long <- pivot_longer(data, cols = -Age, names_to = "Platform", values_to = "Usage")
# Label each row as "Older" (above 35) or "Younger"
data_long[["Group"]] <- ifelse(data_long[["Age"]] > 35, "Older", "Younger")
# One panel per usage column: points coloured by group, a blue regression
# line, and a dashed marker at the age-35 threshold
ggplot(data = data_long, mapping = aes(x = Age, y = Usage, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  geom_vline(xintercept = 35, linetype = "dashed") +
  facet_wrap(~Platform, scales = "free_y") +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  labs(title = "Age vs. Social Media Usage by Platform", x = "Age", y = "Usage")
## `geom_smooth()` using formula = 'y ~ x'

# Closing slide: a blank canvas with a single centred text label
ggplot() +
  # `size` is set as a constant outside aes(); inside aes() it would be mapped
  # through the size scale and add a legend instead of setting the text size
  geom_text(aes(x = 0.5, y = 0.5, label = "Thank You Mam"), size = 10) +
  theme_void()
