Loading the Library and Data, then Summarising the Social Media Data Set
with Statistics such as Mean, Median, Max, Min and Sum
library(readr)
# Import the social media CSV with readr; the result is stored in `data`.
data <- read_csv(
  file = "C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv"
)
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet-style viewer only in an interactive session; View()
# needs a display and may fail or do nothing when the script is run or
# knitted non-interactively.
if (interactive()) {
  View(data)
}
# List the names of all columns in the data set
colnames(data)
## [1] "Age"
## [2] "City"
## [3] "Current Status"
## [4] "Do you own multiple profiles on Instagram?"
## [5] "Gender"
## [6] "Highest Education"
## [7] "Location (City Airport Code)"
## [8] "Phone OS"
## [9] "State"
## [10] "Zone"
## [11] "How many followers do you have on Instagram?"
## [12] "How many posts do you have on Instagram?"
## [13] "Latitude"
## [14] "Longitude"
## [15] "Time Spent on Facebook in last week (in minutes)"
## [16] "Time Spent on Facebook in last weekend (in minutes)"
## [17] "Time Spent on Instagram in last week (in minutes)"
## [18] "Time Spent on Instagram in last weekend (in minutes)"
## [19] "Time Spent on WhatsApp in last week (in minutes)"
## [20] "Time Spent on WhatsApp in last weekend (in minutes)"
## [21] "Total Facebook Usage"
## [22] "Total Instagram Usage"
## [23] "Total Social Media Usage"
## [24] "Total Week Usage"
## [25] "Total Weekend Usage"
## [26] "Total WhatsApp Usage"
## [27] "How many subscriber do you have on youtube"
## [28] "Income from YouTube (rs,month)"
## [29] "Internet Speed (Mbps)"
## [30] "Total YouTube Usage (minutes)"
## [31] "Weekly YouTube Usage (minutes)"
## [32] "Profession/Activity"
## [33] "Likes on Instagram"
## [34] "Shares on Instagram"
## [35] "Hobby"
## [36] "Verified Account on instagram"
## [37] "Date of Birth"
## [38] "Time Spent on Twitter per Week (minutes)"
## [39] "Total Time Spent on Twitter (minutes)"
## [40] "Groups Joined on Instagram"
# Show the compact structure of the data set: each column's type and first values
str(object = data)
## spc_tbl_ [1,628 × 40] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Age : num [1:1628] 24 39 22 26 50 25 52 45 25 27 ...
## $ City : chr [1:1628] "Delhi" "Delhi" "Mumbai" "Bengaluru" ...
## $ Current Status : chr [1:1628] "Working professional" "Working professional" "Working professional" "Sabbatical" ...
## $ Do you own multiple profiles on Instagram? : chr [1:1628] "No" "No" "No" "Yes" ...
## $ Gender : chr [1:1628] "Female" "Female" "Male" "Female" ...
## $ Highest Education : chr [1:1628] "Graduation" "Post graduation" "Graduation" "Graduation" ...
## $ Location (City Airport Code) : chr [1:1628] "DEL" "DEL" "BOM" "BLR" ...
## $ Phone OS : chr [1:1628] "iOs" "iOs" "Android" "Android" ...
## $ State : chr [1:1628] "Delhi" "Delhi" "Maharashtra" "Karnataka" ...
## $ Zone : chr [1:1628] "Northern" "Northern" "Western" "Southern" ...
## $ How many followers do you have on Instagram? : num [1:1628] 456 0 400 485 0 ...
## $ How many posts do you have on Instagram? : num [1:1628] 20 0 6 16 0 220 0 0 340 37 ...
## $ Latitude : num [1:1628] 28.7 28.7 19 13 28.7 ...
## $ Longitude : num [1:1628] 77.2 77.2 72.8 77.6 77.2 ...
## $ Time Spent on Facebook in last week (in minutes) : num [1:1628] 0 6000 500 1500 1500 1000 300 983 1160 480 ...
## $ Time Spent on Facebook in last weekend (in minutes) : num [1:1628] 0 2160 2000 1500 1500 1200 900 873 870 840 ...
## $ Time Spent on Instagram in last week (in minutes) : num [1:1628] 770 0 1000 2000 0 3000 0 0 1240 720 ...
## $ Time Spent on Instagram in last weekend (in minutes): num [1:1628] 400 0 1000 2000 0 840 215 0 340 300 ...
## $ Time Spent on WhatsApp in last week (in minutes) : num [1:1628] 900 5000 7000 1680 2400 2100 1800 583 1760 3000 ...
## $ Time Spent on WhatsApp in last weekend (in minutes) : num [1:1628] 120 2000 2000 1680 1300 600 1500 834 450 600 ...
## $ Total Facebook Usage : num [1:1628] 0 8160 2500 3000 3000 ...
## $ Total Instagram Usage : num [1:1628] 1170 0 2000 4000 0 3840 215 0 1580 1020 ...
## $ Total Social Media Usage : num [1:1628] 2190 15160 13500 10360 6700 ...
## $ Total Week Usage : num [1:1628] 1670 11000 8500 5180 3900 ...
## $ Total Weekend Usage : num [1:1628] 520 4160 5000 5180 2800 ...
## $ Total WhatsApp Usage : num [1:1628] 1020 7000 9000 3360 3700 ...
## $ How many subscriber do you have on youtube : num [1:1628] 33356 25394 34603 13645 49876 ...
## $ Income from YouTube (rs,month) : num [1:1628] 88447 64764 4387 99695 81297 ...
## $ Internet Speed (Mbps) : num [1:1628] 46.6 83.5 50.5 99.6 22.1 ...
## $ Total YouTube Usage (minutes) : num [1:1628] 272543 220056 2629 154271 178485 ...
## $ Weekly YouTube Usage (minutes) : num [1:1628] 305 2090 528 1545 2836 ...
## $ Profession/Activity : chr [1:1628] "Photographer" "Singer" "Content Creator" "Blogger" ...
## $ Likes on Instagram : chr [1:1628] "679k" "400k" "447k" "443k" ...
## $ Shares on Instagram : chr [1:1628] "16K" "28K" "86K" "39K" ...
## $ Hobby : chr [1:1628] "Reading" "Reading" "Playing Musical Instruments" "Cooking/Baking" ...
## $ Verified Account on instagram : chr [1:1628] "Yes" "Yes" "No" "No" ...
## $ Date of Birth : chr [1:1628] "8/31/1982" "12/17/1992" "4/16/2002" "12/10/1965" ...
## $ Time Spent on Twitter per Week (minutes) : num [1:1628] 356 1774 1848 199 1210 ...
## $ Total Time Spent on Twitter (minutes) : num [1:1628] 2492 12418 12936 1393 8470 ...
## $ Groups Joined on Instagram : num [1:1628] 4 5 2 4 3 9 2 3 2 6 ...
## - attr(*, "spec")=
## .. cols(
## .. Age = col_double(),
## .. City = col_character(),
## .. `Current Status` = col_character(),
## .. `Do you own multiple profiles on Instagram?` = col_character(),
## .. Gender = col_character(),
## .. `Highest Education` = col_character(),
## .. `Location (City Airport Code)` = col_character(),
## .. `Phone OS` = col_character(),
## .. State = col_character(),
## .. Zone = col_character(),
## .. `How many followers do you have on Instagram?` = col_number(),
## .. `How many posts do you have on Instagram?` = col_number(),
## .. Latitude = col_double(),
## .. Longitude = col_double(),
## .. `Time Spent on Facebook in last week (in minutes)` = col_number(),
## .. `Time Spent on Facebook in last weekend (in minutes)` = col_number(),
## .. `Time Spent on Instagram in last week (in minutes)` = col_number(),
## .. `Time Spent on Instagram in last weekend (in minutes)` = col_number(),
## .. `Time Spent on WhatsApp in last week (in minutes)` = col_number(),
## .. `Time Spent on WhatsApp in last weekend (in minutes)` = col_number(),
## .. `Total Facebook Usage` = col_number(),
## .. `Total Instagram Usage` = col_number(),
## .. `Total Social Media Usage` = col_number(),
## .. `Total Week Usage` = col_number(),
## .. `Total Weekend Usage` = col_number(),
## .. `Total WhatsApp Usage` = col_number(),
## .. `How many subscriber do you have on youtube` = col_double(),
## .. `Income from YouTube (rs,month)` = col_double(),
## .. `Internet Speed (Mbps)` = col_double(),
## .. `Total YouTube Usage (minutes)` = col_double(),
## .. `Weekly YouTube Usage (minutes)` = col_double(),
## .. `Profession/Activity` = col_character(),
## .. `Likes on Instagram` = col_character(),
## .. `Shares on Instagram` = col_character(),
## .. Hobby = col_character(),
## .. `Verified Account on instagram` = col_character(),
## .. `Date of Birth` = col_character(),
## .. `Time Spent on Twitter per Week (minutes)` = col_double(),
## .. `Total Time Spent on Twitter (minutes)` = col_double(),
## .. `Groups Joined on Instagram` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Per-column summary: quartiles and mean for numeric columns,
# length/class/mode for character columns
summary(object = data)
## Age City Current Status
## Min. :13.00 Length:1628 Length:1628
## 1st Qu.:22.00 Class :character Class :character
## Median :24.00 Mode :character Mode :character
## Mean :26.86
## 3rd Qu.:27.00
## Max. :74.00
## Do you own multiple profiles on Instagram? Gender
## Length:1628 Length:1628
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## Highest Education Location (City Airport Code) Phone OS
## Length:1628 Length:1628 Length:1628
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## State Zone
## Length:1628 Length:1628
## Class :character Class :character
## Mode :character Mode :character
##
##
##
## How many followers do you have on Instagram?
## Min. : 0.0
## 1st Qu.: 183.0
## Median : 370.0
## Mean : 868.1
## 3rd Qu.: 657.0
## Max. :116000.0
## How many posts do you have on Instagram? Latitude Longitude
## Min. : 0.00 Min. : 8.486 Min. :69.67
## 1st Qu.: 10.00 1st Qu.:18.988 1st Qu.:72.84
## Median : 43.50 Median :22.563 Median :77.23
## Mean : 99.08 Mean :22.760 Mean :77.89
## 3rd Qu.: 111.25 3rd Qu.:28.652 3rd Qu.:78.46
## Max. :2858.00 Max. :32.736 Max. :94.91
## Time Spent on Facebook in last week (in minutes)
## Min. : 0.0
## 1st Qu.: 2.0
## Median : 63.0
## Mean : 175.2
## 3rd Qu.: 240.0
## Max. :6000.0
## Time Spent on Facebook in last weekend (in minutes)
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 30.00
## Mean : 75.69
## 3rd Qu.: 89.00
## Max. :2160.00
## Time Spent on Instagram in last week (in minutes)
## Min. : 0.0
## 1st Qu.: 120.0
## Median : 357.0
## Mean : 505.2
## 3rd Qu.: 675.0
## Max. :6000.0
## Time Spent on Instagram in last weekend (in minutes)
## Min. : 0.0
## 1st Qu.: 48.0
## Median : 135.0
## Mean : 215.0
## 3rd Qu.: 281.5
## Max. :2560.0
## Time Spent on WhatsApp in last week (in minutes)
## Min. : 4.0
## 1st Qu.: 300.0
## Median : 600.0
## Mean : 854.9
## 3rd Qu.:1009.0
## Max. :7000.0
## Time Spent on WhatsApp in last weekend (in minutes) Total Facebook Usage
## Min. : 0.0 Min. : 0.0
## 1st Qu.: 100.0 1st Qu.: 10.0
## Median : 200.0 Median : 101.5
## Mean : 294.9 Mean : 250.9
## 3rd Qu.: 360.0 3rd Qu.: 334.2
## Max. :2800.0 Max. :8160.0
## Total Instagram Usage Total Social Media Usage Total Week Usage
## Min. : 0.0 Min. : 12 Min. : 8
## 1st Qu.: 190.8 1st Qu.: 970 1st Qu.: 670
## Median : 522.5 Median : 1658 Median : 1170
## Mean : 720.2 Mean : 2121 Mean : 1535
## 3rd Qu.: 970.0 3rd Qu.: 2670 3rd Qu.: 1895
## Max. :8240.0 Max. :15780 Max. :12734
## Total Weekend Usage Total WhatsApp Usage
## Min. : 0.0 Min. : 9
## 1st Qu.: 243.0 1st Qu.: 450
## Median : 425.5 Median : 812
## Mean : 585.6 Mean :1150
## 3rd Qu.: 709.0 3rd Qu.:1400
## Max. :5180.0 Max. :9000
## How many subscriber do you have on youtube Income from YouTube (rs,month)
## Min. : 33 Min. : 11
## 1st Qu.:12783 1st Qu.:23870
## Median :24629 Median :47898
## Mean :24795 Mean :49166
## 3rd Qu.:36702 3rd Qu.:74162
## Max. :49939 Max. :99991
## Internet Speed (Mbps) Total YouTube Usage (minutes)
## Min. : 2.03 Min. : 224
## 1st Qu.:24.82 1st Qu.: 78476
## Median :50.47 Median :153087
## Mean :50.34 Mean :150846
## 3rd Qu.:75.83 3rd Qu.:225062
## Max. :99.93 Max. :299562
## Weekly YouTube Usage (minutes) Profession/Activity Likes on Instagram
## Min. : 2 Length:1628 Length:1628
## 1st Qu.: 782 Class :character Class :character
## Median :1456 Mode :character Mode :character
## Mean :1480
## 3rd Qu.:2226
## Max. :2998
## Shares on Instagram Hobby Verified Account on instagram
## Length:1628 Length:1628 Length:1628
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## Date of Birth Time Spent on Twitter per Week (minutes)
## Length:1628 Min. : 0
## Class :character 1st Qu.: 561
## Mode :character Median :1168
## Mean :1179
## 3rd Qu.:1790
## Max. :2399
## Total Time Spent on Twitter (minutes) Groups Joined on Instagram
## Min. : 0 Min. : 0.00
## 1st Qu.: 3927 1st Qu.: 2.00
## Median : 8180 Median : 5.00
## Mean : 8255 Mean : 4.96
## 3rd Qu.:12532 3rd Qu.: 8.00
## Max. :16793 Max. :10.00
# Preview the first six rows of the data set
head(data, n = 6L)
## # A tibble: 6 × 40
## Age City `Current Status` Do you own multiple …¹ Gender `Highest Education`
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working profess… No Female Graduation
## 2 39 Delhi Working profess… No Female Post graduation
## 3 22 Mumb… Working profess… No Male Graduation
## 4 26 Beng… Sabbatical Yes Female Graduation
## 5 50 Delhi Working profess… No Male Graduation
## 6 25 Vish… Working profess… Yes Female Post graduation
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 34 more variables: `Location (City Airport Code)` <chr>, `Phone OS` <chr>,
## # State <chr>, Zone <chr>,
## # `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, `Time Spent on Facebook in last week (in minutes)` <dbl>,
## # `Time Spent on Facebook in last weekend (in minutes)` <dbl>, …
# Create a matrix from the first 3 rows and 3 columns of the data.
# as.matrix() converts the tibble cell by cell. Calling matrix() on the tibble
# instead treats it as a list of columns, so every cell ends up holding a whole
# column vector (printed as "numeric,3" / "character,3") rather than one value.
# Because the selected columns mix numeric and character data, the result is a
# character matrix.
matrix_data <- as.matrix(data[1:3, 1:3])
# Print the matrix
matrix_data
## Age City Current Status
## [1,] "24" "Delhi" "Working professional"
## [2,] "39" "Delhi" "Working professional"
## [3,] "22" "Mumbai" "Working professional"
# Mean of the Age column
mean(data[["Age"]])
## [1] 26.85811
# Median of the Age column
median(data[["Age"]])
## [1] 24
# Largest value in the Age column
max(data[["Age"]])
## [1] 74
# Smallest value in the Age column
min(data[["Age"]])
## [1] 13
# Count the rows whose Gender is exactly "Male"
sum(data[["Gender"]] == "Male")
## [1] 813
# Count the rows whose Gender is exactly "Female"
sum(data[["Gender"]] == "Female")
## [1] 813
# Mean Instagram follower count
mean(data[["How many followers do you have on Instagram?"]])
## [1] 868.1474
# Median Instagram follower count
median(data[["How many followers do you have on Instagram?"]])
## [1] 370
# Largest Instagram follower count
max(data[["How many followers do you have on Instagram?"]])
## [1] 116000
# Smallest Instagram follower count
min(data[["How many followers do you have on Instagram?"]])
## [1] 0
# Mean Instagram post count
mean(data[["How many posts do you have on Instagram?"]])
## [1] 99.07985
# Median Instagram post count
median(data[["How many posts do you have on Instagram?"]])
## [1] 43.5
# Largest Instagram post count
max(data[["How many posts do you have on Instagram?"]])
## [1] 2858
# Smallest Instagram post count
min(data[["How many posts do you have on Instagram?"]])
## [1] 0
# Sum of last week's Facebook minutes over all rows
sum(data[["Time Spent on Facebook in last week (in minutes)"]])
## [1] 285275
# Sum of last week's Instagram minutes over all rows
sum(data[["Time Spent on Instagram in last week (in minutes)"]])
## [1] 822407
# Sum of last week's WhatsApp minutes over all rows
sum(data[["Time Spent on WhatsApp in last week (in minutes)"]])
## [1] 1391726
# Mean of the Total Social Media Usage column
mean(data[["Total Social Media Usage"]])
## [1] 2120.885
# Median of the Total Social Media Usage column
median(data[["Total Social Media Usage"]])
## [1] 1658.5
# Largest value in the Total Social Media Usage column
max(data[["Total Social Media Usage"]])
## [1] 15780
Question: In what different ways can the data be filtered based on
conditions such as gender, education, location, phone OS, and
social media usage?
# Keep the rows whose Gender is "Female" (which() skips missing values)
females <- data[which(data[["Gender"]] == "Female"), ]
females
## # A tibble: 813 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 45 Delhi Working professional No Female
## 7 21 Delhi Working professional No Female
## 8 26 Delhi Working professional No Female
## 9 25 Mumbai Sabbatical No Female
## 10 22 Kolkata Student No Female
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Gender is "Male" (which() skips missing values)
males <- data[which(data[["Gender"]] == "Male"), ]
males
## # A tibble: 813 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 50 Delhi Working professional No Male
## 3 52 Jaipur Working professional No Male
## 4 25 Bengaluru Student No Male
## 5 27 Delhi Student Yes Male
## 6 27 Bengaluru Working professional No Male
## 7 22 Delhi Sabbatical Yes Male
## 8 26 Agra Working professional No Male
## 9 25 Ahmedabad Student No Male
## 10 18 Jaipur Student No Male
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Current Status is "Student"
students <- data[which(data[["Current Status"]] == "Student"), ]
students
## # A tibble: 637 × 40
## Age City `Current Status` Do you own multiple profiles on Ins…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 27 Delhi Student Yes Male
## 3 25 Ahmedabad Student No Male
## 4 18 Jaipur Student No Male
## 5 22 Kolkata Student No Female
## 6 26 Kolkata Student No Female
## 7 23 Delhi Student No Female
## 8 17 Mumbai Student Yes Male
## 9 23 Kolkata Student Yes Female
## 10 22 Ahmedabad Student No Male
## # ℹ 627 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Current Status is "Working professional"
working_professionals <- data[
  which(data[["Current Status"]] == "Working professional"),
]
working_professionals
## # A tibble: 796 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 25 Vishakhapatnam Working professional Yes Female
## 6 52 Jaipur Working professional No Male
## 7 27 Bengaluru Working professional No Male
## 8 45 Delhi Working professional No Female
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 786 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Zone is "Northern"
northern <- data[which(data[["Zone"]] == "Northern"), ]
northern
## # A tibble: 542 × 40
## Age City `Current Status` Do you own multiple profiles on In…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 52 Jaipur Working professional No Male
## 5 27 Delhi Student Yes Male
## 6 45 Delhi Working professional No Female
## 7 22 Delhi Sabbatical Yes Male
## 8 21 Delhi Working professional No Female
## 9 26 Agra Working professional No Male
## 10 26 Delhi Working professional No Female
## # ℹ 532 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Zone is "Southern"
southern <- data[which(data[["Zone"]] == "Southern"), ]
southern
## # A tibble: 211 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 25 Bengaluru Student No Male
## 4 27 Bengaluru Working professional No Male
## 5 32 Bengaluru Working professional No Male
## 6 27 Chennai Working professional No Male
## 7 23 Chennai Student No Female
## 8 22 Chennai Student Yes Female
## 9 32 Bengaluru Working professional No Female
## 10 21 Chennai Student No Female
## # ℹ 201 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Zone is "Eastern"
eastern <- data[which(data[["Zone"]] == "Eastern"), ]
eastern
## # A tibble: 271 × 40
## Age City `Current Status` Do you own multiple profiles …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 45 Durgapur Sabbatical No Female
## 2 24 Cooch-behar Working professional No Male
## 3 22 Kolkata Student No Female
## 4 26 Kolkata Student No Female
## 5 50 Kolkata Working professional No Female
## 6 23 Kolkata Student Yes Female
## 7 25 Kolkata Working professional No Male
## 8 45 Bagdogra Working professional Yes Female
## 9 25 Kolkata Student No Male
## 10 45 Kolkata Working professional No Female
## # ℹ 261 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Zone is "Western"
western <- data[which(data[["Zone"]] == "Western"), ]
western
## # A tibble: 543 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 25 Ahmedabad Student No Male
## 3 25 Mumbai Sabbatical No Female
## 4 25 Ahmedabad Self Employed Yes Male
## 5 17 Mumbai Student Yes Male
## 6 22 Ahmedabad Student No Male
## 7 24 Pune Working professional No Female
## 8 21 Mumbai Sabbatical Yes Female
## 9 51 Ahmedabad Working professional No Male
## 10 22 Mumbai Sabbatical No Male
## # ℹ 533 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Phone OS is "Android"
android <- data[which(data[["Phone OS"]] == "Android"), ]
android
## # A tibble: 1,115 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 26 Bengaluru Sabbatical Yes Female
## 3 25 Vishakhapatnam Working professional Yes Female
## 4 52 Jaipur Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 1,105 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Phone OS is "iOs" (the spelling used in the data)
ios <- data[which(data[["Phone OS"]] == "iOs"), ]
ios
## # A tibble: 508 × 40
## Age City `Current Status` Do you own multiple profiles on …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 45 Delhi Working professional No Female
## 5 22 Delhi Sabbatical Yes Male
## 6 18 Jaipur Student No Male
## 7 22 Chennai Student Yes Female
## 8 23 Guwahati Student No Female
## 9 45 Bagdogra Working professional Yes Female
## 10 28 Kolkata Sabbatical No Female
## # ℹ 498 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Highest Education is "Graduation"
graduation <- data[which(data[["Highest Education"]] == "Graduation"), ]
graduation
## # A tibble: 950 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 50 Delhi Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 940 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Highest Education is "Post graduation"
post_graduation <- data[
  which(data[["Highest Education"]] == "Post graduation"),
]
post_graduation
## # A tibble: 541 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 52 Jaipur Working professional No Male
## 4 22 Delhi Sabbatical Yes Male
## 5 26 Delhi Working professional No Female
## 6 25 Mumbai Sabbatical No Female
## 7 22 Kolkata Student No Female
## 8 26 Kolkata Student No Female
## 9 27 Chennai Working professional No Male
## 10 32 Bengaluru Working professional No Female
## # ℹ 531 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows whose Highest Education is "High School"
high_school <- data[which(data[["Highest Education"]] == "High School"), ]
high_school
## # A tibble: 137 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 18 Jaipur Student No Male
## 2 35 Delhi Working professional Yes Female
## 3 23 Delhi Student No Female
## 4 50 Kolkata Working professional No Female
## 5 17 Mumbai Student Yes Male
## 6 16 Kolkata Student No Male
## 7 16 Jaipur Student No Female
## 8 15 Chennai Student Yes Female
## 9 20 Hyderabad Student Yes Male
## 10 16 Mumbai Student No Male
## # ℹ 127 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows that answered "Yes" to owning multiple Instagram profiles
multiple_profiles <- data[
  which(data[["Do you own multiple profiles on Instagram?"]] == "Yes"),
]
multiple_profiles
## # A tibble: 308 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 27 Delhi Student Yes Male
## 4 22 Delhi Sabbatical Yes Male
## 5 25 Ahmedabad Self Employed Yes Male
## 6 35 Delhi Working professional Yes Female
## 7 17 Mumbai Student Yes Male
## 8 23 Kolkata Student Yes Female
## 9 26 Kanpur Working professional Yes Male
## 10 22 Chennai Student Yes Female
## # ℹ 298 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows that answered "No" to owning multiple Instagram profiles
single_profile <- data[
  which(data[["Do you own multiple profiles on Instagram?"]] == "No"),
]
single_profile
## # A tibble: 1,316 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 52 Jaipur Working professional No Male
## 6 45 Durgapur Sabbatical No Female
## 7 25 Bengaluru Student No Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 1,306 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows with more than 500 Facebook minutes in the last week
more_than_500_fb <- data[
  which(data[["Time Spent on Facebook in last week (in minutes)"]] > 500),
]
more_than_500_fb
## # A tibble: 130 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 26 Bengaluru Sabbatical Yes Female
## 3 50 Delhi Working professional No Male
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Bengaluru Working professional No Male
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 120 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Keep the rows with more than 500 Instagram minutes in the last week
more_than_500_insta <- data[
  which(data[["Time Spent on Instagram in last week (in minutes)"]] > 500),
]
more_than_500_insta
## # A tibble: 559 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 25 Bengaluru Student No Male
## 6 27 Delhi Student Yes Male
## 7 45 Delhi Working professional No Female
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 549 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents who spent more than 500 minutes on WhatsApp in the last week.
# which() keeps only rows where the comparison is TRUE (NA rows are dropped).
more_than_500_whatsapp <- data[
  which(data$`Time Spent on WhatsApp in last week (in minutes)` > 500),
]
more_than_500_whatsapp
## # A tibble: 913 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 26 Bengaluru Sabbatical Yes Female
## 5 50 Delhi Working professional No Male
## 6 25 Vishakhapatnam Working professional Yes Female
## 7 52 Jaipur Working professional No Male
## 8 45 Durgapur Sabbatical No Female
## 9 25 Bengaluru Student No Male
## 10 27 Delhi Student Yes Male
## # ℹ 903 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with more than 1000 Instagram followers.
# which() keeps only rows where the comparison is TRUE (NA rows are dropped).
more_than_1000_followers <- data[
  which(data$`How many followers do you have on Instagram?` > 1000),
]
more_than_1000_followers
## # A tibble: 199 × 40
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 23 Kolkata Student Yes Female
## 3 21 Chennai Student No Female
## 4 21 Chennai Student Yes Female
## 5 21 Mumbai Sabbatical Yes Female
## 6 57 Delhi Sabbatical Yes Male
## 7 22 Delhi Student No Female
## 8 35 Mumbai Working professional Yes Female
## 9 26 Ahmedabad Working professional No Male
## 10 21 Mumbai Student No Female
## # ℹ 189 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
# Respondents with more than 100 Instagram posts.
# which() keeps only rows where the comparison is TRUE (NA rows are dropped).
more_than_100_posts <- data[
  which(data$`How many posts do you have on Instagram?` > 100),
]
more_than_100_posts
## # A tibble: 444 × 40
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Vishakhapatnam Working professional Yes Female
## 2 25 Bengaluru Student No Male
## 3 25 Ahmedabad Self Employed Yes Male
## 4 50 Kolkata Working professional No Female
## 5 21 Chennai Student No Female
## 6 25 Kolkata Working professional No Male
## 7 45 Bagdogra Working professional Yes Female
## 8 38 Bengaluru Working professional No Female
## 9 24 Kolkata Student Yes Male
## 10 21 Chennai Student Yes Female
## # ℹ 434 more rows
## # ℹ abbreviated name: ¹`Do you own multiple profiles on Instagram?`
## # ℹ 35 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>, `How many followers do you have on Instagram?` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>,
## # Longitude <dbl>, …
The scatter plot shows the relationship between the number of
followers and the number of posts for Instagram users. The blue points
represent users with more than 1000 followers, and the red points
represent users with more than 100 posts but at most 1000 followers.
# Scatter plot of Instagram followers vs. posts for two groups of users:
#   blue = more than 1000 followers
#   red  = more than 100 posts but at most 1000 followers
# plot() sizes the axes from the points it is given, so the limits are
# computed from both groups; otherwise red points lying outside the blue
# group's range would be silently clipped by points().
# local() keeps the helper variables out of the global environment.
local({
  followers <- data$`How many followers do you have on Instagram?`
  posts <- data$`How many posts do you have on Instagram?`
  # which() drops NA comparisons instead of selecting all-NA rows
  blue_rows <- which(followers > 1000)
  red_rows <- which(followers <= 1000 & posts > 100)
  shown_rows <- c(blue_rows, red_rows)
  plot(
    followers[blue_rows], posts[blue_rows],
    col = "blue",
    xlim = range(followers[shown_rows], na.rm = TRUE),
    ylim = range(posts[shown_rows], na.rm = TRUE),
    xlab = "Number of followers", ylab = "Number of posts"
  )
  points(followers[red_rows], posts[red_rows], col = "red")
  legend(
    "topright",
    legend = c("More than 1000 followers", "More than 100 posts"),
    col = c("blue", "red"), pch = 1
  )
})

.
# library() stops with an error if ggplot2 is not installed, whereas
# require() only returns FALSE and lets the later plotting calls fail
library(ggplot2)
# Pie chart of the gender split: a single stacked bar of counts wrapped
# into polar coordinates
ggplot(data, mapping = aes(x = "", fill = Gender)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  ggtitle("Gender Distribution") +
  labs(fill = "Gender")
## The pie chart shows the gender distribution of the users. The pie is
divided into three slices, representing females, males, and non-binary
individuals. The slices are colored green, blue, and light red.
# Histogram of respondent ages in 5-year bins; each bar is stacked and
# filled by the individual age values that fall in the bin
ggplot(data, mapping = aes(x = Age, fill = factor(Age))) +
  geom_histogram(binwidth = 5, color = "black") +
  scale_fill_viridis_d() +
  theme_minimal() +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count")

The age distribution chart shows the number of people in each age
group for a given population. The x-axis shows the age group, and the
y-axis shows the number of people in that age group. The bars in the
chart are colored according to the age group
# Bar chart of education level counts, with one dodged bar per gender and
# the count printed above each bar.
# The bar outline width is set with `linewidth`; using `size` for line
# widths is deprecated since ggplot2 3.4.0 and triggered a warning here.
ggplot(data, aes(x = `Highest Education`, fill = Gender)) +
  geom_bar(
    color = "black", linewidth = 0.5, width = 0.7,
    position = position_dodge()
  ) +
  labs(title = "Education Level Distribution", x = "Education Level", y = "Count") +
  theme_minimal() +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Same dodge width as the bars so each label sits over its own bar
  geom_text(
    aes(label = after_stat(count)),
    stat = "count", position = position_dodge(width = 0.7),
    vjust = -0.5, size = 3
  )
## The bar chart shows the distribution of education levels for males,
females, and non-binary people in the data. The x-axis shows the
education level, and the y-axis shows the count of people. The bars are
grouped by gender, and the colors of the bars represent the gender (pink
for females, blue for males, and green for non-binary people).
# Stacked bar chart: number of respondents per phone OS, split by gender
ggplot(data, mapping = aes(x = `Phone OS`, fill = Gender)) +
  geom_bar() +
  ggtitle("Phone Operating System Distribution") +
  xlab("Phone Operating System") +
  ylab("Count")
## The bar chart shows the distribution of phone operating systems by
gender. The x-axis shows the operating system, and the y-axis shows the
count of people using each operating system. The bars are colored
by gender. The chart shows that Android is the most popular operating
system for all genders.
library(ggplot2)
# Bar chart of total social media usage per zone, with one dodged bar per
# gender.
# stat = "summary" with fun = "sum" adds up all respondents in each
# Zone/Gender cell. With stat = "identity" and position = "dodge" every
# respondent gets its own bar drawn on top of the others, so only the
# tallest single value in each cell is visible rather than the total.
ggplot(data, aes(x = `Zone`, y = `Total Social Media Usage`, fill = Gender)) +
  geom_bar(stat = "summary", fun = "sum", position = "dodge") +
  labs(x = "Zone/Area", y = "Total Social Media Usage", title = "Gender by Zone/Area with Total Social Media Usage") +
  scale_fill_brewer(palette = "Set3") + # Choose a colorful palette
  theme_minimal()
## The bar graph shows the total social media usage by gender and by
zone/area. The zones/areas are Central, Eastern, North-Eastern,
Northern, Southern, and Western. The genders are Female, Male, and
Non-Binary. The total social media usage is highest in the Western zone
and lowest in the Central zone.
# Pie chart of phone OS share: a single stacked bar of counts wrapped into
# polar coordinates
ggplot(data, mapping = aes(x = "", fill = `Phone OS`)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  ggtitle("Phone Operating System Distribution") +
  labs(fill = "Phone Operating System")
## The pie chart shows the distribution of phones by operating system.
The largest slice is for Android and lowest slice in others phone
operating system
# Scatter plot: last-week Facebook minutes (x) against last-week Instagram
# minutes (y), one pink point per respondent
ggplot(
  data,
  mapping = aes(
    x = `Time Spent on Facebook in last week (in minutes)`,
    y = `Time Spent on Instagram in last week (in minutes)`
  )
) +
  geom_point(color = "#FFC0CB") +
  ggtitle("Time Spent on Facebook vs. Time Spent on Instagram") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Time Spent on Instagram (in minutes)")
## the scatter plot shows a positive correlation between time spent on
Facebook and time spent on Instagram. This means that people who spend
more time on Facebook also spend more time on Instagram. The scatter
plot also shows that there are more people who spend more time on
Facebook than on Instagram.
# Histogram of total social media usage in 500-minute bins, drawn on a
# light blue plot background with enlarged black text
ggplot(data, mapping = aes(x = `Total Social Media Usage`)) +
  geom_histogram(binwidth = 500, fill = "#FFC0CB", color = "black") +
  ggtitle("Social Media Usage Distribution") +
  xlab("Total Social Media Usage (in minutes)") +
  ylab("Count") +
  theme_minimal() +
  # Overrides applied on top of theme_minimal()
  theme(
    plot.background = element_rect(fill = "#ADD8E6"),
    axis.text = element_text(size = 12, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black")
  )

the graph shows that WhatsApp is a popular communication platform
for people of all genders, with females spending slightly more time on
it than males.
# Bar chart of the mean total social media usage per education level.
# stat = "summary" with fun = "mean" makes each bar the average over the
# respondents in that level, so the y-axis label says "Mean" rather than
# implying a sum.
ggplot(data, aes(x = `Highest Education`, y = `Total Social Media Usage`, fill = `Highest Education`)) +
  geom_bar(stat = "summary", fun = "mean") +
  labs(title = "Education Level vs. Social Media Usage", x = "Education Level", y = "Mean Total Social Media Usage (in minutes)") +
  # One colour per education level; four colours are supplied
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90", "#FFD700")) +
  theme_minimal()
## The chart shows the average total social media usage (in minutes) of
people with different levels of education. The bar graph shows that
people with higher education levels use social media more than those
with other education levels.
# Histogram of the number of Instagram posts per respondent, 50 posts per bin
ggplot(data, mapping = aes(x = `How many posts do you have on Instagram?`)) +
  geom_histogram(binwidth = 50, fill = "#ADD8E6") +
  theme_minimal() +
  ggtitle("Number of Posts on Instagram Distribution") +
  xlab("Number of Posts on Instagram") +
  ylab("Count")
## The histogram shows the distribution of the number of Instagram posts
per user. The x-axis shows the number of posts on Instagram, and the
y-axis shows the count of users.
# Scatter plot of age against number of Instagram posts; point colour also
# encodes age
ggplot(
  data,
  mapping = aes(x = Age, y = `How many posts do you have on Instagram?`, color = Age)
) +
  geom_point() +
  ggtitle("Age vs Number of Posts on Instagram") +
  xlab("Age") +
  ylab("Number of Posts on Instagram")
## This chart shows the relationship between age and the number of posts
on Instagram. The x-axis represents the age of the user, and the y-axis
represents the number of posts that the user has made on Instagram. The
chart shows that, younger users post more on Instagram than older users.
There are a few possible explanations for this trend. First, younger
users may simply have more time to spend on social media. Second,
younger users may be more likely to use social media to connect with
their friends and family. Third, younger users may be more likely to use
social media to express themselves and share their creativity.
# Scatter plot of age against last-week Facebook minutes; point colour also
# encodes age
ggplot(
  data,
  mapping = aes(x = Age, y = `Time Spent on Facebook in last week (in minutes)`, color = Age)
) +
  geom_point() +
  ggtitle("Age vs Time Spent on Facebook") +
  xlab("Age") +
  ylab("Time Spent on Facebook (in minutes)")
## The scatter plot shows the time spent on Facebook in the last week,
measured in minutes, against the age of the user. Each point represents
one user, and the points are colored by age. A majority of users spent a
relatively small amount of time on Facebook in the last week, while a
smaller group of users spent a significant amount of time on Facebook.
# Scatter plot of age against last-week Instagram minutes; point colour
# also encodes age
ggplot(
  data,
  mapping = aes(x = Age, y = `Time Spent on Instagram in last week (in minutes)`, color = Age)
) +
  geom_point() +
  ggtitle("Age vs Time Spent on Instagram") +
  xlab("Age") +
  ylab("Time Spent on Instagram (in minutes)")
## The chart shows a negative correlation between age and time spent on
Instagram, meaning that younger users spend more time on the platform
than older users. One possible explanation is that younger users have
more time to spend on social media in general.
# Histogram of total social media usage in 500-minute bins, stacked and
# filled by gender, with the legend below the plot
ggplot(data, mapping = aes(x = `Total Social Media Usage`, fill = Gender)) +
  geom_histogram(binwidth = 500) +
  # Three fill colours, assigned to the gender values in scale order
  scale_fill_manual(values = c("#ADD8E6", "#FFC0CB", "#FF0000")) +
  ggtitle("Total Social Media Usage Distribution") +
  xlab("Total Social Media Usage") +
  ylab("Count") +
  theme_minimal() +
  theme(legend.position = "bottom")

The chart shows the distribution of total social media usage by
gender. It is a histogram, with the x-axis showing total social media
usage and the y-axis showing the number of people. The bars are
color-coded by gender, with blue representing females, pink representing
males, and red representing non-binary individuals.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(maps)
## Warning: package 'maps' was built under R version 4.3.2
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Outline of India taken from the "world" map database
india_map <- map_data("world", region = "India")
# Plot each respondent at its longitude/latitude, coloured by state, on a
# fixed 1:1 aspect ratio, and draw the unfilled black India outline on top
p <- ggplot(
  data,
  mapping = aes(x = Longitude, y = Latitude, color = State, fill = State)
) +
  geom_point(size = 3) +
  coord_fixed(ratio = 1) +
  theme_minimal() +
  geom_polygon(
    data = india_map,
    mapping = aes(x = long, y = lat, group = group),
    fill = NA,
    color = "black"
  )
print(p)
## The chart shows the social media users in India, by state. The color
of each point represents the state. The states with the most social media
users are Uttar Pradesh, Maharashtra, and West Bengal. The states with
the fewest social media users are Jammu and Kashmir, Uttarakhand, and
Himachal Pradesh.
# Scatter plot of age against total social media usage; point colour also
# encodes age
library(ggplot2)
ggplot(
  data,
  mapping = aes(x = Age, y = `Total Social Media Usage`, color = Age)
) +
  geom_point() +
  ggtitle("Age vs Total Social Media Usage") +
  xlab("Age") +
  ylab("Total Social Media Usage")
## The chart shows a scatter plot of age vs total social media usage,
with age on the x-axis and total social media usage on the y-axis.
The data points are colored by age, with younger people represented by
darker colors and older people represented by lighter colors. Overall,
the plot shows that younger people use social media more than older
people. One possible explanation is that younger people are more
familiar with social media and more comfortable using it.
# Scatter plot of Instagram followers (x) against Instagram posts (y)
ggplot(
  data,
  mapping = aes(
    x = `How many followers do you have on Instagram?`,
    y = `How many posts do you have on Instagram?`
  )
) +
  geom_point() +
  ggtitle("Number of Followers vs Number of Posts on Instagram") +
  xlab("Number of Followers on Instagram") +
  ylab("Number of Posts on Instagram")
## The chart shows a positive correlation between the number of
followers on Instagram and the number of posts on Instagram: users with
more followers tend to have more posts, although some users with a high
number of followers have a low number of posts.
# Pie chart of current status: a single stacked bar of counts wrapped into
# polar coordinates
ggplot(data, mapping = aes(x = "", fill = `Current Status`)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  ggtitle("Current Status Distribution") +
  labs(fill = "Current Status")
## The pie chart shows the distribution of people in different current
status, such as Sabbatical, Self Employed, Student, and Working
professional. The chart is labeled “Current Status Distribution” and the
legend shows the colors used to represent each status.
#.******************************************************************************************************************************************
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the ggplot2 package
library(ggplot2)
library(tidyverse)
# Keep only the columns used by the usage-by-age-and-gender charts below
data <- data %>%
  select(
    Age,
    Gender,
    `Total WhatsApp Usage`,
    `Total Social Media Usage`,
    `Total Facebook Usage`,
    `Total Instagram Usage`,
    `Total YouTube Usage (minutes)`
  )
# Stacked bars of total Facebook usage at each age, filled by gender.
# Only "Male" and "Female" are given explicit colours in the manual scale.
ggplot(data, mapping = aes(x = Age, y = `Total Facebook Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "pink")) +
  theme_minimal() +
  ggtitle("Total Facebook Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)")

The bar chart shows the total Facebook usage by age and gender. The
x-axis shows the age group, and the y-axis shows the total Facebook
usage in minutes. The bars are colored blue for males and pink for
females. The chart shows that younger users use Facebook more; the
highest total usage is in the 20-29 age group.
# Stacked bars of total Instagram usage at each age, filled by gender.
# Only "Male" and "Female" are given explicit colours in the manual scale.
ggplot(data, mapping = aes(x = Age, y = `Total Instagram Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  ggtitle("Total Instagram Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)")
## The bar chart shows the total Instagram usage by age and gender. The
chart is labeled “Total Instagram Usage by Age and Gender” and the
x-axis shows the age group, while the y-axis shows the total instagram
usage in minutes. The chart is color-coded by gender, with male users in
blue and female users in red.
# Stacked bars of total WhatsApp usage at each age, filled by gender.
# Only "Male" and "Female" are given explicit colours in the manual scale.
ggplot(data, mapping = aes(x = Age, y = `Total WhatsApp Usage`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "pink")) +
  theme_minimal() +
  ggtitle("Total WhatsApp Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)")
## The bar chart shows the total WhatsApp usage by age and gender. The
x-axis shows the age group, and the y-axis shows the total usage in
minutes. The bars are colored by gender: maroon for males and pink for
females. The chart shows that, overall, females use WhatsApp more than
males.
# Stacked bars of total YouTube usage at each age, filled by gender.
# Only "Male" and "Female" are given explicit colours in the manual scale.
ggplot(data, mapping = aes(x = Age, y = `Total YouTube Usage (minutes)`, fill = Gender)) +
  geom_col() +
  scale_fill_manual(values = c("Male" = "maroon", "Female" = "skyblue")) +
  theme_minimal() +
  ggtitle("Total YouTube Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)")
## The bar chart shows the total YouTube usage by age and
gender. The chart is labeled “Total YouTube Usage by Age and Gender” and
the x-axis shows the age group, while the y-axis shows the total usage
in minutes. The chart is colored by gender, with male users in maroon
and female users in sky-blue. The chart shows that female users use
YouTube more than male users in all age groups.
# Reshape to long format: one row per respondent and platform, with the
# platform column name in `Platform` and its minutes in `Total Usage`
data_long <- pivot_longer(
  data,
  cols = c(
    `Total Facebook Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`,
    `Total YouTube Usage (minutes)`
  ),
  names_to = "Platform",
  values_to = "Total Usage"
)
# One panel per platform, each with its own y-axis range; bars are stacked
# by gender. Only "Male" and "Female" are given explicit colours.
ggplot(data_long, mapping = aes(x = Age, y = `Total Usage`, fill = Gender)) +
  geom_col() +
  facet_wrap(~ Platform, scales = "free_y") +
  scale_fill_manual(values = c("Male" = "blue", "Female" = "red")) +
  theme_minimal() +
  ggtitle("Social Media Usage by Age and Gender") +
  xlab("Age Group") +
  ylab("Total Usage (minutes)")

Here the combined, faceted bar chart shows the total usage of four
social media platforms (Facebook, Instagram, WhatsApp, and YouTube) by
age and gender, in minutes.
library(readr)
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load required library
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Restrict the data to the two clustering variables
data <- data[c("Age", "Income from YouTube (rs,month)")]
# Fix the random seed so the cluster assignment is reproducible
set.seed(123)
# Number of clusters to fit
k <- 2
kmeans_result <- kmeans(data, centers = k)
# Store each row's cluster label as a factor column
data[["Cluster"]] <- factor(kmeans_result$cluster)
# Show the data together with the assigned clusters
print(data)
## # A tibble: 1,628 × 3
## Age `Income from YouTube (rs,month)` Cluster
## <dbl> <dbl> <fct>
## 1 24 88447 2
## 2 39 64764 2
## 3 22 4387 1
## 4 26 99695 2
## 5 50 81297 2
## 6 25 51770 2
## 7 52 38003 1
## 8 45 54216 2
## 9 25 48149 1
## 10 27 12657 1
## # ℹ 1,618 more rows
# Elbow Method to determine the optimal number of clusters.
# The largest k to try is named once instead of repeating the literal 10
# in the vector length, the loop and the plot.
max_k <- 10
# Within-Cluster Sum of Squares (WCSS) for k = 1..max_k
wcss <- numeric(max_k)
# Cluster on the two original variables only (not the added Cluster
# column); selected once because it does not change between iterations
cluster_features <- data[, c("Age", "Income from YouTube (rs,month)")]
# Calculate WCSS for different numbers of clusters (k)
for (i in seq_len(max_k)) {
  kmeans_temp <- kmeans(cluster_features, centers = i)
  wcss[i] <- kmeans_temp$tot.withinss # Store the WCSS for the current k
}
# Plot the Elbow Method graph
plot(
  seq_len(max_k), wcss,
  type = "b",
  xlab = "K (Number of Clusters)",
  ylab = "WCSS (Within-Cluster Sum of Squares)"
)
abline(v = k, col = "red", lty = 2) # Highlight the chosen k

# Scatter plot of the clustered points (blue/green by cluster) with each
# k-means centroid marked by a dark red cross
ggplot(
  data,
  mapping = aes(x = Age, y = `Income from YouTube (rs,month)`, color = Cluster)
) +
  geom_point() +
  geom_point(
    data = as.data.frame(kmeans_result$centers),
    mapping = aes(x = Age, y = `Income from YouTube (rs,month)`),
    color = "darkred",
    size = 3,
    shape = 4
  ) +
  scale_color_manual(values = c("blue", "green")) +
  ggtitle("K-Means Clustering (k = 2) with Centroids") +
  xlab("Age") +
  ylab("Income from YouTube")
## The chart shows the results of the Elbow Method for determining the
optimal number of clusters for k-means clustering. The Elbow Method
plots the Within-Cluster Sum of Squares (WCSS) for different numbers of
clusters (k). The WCSS is a measure of how well the data points fit into
their assigned clusters. A lower WCSS indicates that the data points are
more tightly clustered around their centroids.
The chart is a k-means clustering of data points based on their age
and income from YouTube. K-means clustering is an unsupervised learning
algorithm. The chart shows that the data points are clustered into two
groups, with the blue cluster representing lower incomes and the green
cluster representing higher incomes. The centroids of the two clusters
are marked by the dark red X points.
library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Select the 'Age' and 'Total Facebook Usage' variables
data <- data %>% select(Age, `Total Facebook Usage`)
# Remove any rows with missing values
data <- na.omit(data)
# Convert the 'Age' variable to numeric
data$Age <- as.numeric(data$Age)
# Fit a linear regression of total Facebook usage on age
model <- lm(`Total Facebook Usage` ~ Age, data = data)
# Predict usage at ages 31 to 35 (these are ages, not future years)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(model, newdata = new_data)
# Keep each prediction next to its age so the plot layer below reads both
# from `new_data` instead of picking `prediction` up from the global
# environment
new_data$prediction <- prediction
# Add a 'Group' column to distinguish older and younger based on age threshold
data <- data %>%
  mutate(Group = ifelse(Age > 35, "Older", "Younger"))
# Scatter plot coloured by 'Group', with the fitted regression line (blue)
# and the predicted points (red)
ggplot(data, aes(x = Age, y = `Total Facebook Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_point(data = new_data, aes(x = Age, y = prediction), color = "red", size = 3) +
  labs(title = "Age vs. Total Facebook Usage", x = "Age", y = "Total Facebook Usage") +
  scale_color_manual(values = c("Younger" = "green", "Older" = "purple")) +
  geom_vline(xintercept = 35, linetype = "dashed") # Vertical dashed line at age 35
## `geom_smooth()` using formula = 'y ~ x'
## Here linear regression is used to show the relationship between age
and total Facebook usage. The data is divided into two groups: younger
users (age 35 or below) and older users (age 36 or above). The vertical
dashed line at age 35 separates the two groups. Younger users are shown
in green and older users in purple. The blue line represents the linear
regression model, which predicts total Facebook usage based on age. The
chart shows that there is a positive correlation between age and total
Facebook usage. This means that people use Facebook more as they get
older. This is likely because older people have more time to spend on
social media and are more likely to have friends and family members who
use Facebook. The red data points are the values predicted by the linear
regression model for ages 31, 32, 33, 34, and 35.
library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Select the 'Age' and 'Total Instagram Usage' variables
data <- data %>% select(Age, `Total Instagram Usage`)
# Remove any rows with missing values
data <- na.omit(data)
# Convert the 'Age' variable to numeric
data$Age <- as.numeric(data$Age)
# Fit a linear regression of total Instagram usage on age
model <- lm(`Total Instagram Usage` ~ Age, data = data)
# Predict usage at ages 31 to 35 (these are ages, not future years)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(model, newdata = new_data)
# Keep each prediction next to its age so the plot layer below reads both
# from `new_data` instead of picking `prediction` up from the global
# environment
new_data$prediction <- prediction
# Add a 'Group' column to distinguish older and younger based on age threshold
data <- data %>%
  mutate(Group = ifelse(Age > 35, "Older", "Younger"))
# Scatter plot coloured by 'Group', with the fitted regression line (blue)
# and the predicted points (red)
ggplot(data, aes(x = Age, y = `Total Instagram Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_point(data = new_data, aes(x = Age, y = prediction), color = "red", size = 3) +
  labs(title = "Age vs. Total Instagram Usage", x = "Age", y = "Total Instagram Usage") +
  scale_color_manual(values = c("Younger" = "maroon", "Older" = "blue")) +
  geom_vline(xintercept = 35, linetype = "dashed") # Vertical dashed line at age 35
## `geom_smooth()` using formula = 'y ~ x'
## Here linear regression is used to show the relationship between age
and total Instagram usage. The dataset is segmented into two distinct
groups: younger users (aged 35 or below) represented in maroon, and
older users (aged 36 or above) shown in blue. A vertical dashed line at
age 35 separates these two groups. The blue line in the chart represents
the linear regression model, which predicts total Instagram usage based
on age; it shows that younger users use Instagram more frequently. The
red data points are the values predicted by the linear regression model
for ages 31, 32, 33, 34, and 35.
library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Select the 'Age' and 'Total WhatsApp Usage' variables
data <- data %>% select(Age, `Total WhatsApp Usage`)
# Remove any rows with missing values
data <- na.omit(data)
# Convert the 'Age' variable to numeric
data$Age <- as.numeric(data$Age)
# Fit a linear regression of total WhatsApp usage on age
model <- lm(`Total WhatsApp Usage` ~ Age, data = data)
# Predict usage at ages 31 to 35 (these are ages, not future years)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(model, newdata = new_data)
# Keep each prediction next to its age so the plot layer below reads both
# from `new_data` instead of picking `prediction` up from the global
# environment
new_data$prediction <- prediction
# Add a 'Group' column to distinguish older and younger based on age threshold
data <- data %>%
  mutate(Group = ifelse(Age > 35, "Older", "Younger"))
# Scatter plot coloured by 'Group', with the fitted regression line (blue)
# and the predicted points (green)
ggplot(data, aes(x = Age, y = `Total WhatsApp Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_point(data = new_data, aes(x = Age, y = prediction), color = "green", size = 3) +
  labs(title = "Age vs. Total WhatsApp Usage", x = "Age", y = "Total WhatsApp Usage") +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  geom_vline(xintercept = 35, linetype = "dashed") # Vertical dashed line at age 35
## `geom_smooth()` using formula = 'y ~ x'

This section fits a linear regression model to explore the relationship
between age and total WhatsApp usage. The dataset is reduced to two key
variables: ‘Age’ and ‘Total WhatsApp Usage.’ Rows with missing values are
removed. A linear regression model is then created, which predicts ‘Total
WhatsApp Usage’ based on an individual’s age. To estimate expected usage
over the coming five years, the model is evaluated at ages 31 to 35. A
vertical dashed line is introduced at the age of 35. Data points are
colored differently for the two age groups, with ‘Younger’ users
represented in red and ‘Older’ users in blue. The blue line represents
the linear regression model, showing how age influences WhatsApp
usage. The green data points mark the predicted WhatsApp usage for ages
31, 32, 33, 34, and 35, offering insight into the future
trends in WhatsApp usage based on age.
library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
# Reloads the full dataset with readr because `data` was narrowed to two
# columns by the previous analysis; readr's column specification is printed
# directly below this call.
# NOTE(review): the path is an absolute location on one Windows machine, so
# this line will fail elsewhere -- consider a project-relative path.
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Fit a simple linear regression of total social media usage on age, then plot
# the observations, the fitted line, and the model's predictions for ages
# 31-35.

# Keep only the two modelled variables. Age is coerced to numeric before
# incomplete rows are dropped, so any value that fails to convert is removed
# together with the other missing rows.
data <- data %>%
  select(Age, `Total Social Media Usage`) %>%
  mutate(Age = as.numeric(Age))
data <- na.omit(data)

# Linear regression: Total Social Media Usage ~ Age
model <- lm(`Total Social Media Usage` ~ Age, data = data)

# Model predictions at ages 31 to 35 (predictions by age, not over time)
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))
prediction <- predict(model, newdata = new_data)

# Store each prediction next to its age so the plot layer reads both columns
# from its own data frame instead of picking `prediction` up from the
# workspace.
predicted_points <- data.frame(Age = new_data$Age, Usage = prediction)

# Label each respondent by age group; the same cut-off is drawn on the plot
age_threshold <- 35
data <- data %>%
  mutate(Group = ifelse(Age > age_threshold, "Older", "Younger"))

# Scatter plot coloured by group, with the fitted line, the predicted points
# (yellow) and a dashed vertical line at the age cut-off
ggplot(data, aes(x = Age, y = `Total Social Media Usage`, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  geom_point(
    data = predicted_points,
    aes(x = Age, y = Usage),
    inherit.aes = FALSE, # predicted_points has no Group column
    color = "yellow",
    size = 3
  ) +
  labs(
    title = "Age vs. Total Social Media Usage",
    x = "Age",
    y = "Total Social Media Usage"
  ) +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  geom_vline(xintercept = age_threshold, linetype = "dashed")
## `geom_smooth()` using formula = 'y ~ x'
## This section fits a linear regression model to explore the relationship
between age and total social media usage, selecting the ‘Age’ and ‘Total
Social Media Usage’ variables. The core of the analysis is a linear
regression model that models ‘Total Social Media Usage’ based on age. I use
this model to predict the expected usage for the next five years, with ages
31, 32, 33, 34, and 35 included in the new data. Data points are colored to
distinguish ‘Younger’ users in red from ‘Older’ users in blue. The blue line
represents the linear regression model. The yellow data points show the
predicted social media usage for ages 31, 32, 33, 34, and 35, indicating
expected future trends based on age.
library(readr)
library(dplyr)
library(ggplot2)
# Read the CSV file
# Reloads the full dataset with readr because `data` was narrowed to two
# columns by the previous analysis; readr's column specification is printed
# directly below this call.
# NOTE(review): the path is an absolute location on one Windows machine, so
# this line will fail elsewhere -- consider a project-relative path.
data <- read_csv("C:/Users/bhaga/OneDrive/Desktop/new dataset/social media_new.csv")
## Rows: 1628 Columns: 40
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (11): Age, Latitude, Longitude, How many subscriber do you have on youtu...
## num (14): How many followers do you have on Instagram?, How many posts do yo...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Regress each usage total on age and print the model's predictions for ages
# 31-35, one line per platform.

# Keep Age and the four usage totals. Age is coerced to numeric before
# incomplete rows are dropped, so any value that fails to convert is removed
# together with the other missing rows.
data <- data %>%
  select(
    Age,
    `Total Facebook Usage`,
    `Total Social Media Usage`,
    `Total Instagram Usage`,
    `Total WhatsApp Usage`
  ) %>%
  mutate(Age = as.numeric(Age))
data <- na.omit(data)

# Each entry fills the column-name pattern "Total <platform> Usage"
platforms <- c("Facebook", "Social Media", "Instagram", "WhatsApp")

# The ages to predict at are the same for every platform, so build them once
new_data <- data.frame(Age = c(31, 32, 33, 34, 35))

for (platform in platforms) {
  # Backticks are required because the column names contain spaces
  usage_formula <- as.formula(paste0("`Total ", platform, " Usage` ~ Age"))
  model <- lm(usage_formula, data = data)
  prediction <- predict(model, newdata = new_data)
  cat("Predicted values for Total", platform, "Usage:", prediction, "\n")
}
## Predicted values for Total Facebook Usage: 284.0396 292.0368 300.0339 308.031 316.0281
## Predicted values for Total Social Media Usage: 2002.076 1973.391 1944.706 1916.022 1887.337
## Predicted values for Total Instagram Usage: 615.9642 590.8018 565.6394 540.477 515.3145
## Predicted values for Total WhatsApp Usage: 1102.072 1090.553 1079.033 1067.514 1055.994
# Draw one scatter plot with a fitted regression line per usage column, all in
# a single faceted figure.

# Reshape to long format: one row per (Age, Platform) pair, so that ggplot can
# facet on Platform. pivot_longer() is namespace-qualified because this
# section attaches only readr, dplyr and ggplot2.
data_long <- data %>%
  tidyr::pivot_longer(-Age, names_to = "Platform", values_to = "Usage")

# Label each observation by age group; the same cut-off is drawn on the plot
age_threshold <- 35
data_long <- data_long %>%
  mutate(Group = ifelse(Age > age_threshold, "Older", "Younger"))

# Each panel gets its own y-axis because the usage totals differ in scale
ggplot(data_long, aes(x = Age, y = Usage, color = Group)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(
    title = "Age vs. Social Media Usage by Platform",
    x = "Age",
    y = "Usage"
  ) +
  scale_color_manual(values = c("Younger" = "red", "Older" = "blue")) +
  geom_vline(xintercept = age_threshold, linetype = "dashed") +
  facet_wrap(~Platform, scales = "free_y")
## `geom_smooth()` using formula = 'y ~ x'
## This combined chart provides insight into how age influences social
media usage across multiple platforms, including Facebook, total social
media, Instagram, and WhatsApp. The chart allows us to compare how age
impacts social media usage across the various platforms and to gain
insight into expected usage trends as people age.
# Closing slide: an empty canvas with a single thank-you message.
# The text size is passed as a fixed value. Placed inside aes(), size = 10
# would be treated as a mapped variable, which rescales it and adds a size
# legend to the slide.
ggplot() +
  annotate("text", x = 0.5, y = 0.5, label = "Thank You Mam", size = 10) +
  theme_void()
