library(readr)
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ stringr 1.5.0
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the survey responses into `data`; every later statement reads this tibble.
# NOTE(review): the path is an absolute Windows location, so the script only
# runs on a machine that has this file at D:/ - consider a project-relative path.
data <- read_csv("D:/social media.csv")
## Rows: 1628 Columns: 26
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (9): City, Current Status, Do you own multiple profiles on Instagram?, ...
## dbl (3): Age, Latitude, Longitude
## num (14): How many followers do you have on Instagram? (In case of multiple ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Open the spreadsheet viewer only when a person is at the console; View()
# needs an interactive front end and should not run under Rscript or knitting.
if (interactive()) {
  View(data)
}
# Mean of the Age column
with(data, mean(Age))
## [1] 26.85811
# Median of the Age column
with(data, median(Age))
## [1] 24
# Largest value in the Age column
with(data, max(Age))
## [1] 74
# Smallest value in the Age column
with(data, min(Age))
## [1] 13
# Count of rows whose Gender is exactly "Male"
sum(data[["Gender"]] == "Male")
## [1] 813
# Count of rows whose Gender is exactly "Female"
sum(data[["Gender"]] == "Female")
## [1] 813
# Mean of the Instagram follower-count column
mean(data[["How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)"]])
## [1] 868.1474
# Median of the Instagram follower-count column
median(data[["How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)"]])
## [1] 370
# Largest Instagram follower count
max(data[["How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)"]])
## [1] 116000
# Smallest Instagram follower count
min(data[["How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)"]])
## [1] 0
# Mean of the Instagram post-count column
mean(data[["How many posts do you have on Instagram?"]])
## [1] 99.07985
# Median of the Instagram post-count column
median(data[["How many posts do you have on Instagram?"]])
## [1] 43.5
# Largest Instagram post count
max(data[["How many posts do you have on Instagram?"]])
## [1] 2858
# Smallest Instagram post count
min(data[["How many posts do you have on Instagram?"]])
## [1] 0
# Sum of the Facebook last-week minutes column over all respondents
with(data, sum(`Time Spent on Facebook in last week (in minutes)`))
## [1] 285275
# Sum of the Instagram last-week minutes column over all respondents
with(data, sum(`Time Spent on Instagram in last week (in minutes)`))
## [1] 822407
# Sum of the WhatsApp last-week minutes column over all respondents
with(data, sum(`Time Spent on WhatsApp in last week (in minutes)`))
## [1] 1391726
# Mean of the Total Social Media Usage column
mean(data[["Total Social Media Usage"]])
## [1] 2120.885
# Median of the Total Social Media Usage column
median(data[["Total Social Media Usage"]])
## [1] 1658.5
# Largest value of the Total Social Media Usage column
max(data[["Total Social Media Usage"]])
## [1] 15780
#@**************************************************************************************************************
# Rows of `data` whose Gender is "Female" (rows with a missing Gender are dropped)
females <- data[which(data$Gender == "Female"), ]
print(females)
## # A tibble: 813 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 45 Delhi Working professional No Female
## 7 21 Delhi Working professional No Female
## 8 26 Delhi Working professional No Female
## 9 25 Mumbai Sabbatical No Female
## 10 22 Kolkata Student No Female
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Gender is "Male" (rows with a missing Gender are dropped)
males <- data[which(data$Gender == "Male"), ]
print(males)
## # A tibble: 813 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 50 Delhi Working professional No Male
## 3 52 Jaipur Working professional No Male
## 4 25 Bengaluru Student No Male
## 5 27 Delhi Student Yes Male
## 6 27 Bengaluru Working professional No Male
## 7 22 Delhi Sabbatical Yes Male
## 8 26 Agra Working professional No Male
## 9 25 Ahmedabad Student No Male
## 10 18 Jaipur Student No Male
## # ℹ 803 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Current Status is "Student"
students <- data[which(data$`Current Status` == "Student"), ]
print(students)
## # A tibble: 637 × 26
## Age City `Current Status` Do you own multiple profiles on Ins…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 27 Delhi Student Yes Male
## 3 25 Ahmedabad Student No Male
## 4 18 Jaipur Student No Male
## 5 22 Kolkata Student No Female
## 6 26 Kolkata Student No Female
## 7 23 Delhi Student No Female
## 8 17 Mumbai Student Yes Male
## 9 23 Kolkata Student Yes Female
## 10 22 Ahmedabad Student No Male
## # ℹ 627 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Current Status is "Working professional"
working_professionals <- data[which(data$`Current Status` == "Working professional"), ]
print(working_professionals)
## # A tibble: 796 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 25 Vishakhapatnam Working professional Yes Female
## 6 52 Jaipur Working professional No Male
## 7 27 Bengaluru Working professional No Male
## 8 45 Delhi Working professional No Female
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 786 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Zone is "Northern"
northern <- data[which(data$Zone == "Northern"), ]
print(northern)
## # A tibble: 542 × 26
## Age City `Current Status` Do you own multiple profiles on In…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 52 Jaipur Working professional No Male
## 5 27 Delhi Student Yes Male
## 6 45 Delhi Working professional No Female
## 7 22 Delhi Sabbatical Yes Male
## 8 21 Delhi Working professional No Female
## 9 26 Agra Working professional No Male
## 10 26 Delhi Working professional No Female
## # ℹ 532 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Zone is "Southern"
southern <- data[which(data$Zone == "Southern"), ]
print(southern)
## # A tibble: 211 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 25 Bengaluru Student No Male
## 4 27 Bengaluru Working professional No Male
## 5 32 Bengaluru Working professional No Male
## 6 27 Chennai Working professional No Male
## 7 23 Chennai Student No Female
## 8 22 Chennai Student Yes Female
## 9 32 Bengaluru Working professional No Female
## 10 21 Chennai Student No Female
## # ℹ 201 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Zone is "Eastern"
eastern <- data[which(data$Zone == "Eastern"), ]
print(eastern)
## # A tibble: 271 × 26
## Age City `Current Status` Do you own multiple profiles …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 45 Durgapur Sabbatical No Female
## 2 24 Cooch-behar Working professional No Male
## 3 22 Kolkata Student No Female
## 4 26 Kolkata Student No Female
## 5 50 Kolkata Working professional No Female
## 6 23 Kolkata Student Yes Female
## 7 25 Kolkata Working professional No Male
## 8 45 Bagdogra Working professional Yes Female
## 9 25 Kolkata Student No Male
## 10 45 Kolkata Working professional No Female
## # ℹ 261 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Zone is "Western"
western <- data[which(data$Zone == "Western"), ]
print(western)
## # A tibble: 543 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 25 Ahmedabad Student No Male
## 3 25 Mumbai Sabbatical No Female
## 4 25 Ahmedabad Self Employed Yes Male
## 5 17 Mumbai Student Yes Male
## 6 22 Ahmedabad Student No Male
## 7 24 Pune Working professional No Female
## 8 21 Mumbai Sabbatical Yes Female
## 9 51 Ahmedabad Working professional No Male
## 10 22 Mumbai Sabbatical No Male
## # ℹ 533 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Phone OS is "Android"
android <- data[which(data$`Phone OS` == "Android"), ]
print(android)
## # A tibble: 1,115 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 22 Mumbai Working professional No Male
## 2 26 Bengaluru Sabbatical Yes Female
## 3 25 Vishakhapatnam Working professional Yes Female
## 4 52 Jaipur Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 21 Delhi Working professional No Female
## 10 26 Agra Working professional No Male
## # ℹ 1,105 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Phone OS is "iOs" (spelled exactly as in the data)
ios <- data[which(data$`Phone OS` == "iOs"), ]
print(ios)
## # A tibble: 508 × 26
## Age City `Current Status` Do you own multiple profiles on …¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 50 Delhi Working professional No Male
## 4 45 Delhi Working professional No Female
## 5 22 Delhi Sabbatical Yes Male
## 6 18 Jaipur Student No Male
## 7 22 Chennai Student Yes Female
## 8 23 Guwahati Student No Female
## 9 45 Bagdogra Working professional Yes Female
## 10 28 Kolkata Sabbatical No Female
## # ℹ 498 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Highest Education is "Graduation"
graduation <- data[which(data$`Highest Education` == "Graduation"), ]
print(graduation)
## # A tibble: 950 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 50 Delhi Working professional No Male
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Delhi Student Yes Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 940 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Highest Education is "Post graduation"
post_graduation <- data[which(data$`Highest Education` == "Post graduation"), ]
print(post_graduation)
## # A tibble: 541 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 52 Jaipur Working professional No Male
## 4 22 Delhi Sabbatical Yes Male
## 5 26 Delhi Working professional No Female
## 6 25 Mumbai Sabbatical No Female
## 7 22 Kolkata Student No Female
## 8 26 Kolkata Student No Female
## 9 27 Chennai Working professional No Male
## 10 32 Bengaluru Working professional No Female
## # ℹ 531 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Highest Education is "High School"
high_school <- data[which(data$`Highest Education` == "High School"), ]
print(high_school)
## # A tibble: 137 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 18 Jaipur Student No Male
## 2 35 Delhi Working professional Yes Female
## 3 23 Delhi Student No Female
## 4 50 Kolkata Working professional No Female
## 5 17 Mumbai Student Yes Male
## 6 16 Kolkata Student No Male
## 7 16 Jaipur Student No Female
## 8 15 Chennai Student Yes Female
## 9 20 Hyderabad Student Yes Male
## 10 16 Mumbai Student No Male
## # ℹ 127 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` that answered "Yes" to owning multiple Instagram profiles
multiple_profiles <- data[which(data$`Do you own multiple profiles on Instagram?` == "Yes"), ]
print(multiple_profiles)
## # A tibble: 308 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 26 Bengaluru Sabbatical Yes Female
## 2 25 Vishakhapatnam Working professional Yes Female
## 3 27 Delhi Student Yes Male
## 4 22 Delhi Sabbatical Yes Male
## 5 25 Ahmedabad Self Employed Yes Male
## 6 35 Delhi Working professional Yes Female
## 7 17 Mumbai Student Yes Male
## 8 23 Kolkata Student Yes Female
## 9 26 Kanpur Working professional Yes Male
## 10 22 Chennai Student Yes Female
## # ℹ 298 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` that answered "No" to owning multiple Instagram profiles
single_profile <- data[which(data$`Do you own multiple profiles on Instagram?` == "No"), ]
print(single_profile)
## # A tibble: 1,316 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 50 Delhi Working professional No Male
## 5 52 Jaipur Working professional No Male
## 6 45 Durgapur Sabbatical No Female
## 7 25 Bengaluru Student No Male
## 8 27 Bengaluru Working professional No Male
## 9 45 Delhi Working professional No Female
## 10 21 Delhi Working professional No Female
## # ℹ 1,306 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Facebook last-week minutes are strictly above 500
more_than_500_fb <- data[which(data$`Time Spent on Facebook in last week (in minutes)` > 500), ]
print(more_than_500_fb)
## # A tibble: 130 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 39 Delhi Working professional No Female
## 2 26 Bengaluru Sabbatical Yes Female
## 3 50 Delhi Working professional No Male
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 45 Durgapur Sabbatical No Female
## 6 25 Bengaluru Student No Male
## 7 27 Bengaluru Working professional No Male
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 120 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Instagram last-week minutes are strictly above 500
more_than_500_insta <- data[which(data$`Time Spent on Instagram in last week (in minutes)` > 500), ]
print(more_than_500_insta)
## # A tibble: 559 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 22 Mumbai Working professional No Male
## 3 26 Bengaluru Sabbatical Yes Female
## 4 25 Vishakhapatnam Working professional Yes Female
## 5 25 Bengaluru Student No Male
## 6 27 Delhi Student Yes Male
## 7 45 Delhi Working professional No Female
## 8 22 Delhi Sabbatical Yes Male
## 9 18 Jaipur Student No Male
## 10 25 Mumbai Sabbatical No Female
## # ℹ 549 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose WhatsApp last-week minutes are strictly above 500
more_than_500_whatsapp <- data[which(data$`Time Spent on WhatsApp in last week (in minutes)` > 500), ]
print(more_than_500_whatsapp)
## # A tibble: 913 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 24 Delhi Working professional No Female
## 2 39 Delhi Working professional No Female
## 3 22 Mumbai Working professional No Male
## 4 26 Bengaluru Sabbatical Yes Female
## 5 50 Delhi Working professional No Male
## 6 25 Vishakhapatnam Working professional Yes Female
## 7 52 Jaipur Working professional No Male
## 8 45 Durgapur Sabbatical No Female
## 9 25 Bengaluru Student No Male
## 10 27 Delhi Student Yes Male
## # ℹ 903 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Instagram follower count is strictly above 1000
more_than_1000_followers <- data[which(data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` > 1000), ]
print(more_than_1000_followers)
## # A tibble: 199 × 26
## Age City `Current Status` Do you own multiple profiles on…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Bengaluru Student No Male
## 2 23 Kolkata Student Yes Female
## 3 21 Chennai Student No Female
## 4 21 Chennai Student Yes Female
## 5 21 Mumbai Sabbatical Yes Female
## 6 57 Delhi Sabbatical Yes Male
## 7 22 Delhi Student No Female
## 8 35 Mumbai Working professional Yes Female
## 9 26 Ahmedabad Working professional No Male
## 10 21 Mumbai Student No Female
## # ℹ 189 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Rows of `data` whose Instagram post count is strictly above 100
more_than_100_posts <- data[which(data$`How many posts do you have on Instagram?` > 100), ]
print(more_than_100_posts)
## # A tibble: 444 × 26
## Age City `Current Status` Do you own multiple profil…¹ Gender
## <dbl> <chr> <chr> <chr> <chr>
## 1 25 Vishakhapatnam Working professional Yes Female
## 2 25 Bengaluru Student No Male
## 3 25 Ahmedabad Self Employed Yes Male
## 4 50 Kolkata Working professional No Female
## 5 21 Chennai Student No Female
## 6 25 Kolkata Working professional No Male
## 7 45 Bagdogra Working professional Yes Female
## 8 38 Bengaluru Working professional No Female
## 9 24 Kolkata Student Yes Male
## 10 21 Chennai Student Yes Female
## # ℹ 434 more rows
## # ℹ abbreviated name: ¹​`Do you own multiple profiles on Instagram?`
## # ℹ 21 more variables: `Highest Education` <chr>,
## # `Location (City Airport Code)` <chr>, `Phone OS` <chr>, State <chr>,
## # Zone <chr>,
## # `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` <dbl>,
## # `How many posts do you have on Instagram?` <dbl>, Latitude <dbl>, …
# Linear regression of total social media usage on age, Instagram follower
# count and last-week Facebook minutes.
model <- lm(
  `Total Social Media Usage` ~ Age +
    `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)` +
    `Time Spent on Facebook in last week (in minutes)`,
  data = data
)
# Pair each observed response with its fitted value. Both columns come from the
# model itself, so they stay the same length even if lm() dropped rows with
# missing values while fitting.
predictions <- data.frame(
  Actual = model.response(model.frame(model)),
  Predicted = fitted(model)
)
# Actual vs predicted scatter plot.
ggplot(predictions, aes(x = Actual, y = Predicted)) +
  geom_point() +
  # Reference line y = x: points on it are predicted exactly. The model's own
  # intercept and Age coefficient describe a different relationship and do not
  # belong on these axes.
  geom_abline(intercept = 0, slope = 1, color = "red") +
  labs(title = "Actual vs Predicted Total Social Media Usage", x = "Actual", y = "Predicted")

# Base-graphics scatter of Instagram followers against posts for two groups:
#   blue - respondents with more than 1000 followers
#   red  - respondents with at most 1000 followers but more than 100 posts
# local() keeps the helper variables out of the global environment.
local({
  ig_followers <- data$`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`
  ig_posts <- data$`How many posts do you have on Instagram?`

  # which() drops rows where the condition is NA.
  blue_rows <- which(ig_followers > 1000)
  red_rows <- which(ig_followers <= 1000 & ig_posts > 100)
  shown <- c(blue_rows, red_rows)

  # The axis limits must span both groups. plot() alone would size the axes to
  # the blue group, which pushes every red point (followers <= 1000) off the
  # left edge of the plotting region.
  plot(
    ig_followers[blue_rows], ig_posts[blue_rows],
    col = "blue",
    xlim = range(ig_followers[shown], na.rm = TRUE),
    ylim = range(ig_posts[shown], na.rm = TRUE),
    xlab = "Number of followers", ylab = "Number of posts"
  )
  points(ig_followers[red_rows], ig_posts[red_rows], col = "red")
  legend("topright", legend = c("More than 1000 followers", "More than 100 posts"), col = c("blue", "red"), pch = 1)
})

# library() stops with an error if ggplot2 is missing; require() would only
# return FALSE and let the plots below fail with a less helpful message.
library(ggplot2)
# Pie chart of respondent counts per Gender: a single stacked bar drawn in
# polar coordinates.
ggplot(data, aes(x = "", fill = Gender)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Gender Distribution", fill = "Gender")

# Histogram of Age in bins of width 5; each distinct age value is its own fill
# group, coloured with the discrete viridis palette.
ggplot(data = data, mapping = aes(Age, fill = factor(Age))) +
  geom_histogram(colour = "black", binwidth = 5) +
  ggtitle("Age Distribution") +
  xlab("Age") +
  ylab("Count") +
  scale_fill_viridis_d() +
  theme_minimal()

# Pie chart of summed Instagram follower counts, one slice per distinct Age
# value (rows are grouped by exact age, not by binned age ranges).
# library() is used instead of require() so a missing dplyr stops the script
# here rather than returning FALSE and failing further down.
library(dplyr)
followers_by_age <- data %>%
  group_by(Age) %>%
  summarise(total_followers = sum(`How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`))
ggplot(followers_by_age, aes(x = "", y = total_followers, fill = Age)) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  geom_col(width = 1) +
  coord_polar(theta = "y") +
  labs(title = "Number of Followers on Instagram by Age Group", fill = "Age") +
  theme_void() +
  scale_fill_gradient(low = "#FFC0CB", high = "#ADD8E6")

# Dodged bar chart of respondent counts per Highest Education level, split by
# Gender, with the count printed above each bar.
ggplot(data, aes(x = `Highest Education`, fill = Gender)) +
  # The bar outline width is set with `linewidth`; using `size` for lines is
  # deprecated since ggplot2 3.4.0 and raised a warning.
  geom_bar(color = "black", linewidth = 0.5, width = 0.7, position = position_dodge()) +
  labs(title = "Education Level Distribution", x = "Education Level", y = "Count") +
  theme_minimal() +
  theme(legend.position = "top", legend.title = element_blank()) +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  # Labels use the same dodge width as the bars so each sits over its own bar.
  geom_text(aes(label = after_stat(count)), stat = "count", position = position_dodge(width = 0.7), vjust = -0.5, size = 3)

# Stacked bar chart of respondent counts per Phone OS, filled by Gender.
ggplot(data = data, mapping = aes(`Phone OS`, fill = Gender)) +
  geom_bar() +
  ggtitle("Phone Operating System Distribution") +
  xlab("Phone Operating System") +
  ylab("Count")

# Pie chart of respondent counts per Phone OS: one stacked bar wrapped into
# polar coordinates.
ggplot(data = data, mapping = aes(x = "", fill = `Phone OS`)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  ggtitle("Phone Operating System Distribution") +
  labs(fill = "Phone Operating System")

# Scatter plot of last-week Facebook minutes against last-week Instagram
# minutes. The points use one fixed colour; no colour aesthetic is mapped, so
# the manual colour scale that used to be added here had no effect and was
# removed.
ggplot(data, aes(x = `Time Spent on Facebook in last week (in minutes)`, y = `Time Spent on Instagram in last week (in minutes)`)) +
  geom_point(color = "#FFC0CB") +
  labs(title = "Time Spent on Facebook vs. Time Spent on Instagram", x = "Time Spent on Facebook (in minutes)", y = "Time Spent on Instagram (in minutes)")

# Histogram of Total Social Media Usage in bins of width 500, on a light-blue
# plot background with enlarged black text.
ggplot(data = data, mapping = aes(`Total Social Media Usage`)) +
  geom_histogram(colour = "black", fill = "#FFC0CB", binwidth = 500) +
  ggtitle("Social Media Usage Distribution") +
  xlab("Total Social Media Usage (in minutes)") +
  ylab("Count") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#ADD8E6")
  )

# Respondent locations (Longitude, Latitude) drawn over world borders, with
# point colour graded by Total Social Media Usage.
ggplot(data = data, mapping = aes(Longitude, Latitude, colour = `Total Social Media Usage`)) +
  borders("world", colour = "gray50", fill = "lightgreen") +
  geom_point(size = 3) +
  scale_colour_gradient(low = "#ADD8E6", high = "#FFC0CB") +
  ggtitle("Location vs. Social Media Usage") +
  xlab("Longitude") +
  ylab("Latitude") +
  labs(colour = "Total Social Media Usage (in minutes)")

# Histogram of last-week WhatsApp minutes in bins of width 100, filled by Gender.
ggplot(data = data, mapping = aes(`Time Spent on WhatsApp in last week (in minutes)`, fill = Gender)) +
  geom_histogram(binwidth = 100) +
  ggtitle("Time Spent on WhatsApp Distribution") +
  xlab("Time Spent on WhatsApp (in minutes)") +
  ylab("Count")

# Bar chart of the mean Total Social Media Usage per Highest Education level.
# The y label says "Mean" because each bar is the per-level mean computed by
# the summary stat, not a total over respondents.
ggplot(data, aes(x = `Highest Education`, y = `Total Social Media Usage`, fill = `Highest Education`)) +
  geom_bar(stat = "summary", fun = "mean") +
  labs(title = "Education Level vs. Social Media Usage", x = "Education Level", y = "Mean Total Social Media Usage (in minutes)") +
  scale_fill_manual(values = c("#FFC0CB", "#ADD8E6", "#90EE90", "#FFD700")) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "#F5F5F5"),
    axis.text = element_text(size = 12, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    plot.title = element_text(size = 16, color = "black")
  )

# Histogram of Instagram post counts in bins of width 50, on a light-grey
# plot background with enlarged black text.
ggplot(data = data, mapping = aes(`How many posts do you have on Instagram?`)) +
  geom_histogram(fill = "#ADD8E6", binwidth = 50) +
  ggtitle("Number of Posts on Instagram Distribution") +
  xlab("Number of Posts on Instagram") +
  ylab("Count") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#F5F5F5")
  )

# Scatter plot of Age against Instagram post count.
ggplot(data = data, mapping = aes(Age, `How many posts do you have on Instagram?`)) +
  geom_point() +
  ggtitle("Age vs Number of Posts on Instagram") +
  xlab("Age") +
  ylab("Number of Posts on Instagram")

# Histogram of last-week Facebook minutes in bins of width 100, on a
# light-grey plot background with enlarged black text.
ggplot(data = data, mapping = aes(`Time Spent on Facebook in last week (in minutes)`)) +
  geom_histogram(fill = "#ADD8E6", binwidth = 100) +
  ggtitle("Time Spent on Facebook Distribution") +
  xlab("Time Spent on Facebook (in minutes)") +
  ylab("Count") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, color = "black"),
    axis.title = element_text(size = 14, color = "black"),
    axis.text = element_text(size = 12, color = "black"),
    plot.background = element_rect(fill = "#F5F5F5")
  )

# Scatter plot of Age against last-week Facebook minutes.
ggplot(data = data, mapping = aes(Age, `Time Spent on Facebook in last week (in minutes)`)) +
  geom_point() +
  ggtitle("Age vs Time Spent on Facebook") +
  xlab("Age") +
  ylab("Time Spent on Facebook (in minutes)")

# Scatter plot of Age against last-week Instagram minutes.
ggplot(data = data, mapping = aes(Age, `Time Spent on Instagram in last week (in minutes)`)) +
  geom_point() +
  ggtitle("Age vs Time Spent on Instagram") +
  xlab("Age") +
  ylab("Time Spent on Instagram (in minutes)")

# Histogram of Total Social Media Usage in bins of width 500, filled by Gender,
# with the legend placed below the plot.
ggplot(data = data, mapping = aes(`Total Social Media Usage`, fill = Gender)) +
  geom_histogram(binwidth = 500) +
  ggtitle("Total Social Media Usage Distribution") +
  xlab("Total Social Media Usage") +
  ylab("Count") +
  # Three fill colours are supplied so a third Gender value also gets one.
  scale_fill_manual(values = c("#ADD8E6", "#FFC0CB", "#FF0000")) +
  theme_minimal() +
  theme(legend.position = "bottom")

# Scatter plot of Age against Total Social Media Usage.
ggplot(data = data, mapping = aes(Age, `Total Social Media Usage`)) +
  geom_point() +
  ggtitle("Age vs Total Social Media Usage") +
  xlab("Age") +
  ylab("Total Social Media Usage")

# Scatter plot of Instagram follower count against Instagram post count.
ggplot(
  data = data,
  mapping = aes(
    `How many followers do you have on Instagram? (In case of multiple accounts, please mention the one with the maximum)`,
    `How many posts do you have on Instagram?`
  )
) +
  geom_point() +
  ggtitle("Number of Followers vs Number of Posts on Instagram") +
  xlab("Number of Followers on Instagram") +
  ylab("Number of Posts on Instagram")

# Pie chart of respondent counts per Current Status: one stacked bar wrapped
# into polar coordinates.
ggplot(data = data, mapping = aes(x = "", fill = `Current Status`)) +
  geom_bar(width = 1) +
  coord_polar("y") +
  ggtitle("Current Status Distribution") +
  labs(fill = "Current Status")

# Closing slide: a blank canvas with a single centred text label.
# annotate() draws one fixed label with a literal font size. Putting `size`
# inside aes() treated it as a mapped variable, which added a size legend and
# did not set the font size to 10.
ggplot() +
  annotate("text", x = 0.5, y = 0.5, label = "Thank You Mam", size = 10) +
  theme_void()
