Emojis are increasingly important in social media communication. This
vignette demonstrates how to analyze emoji usage patterns in YouTube
comments using tuber’s built-in emoji functions.
# Flatten the per-comment emoji lists into one vector of emoji occurrences.
all_emojis <- unlist(extract_emojis(comments$textDisplay))

# One row per distinct emoji with its occurrence count, most frequent first.
emoji_freq <- setNames(
  as.data.frame(table(all_emojis), stringsAsFactors = FALSE),
  c("emoji", "count")
)
emoji_freq <- emoji_freq[order(emoji_freq$count, decreasing = TRUE), ]

# Show the 15 most frequent emojis.
head(emoji_freq, n = 15)
# Horizontal bar chart of the first ten rows of emoji_freq, with bars ordered
# by count.
emoji_freq |>
  head(10) |>
  ggplot(aes(x = reorder(emoji, count), y = count)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Used Emojis",
    x = "Emoji",
    y = "Count"
  ) +
  theme_minimal()

# Per-comment columns used by the time-based analysis.
comments <- comments |>
  mutate(
    date = as.Date(publishedAt),
    # has_emoji feeds the daily and engagement summaries that follow; it is
    # (re)computed here so this step works even if no earlier step added it.
    has_emoji = has_emoji(textDisplay),
    emoji_count = count_emojis(textDisplay)
  )
# Daily roll-up: number of comments, number of comments flagged as containing
# an emoji, total emojis, and the rate (%) and per-comment average derived
# from those totals.
daily_emoji <- summarise(
  group_by(comments, date),
  total_comments = n(),
  comments_with_emoji = sum(has_emoji, na.rm = TRUE),
  total_emojis = sum(emoji_count, na.rm = TRUE),
  emoji_rate = comments_with_emoji / total_comments * 100,
  avg_emojis = total_emojis / total_comments
)
# Daily emoji rate drawn as a line, overlaid with a LOESS smoother and its
# standard-error band (se = TRUE).
ggplot(daily_emoji, aes(x = date, y = emoji_rate)) +
  geom_line(color = "steelblue") +
  geom_smooth(method = "loess", se = TRUE, alpha = 0.2) +
  labs(
    title = "Emoji Usage Rate Over Time",
    x = "Date",
    y = "% of Comments with Emojis"
  ) +
  theme_minimal()

Emojis can indicate sentiment. Here’s a simple categorization approach:
# Reference set of emojis treated as positive sentiment.
positive_emojis <- c(
  "\U0001F600", # grinning face
  "\U0001F601", # beaming face with smiling eyes
  "\U0001F602", # face with tears of joy
  "\U0001F603", # grinning face with big eyes
  "\U0001F604", # grinning face with smiling eyes
  "\U0001F605", # grinning face with sweat
  "\U0001F606", # grinning squinting face
  "\U0001F60A", # smiling face with smiling eyes
  "\U0001F60D", # smiling face with heart-eyes
  "\U0001F618", # face blowing a kiss
  "\U0001F44D", # thumbs up
  "\U0001F44F", # clapping hands
  "\U00002764", # red heart
  "\U0001F389", # party popper
  "\U0001F38A"  # confetti ball
)

# Reference set of emojis treated as negative sentiment.
negative_emojis <- c(
  "\U0001F620", # angry face
  "\U0001F621", # pouting face
  "\U0001F622", # crying face
  "\U0001F623", # persevering face
  "\U0001F624", # face with steam from nose
  "\U0001F625", # sad but relieved face
  "\U0001F62D", # loudly crying face
  "\U0001F44E", # thumbs down
  "\U0001F4A9", # pile of poo
  "\U0001F61E"  # disappointed face
)
# Score each comment by how many of its emojis appear in the positive and
# negative reference sets, then label it by whichever count is larger.
comments <- comments |>
  mutate(
    emojis = extract_emojis(textDisplay),
    # vapply() pins the result to one integer per comment, so the columns stay
    # integer vectors even for zero rows (sapply() would return a list there).
    pos_emoji = vapply(
      emojis,
      function(e) sum(e %in% positive_emojis),
      integer(1)
    ),
    neg_emoji = vapply(
      emojis,
      function(e) sum(e %in% negative_emojis),
      integer(1)
    ),
    emoji_sentiment = case_when(
      pos_emoji > neg_emoji ~ "positive",
      neg_emoji > pos_emoji ~ "negative",
      # Only ties reach this point; a 0-0 tie means the comment has no emoji
      # from either reference set.
      pos_emoji == 0 & neg_emoji == 0 ~ "none",
      TRUE ~ "neutral"
    )
  )
# How many comments fall under each sentiment label.
table(comments$emoji_sentiment)

# Like counts for comments with versus without emojis: group size, mean and
# median likes (missing like counts are ignored).
engagement_summary <- comments |>
  group_by(has_emoji) |>
  summarise(
    n = n(),
    mean_likes = mean(likeCount, na.rm = TRUE),
    median_likes = median(likeCount, na.rm = TRUE)
  )

print(engagement_summary)
# Box plots of likes per comment, split by emoji presence. Likes are shifted
# by 1 so that zero-like comments stay defined on the log10 axis.
ggplot(comments, aes(x = has_emoji, y = likeCount + 1)) +
  geom_boxplot(fill = "steelblue", alpha = 0.7) +
  scale_y_log10() +
  labs(
    title = "Like Counts: Emoji vs Non-Emoji Comments",
    x = "Contains Emoji",
    y = "Likes (log scale)"
  ) +
  theme_minimal()

# Video IDs to compare (these look like example values; substitute real IDs).
video_ids <- c("video_id_1", "video_id_2", "video_id_3")
# Fetch the comments of each video (requesting max_results = 200) and tag every
# row with the ID of the video it came from.
# NOTE(review): assumes get_all_comments() returns a data frame; confirm.
all_comments <- lapply(video_ids, function(vid) {
  video_comments <- get_all_comments(video_id = vid, max_results = 200)
  # rep() sizes the tag to the row count, so a video with no comments yields a
  # zero-row frame instead of a "replacement has 1 row, data has 0" error.
  video_comments$video_id <- rep(vid, nrow(video_comments))
  video_comments
})

# Stack the per-video data frames into a single one.
all_comments <- bind_rows(all_comments)
# Per-video emoji usage: number of comments, share of comments with at least
# one emoji (%), and mean emojis per comment.
video_emoji_stats <- all_comments |>
  mutate(emoji_count = count_emojis(textDisplay)) |>
  group_by(video_id) |>
  summarise(
    total_comments = n(),
    emoji_rate = mean(emoji_count > 0) * 100,
    avg_emojis = mean(emoji_count)
  )

print(video_emoji_stats)

For text analysis that should exclude emojis:
For large datasets:
Key functions used in this analysis:
| Function | Purpose |
|---|---|
| `has_emoji()` | Check if text contains emojis |
| `count_emojis()` | Count emojis in text |
| `extract_emojis()` | Get list of emojis from text |
| `remove_emojis()` | Strip emojis from text |
| `replace_emojis()` | Replace emojis with custom text |
These functions work directly on character vectors, making them easy
to use with dplyr::mutate() and other tidyverse
workflows.