head()
# Inspect `df` grouped by `tag`.
# dplyr verbs are chained with the pipe (`%>%`); `+` is only used to add
# ggplot2 layers, so `group_by(tag) + head()` calls head() with no input
# and fails.
df %>%
  group_by(tag)

# Help page for head().
?head

# First five rows of the grouped data. Neither base R nor dplyr provides a
# `top()` function; `head(5)` is the call that works here.
df %>%
  group_by(tag) %>%
  head(5)
# Sum `count_n` within each tag and look at the result step by step.

# Default head() preview of the per-tag totals (run twice in the session).
df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  head()

df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  head()

# Same totals, sorted so the largest come first.
df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total))

# The twenty largest totals.
df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  head(20)

# Only the tags whose total exceeds 100000.
df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  filter(total > 100000)

# At most twenty of the tags above the 100000 threshold.
df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  filter(total > 100000) %>%
  head(20)
# Build `tags`: the tags whose summed `count_n` exceeds 100000, ordered by
# decreasing total. select() keeps only the `tag` column and
# unlist(use.names = FALSE) flattens that one-column table into an unnamed
# vector.
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  filter(total > 100000) %>%
  select(tag) %>%
  unlist(use.names = FALSE)
tags

# The same assignment was run a second time in the session.
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  filter(total > 100000) %>%
  select(tag) %>%
  unlist(use.names = FALSE)
tags

# Help page for unlist().
?unlist
# Preview the rows of `df` whose tag is one of the selected `tags`.
# The membership operator is the lower-case `%in%`; R is case-sensitive and
# `%IN%` is not defined.
df %>%
  filter(tag %in% tags) %>%
  head()
# Explore the rows of `df` whose tag is one of the selected `tags`.

# Preview of just the two plotted columns.
df %>%
  filter(tag %in% tags) %>%
  select(qlinkAbs, count_n) %>%
  head()

# Number of rows that survive the tag filter.
df %>%
  filter(tag %in% tags) %>%
  count()

# Preview of all columns of the filtered rows.
df %>%
  filter(tag %in% tags) %>%
  head()

# Scatter plot of count_n against qlinkAbs.
# The plot must be started with ggplot(); `qqplot()` is the stats package's
# quantile-quantile plot and cannot be combined with geom_point() or aes().
df %>%
  filter(tag %in% tags) %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n))
# Row count of the full data set.
df %>% count()

# `df2` holds only the rows whose tag is in `tags`. It is assigned before
# its first use so the row count below does not refer to a missing object.
df2 <- df %>%
  filter(tag %in% tags)

# Row count of the subset.
df2 %>% count()

# Scatter plot of count_n against qlinkAbs for the subset.
df2 %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n))
# Shared plot recipe for this section: count_n against qlinkAbs as points,
# both axes on a log10 scale, one facet row per tag.
# Arguments passed through `...` (here: alpha and size) go to geom_point().
plot_tag_rows <- function(data, ...) {
  ggplot(data) +
    geom_point(aes(x = qlinkAbs, y = count_n), ...) +
    scale_x_log10() +
    scale_y_log10() +
    facet_grid(tag ~ .)
}

# All rows of df2, including the "all" tag.
df2 %>%
  plot_tag_rows()

# Help page for facet_grid().
?facet_grid

# From here on the "all" tag is dropped; the calls differ only in the point
# transparency and size being tried out.
df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows()

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(alpha = 0.1)

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(alpha = 0.1, size = 1)

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(alpha = 0.1, size = 0.1)

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(alpha = 0.5, size = 0.1)

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(size = 0.1)

df2 %>%
  filter(tag != "all") %>%
  plot_tag_rows(size = 0.01)
# Build `tags`: the ten tags with the largest summed `count_n`, as an
# unnamed vector.
#
# The ranking has to happen while the `total` column is still present.
# Calling top_n(10) after select(tag) leaves only the `tag` column, so
# top_n() ranks by `tag` itself instead of by `total`. slice_max() names
# the ranking column explicitly and keeps ties, as top_n() does.
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  # filter(total > 100000) %>%
  slice_max(total, n = 10) %>%
  select(tag) %>%
  unlist(use.names = FALSE)
tags
# Row count of the full data set.
df %>%
  count()

# Rebuild the subset from the current `tags` and count its rows.
df2 <- df %>%
  filter(tag %in% tags)
df2 %>%
  count()

# Log-log scatter of count_n against qlinkAbs, one facet row per tag,
# leaving out the "all" tag.
df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10() +
  facet_grid(tag ~ .)
# Work out the ten tags with the largest summed `count_n`.

# Step 1: totals per tag, largest first (full table).
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total))
tags

# Help page for top_n().
?top_n

# Step 2: keep the ten largest totals.
# slice_max() needs the ranking column first and the row count as `n`;
# `slice_max(10)` would pass 10 as the ranking expression. Ranking is done
# on `total` before select(), because top_n(10) applied after select(tag)
# would rank by `tag` instead.
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  slice_max(total, n = 10)
tags

# Step 3: the same ten tags as an unnamed vector.
tags <- df %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  slice_max(total, n = 10) %>%
  select(tag) %>%
  unlist(use.names = FALSE)
tags
# Rebuild the subset from the current `tags` and count its rows.
df2 <- df %>%
  filter(tag %in% tags)
df2 %>% count()

# Log-log scatter of count_n against qlinkAbs without the "all" tag,
# one facet row per tag.
df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10() +
  facet_grid(tag ~ .)

# Same plot with the facets wrapped into two columns.
# The function is facet_wrap(); the misspelling `facet_wraap` does not exist.
df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~ tag, ncol = 2)
# Ten tags with the largest summed `count_n`, this time dropping the "all"
# tag before aggregating. The result is an unnamed vector.
tags <- df %>%
  filter(tag != "all") %>%
  group_by(tag) %>%
  summarise(total = sum(count_n)) %>%
  arrange(desc(total)) %>%
  slice_max(total, n = 10) %>%
  select(tag) %>%
  unlist(use.names = FALSE)
tags

# Row counts of the full data and of the subset rebuilt from `tags`.
df %>%
  count()
df2 <- df %>%
  filter(tag %in% tags)
df2 %>%
  count()

# Log-log scatter of count_n against qlinkAbs, facets wrapped in two columns.
df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~ tag, ncol = 2)

tags

# Help page for facet_wrap().
?facet_wrap
# Log-log scatter of count_n against qlinkAbs per tag, with the mean of
# count_n at each x value overlaid as red triangles.
#
# stat_summary() needs x and y aesthetics. They are declared once in
# ggplot() so that both layers inherit them; passing bare `x = ...` and
# `y = ...` arguments to stat_summary() does not create a mapping.
# The summary function is given through `fun`, which replaces the `fun.y`
# argument deprecated in ggplot2 3.3.0.
df2 %>%
  filter(tag != "all") %>%
  ggplot(aes(x = qlinkAbs, y = count_n)) +
  geom_point(size = 0.01) +
  stat_summary(
    geom = "point",
    fun = "mean",
    colour = "black",
    size = 3,
    shape = 24,
    fill = "red"
  ) +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~ tag, ncol = 2)
# Help pages consulted for drawing reference lines.
?geom_line
?geom_vline

# Log-log scatter per tag with one vertical reference line at the mean of
# qlinkAbs.
# `xintercept` is given outside aes(), so it is evaluated as ordinary R
# code and not against the plot data; the column therefore has to be
# reached through the data frame (`df$qlinkAbs`), since a bare `qlinkAbs`
# is not an object in the session.
# NOTE(review): the mean is taken over all of `df`, not over the filtered
# `df2` rows being plotted, so every facet shows the same line. Confirm
# this is intended.
df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  geom_vline(xintercept = mean(df$qlinkAbs)) +
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~ tag, ncol = 2)
# Reference example on the built-in iris data: a reference line is drawn in
# a single facet by giving the line layer a data subset that contains only
# that facet's Species value.
ggplot(iris, aes(Sepal.Length, Petal.Length)) +
  facet_wrap(~Species, scales = "free") +
  geom_point() +
  # Vertical line at x = 5, setosa panel only.
  geom_vline(
    data = filter(iris, Species == "setosa"),
    aes(xintercept = 5),
    colour = "pink"
  ) +
  # Vertical line at x = 6, versicolor panel only.
  geom_vline(
    data = filter(iris, Species == "versicolor"),
    aes(xintercept = 6),
    colour = "blue"
  ) +
  # Horizontal line at y = 6, virginica panel only.
  geom_hline(
    data = filter(iris, Species == "virginica"),
    aes(yintercept = 6),
    colour = "green"
  )
# Store the faceted scatter plot in `p` so that layers can be added later.

# First version: linear axes, "all" tag removed (stored but not displayed).
p <- df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  facet_wrap(~ tag, ncol = 2)

# Second version: log10 axes, "all" tag removed.
p <- df2 %>%
  filter(tag != "all") %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()
p

# Third version: log10 axes, every row of df2.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()
p
# List the selected tags. The loop variable is called `tag_name` so it
# cannot be confused with the `tag` column of the data.
for (tag_name in tags) {
  print(tag_name)
}

# Base plot: log-log scatter of count_n against qlinkAbs, facets in two
# columns.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()

# Mean of qlinkAbs for each selected tag, one row per tag.
# A loop using filter(df2, tag == tag) does not work: inside filter() both
# sides of the comparison resolve to the `tag` column, so every row is kept
# and each line lands on the overall mean. Summarising by group avoids the
# name clash and the loop altogether.
tag_means <- df2 %>%
  filter(tag %in% tags) %>%
  group_by(tag) %>%
  summarise(mean_qlink = mean(qlinkAbs))

# One layer draws all the lines; the `tag` column in `tag_means` places each
# line in its own facet.
p <- p +
  geom_vline(
    data = tag_means,
    aes(xintercept = mean_qlink),
    colour = "blue"
  )
p
# Base plot: log-log scatter of count_n against qlinkAbs, facets in two
# columns.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()

# Mean of qlinkAbs for each selected tag, one row per tag.
# This replaces a per-tag loop that added one layer per tag and, because the
# mean was computed inside aes() on the full subset, drew one identical line
# for every data row.
tag_means <- df2 %>%
  filter(tag %in% tags) %>%
  group_by(tag) %>%
  summarise(mean_qlink = mean(qlinkAbs))

# One layer draws all the lines; the `tag` column in `tag_means` places each
# line in its own facet.
p <- p +
  geom_vline(
    data = tag_means,
    aes(xintercept = mean_qlink),
    colour = "blue"
  )
p
# Base plot: log-log scatter of count_n against qlinkAbs, facets in two
# columns.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()

# Per-tag reference value: sum(qlinkAbs) / sum(count_n).
# The value is stored as a column. aes() expressions are only evaluated when
# the plot is drawn, so a loop that sets a variable `m` and maps
# aes(xintercept = m) ends up using the last iteration's `m` for every tag.
# NOTE(review): if a count-weighted mean of qlinkAbs was intended, the
# numerator should be sum(qlinkAbs * count_n). Confirm the formula.
tag_lines <- df2 %>%
  filter(tag %in% tags) %>%
  group_by(tag) %>%
  summarise(m = sum(qlinkAbs) / sum(count_n))

# One layer draws all the lines; the `tag` column in `tag_lines` places each
# line in its own facet.
p <- p +
  geom_vline(
    data = tag_lines,
    aes(xintercept = m),
    colour = "blue"
  )
p
# Base plot: log-log scatter of count_n against qlinkAbs, facets in two
# columns.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()

# Per-tag reference value: sum(qlinkAbs) / sum(qlinkAbs * count_n).
# The value is stored as a column. aes() expressions are only evaluated when
# the plot is drawn, so a loop that sets a variable `m` and maps
# aes(xintercept = m) ends up using the last iteration's `m` for every tag.
# NOTE(review): a count-weighted mean of qlinkAbs would be
# sum(qlinkAbs * count_n) / sum(count_n). Confirm that this ratio is the
# quantity wanted.
tag_lines <- df2 %>%
  filter(tag %in% tags) %>%
  group_by(tag) %>%
  summarise(
    s1 = sum(qlinkAbs),
    s2 = sum(qlinkAbs * count_n),
    m = s1 / s2
  )

# One layer draws all the lines; the `tag` column in `tag_lines` places each
# line in its own facet.
p <- p +
  geom_vline(
    data = tag_lines,
    aes(xintercept = m),
    colour = "blue"
  )
p
# Base plot: log-log scatter of count_n against qlinkAbs, facets in two
# columns. Two unterminated `for (tag_name in tags) {` headers and a dangling
# `geom_vline(...) +` line have been removed because they left the rest of
# the file unparsable.
p <- df2 %>%
  ggplot() +
  facet_wrap(~ tag, ncol = 2) +
  geom_point(aes(x = qlinkAbs, y = count_n), size = 0.01) +
  scale_x_log10() +
  scale_y_log10()
p

# Distribution of qlinkAbs for every tag in `df`: a 100-bin histogram with a
# density curve (colour 2), both axes on a log10 scale, one facet row per
# tag.
df %>%
  # filter(tag == 'all') %>%
  ggplot(aes(qlinkAbs)) +
  geom_histogram(bins = 100) +
  geom_density(col = 2) +
  scale_x_log10() +
  scale_y_log10() +
  facet_grid(tag ~ .)
l
