library('tidyverse')

# Load the two input tables from the network-scores2 output directory:
# `df` comes from the density CSV, `qlink` from the qlink statistics CSV.
df <- read_csv(file.path(
  "~/bin/marc/_output/gent/network-scores2",
  "network-scores-density.csv"
))
qlink <- read_csv(file.path(
  "~/bin/marc/_output/gent/network-scores2",
  "network-scores-qlink-stat.csv"
))

# Keep only the rows of `qlink` whose statistic is 'mean' and reduce them to
# two columns: `tag` and the `value` column exposed under the name `qlink`.
qlink_normalized <- qlink %>%
  filter(statistic == "mean") %>%
  select(tag, qlink = value)

# Working table: rows of `df` with a positive record count, extended with the
# `qlink` column matched on `tag`. Rows without a match keep NA in `qlink`.
df2 <- df %>%
  filter(records > 0) %>%
  left_join(qlink_normalized, by = "tag")

# Pearson correlation between the two connectedness measures (average degree
# and qlink). `df2` is built with a left join, so `qlink` is NA for every tag
# that has no 'mean' row; without `use =` a single NA would make the whole
# result NA. Pairwise-complete observations are used instead, which still
# yields NA (rather than an error) when no complete pair exists.
# The value is printed explicitly so it is also shown when the script is run
# through source(), where top-level auto-printing is off.
print(
  cor(
    df2$avgDegree,
    df2$qlink,
    method = "pearson",
    use = "pairwise.complete.obs"
  )
)

# Scatter plot of the number of records (y) against the average degree (x),
# one labelled point per tag, with point size mapped to the number of links.
# The 'all' tag (presumably the catalogue-wide aggregate -- confirm) and tags
# with at most 1000 links are excluded. `df2` already contains only rows with
# records > 0, so that condition is not repeated here.
plot_avg_degree <- df2 %>%
  filter(tag != "all", links > 1000) %>%
  ggplot(aes(x = avgDegree, y = records)) +
  geom_point(aes(size = links), color = "cornflowerblue") +
  # angle and hjust are fixed values, not data mappings, so they are set
  # outside aes(); only the label depends on the data.
  geom_text(
    aes(label = tag),
    angle = 25,
    hjust = -0.3,
    color = "red"
  ) +
  labs(
    title = "Networked records in the catalogue",
    subtitle = 'How does different subject fields connect records together',
    x = "connectedness (average degree)",
    y = "number of records"
  ) +
  scale_y_continuous(
    breaks = c(250000, 500000, 750000),
    labels = c("250K", "500K", "750K")
  ) +
  theme_bw()

# Write the plot as a 10 x 5 PNG and report the target path.
img_path <- "~/bin/marc/_output/gent/network-scores2/avg_degree.png"
ggsave(
  filename = img_path,
  plot = plot_avg_degree,
  device = "png",
  width = 10,
  height = 5
)
print(paste("creating", img_path))

# Scatter plot of the number of records (y) against the qlink score (x),
# one labelled point per tag, with point size mapped to the number of links.
# The 'all' tag (presumably the catalogue-wide aggregate -- confirm) and tags
# with at most 1000 links are excluded. `df2` already contains only rows with
# records > 0, so that condition is not repeated here.
plot_qlink <- df2 %>%
  filter(tag != "all", links > 1000) %>%
  ggplot(aes(x = qlink, y = records)) +
  geom_point(aes(size = links), color = "cornflowerblue") +
  # angle and hjust are fixed values, not data mappings, so they are set
  # outside aes(); only the label depends on the data.
  geom_text(
    aes(label = tag),
    angle = 25,
    hjust = -0.3,
    color = "red"
  ) +
  labs(
    title = "Networked records in the catalogue",
    subtitle = 'How does different subject fields connect records together',
    x = "connectedness (qlink or logical accessibility)",
    y = "number of records"
  ) +
  scale_y_continuous(
    breaks = c(250000, 500000, 750000),
    labels = c("250K", "500K", "750K")
  ) +
  theme_bw()

# Write the plot as a 10 x 5 PNG and report the target path.
img_path <- "~/bin/marc/_output/gent/network-scores2/qlink.png"
ggsave(
  filename = img_path,
  plot = plot_qlink,
  device = "png",
  width = 10,
  height = 5
)
print(paste("creating", img_path))
