#!/usr/bin/env bash

# Reset bash's built-in SECONDS counter; it is read again at the end of the
# script to report the elapsed time.
SECONDS=0

# Directory that receives the per-task log files.
PREFIX="${BASE_INPUT_DIR}/_reports/${NAME}"
DATE=$(date +"%F")

# A non-empty VERSION gets its own output directory (<name>-<version>).
if [[ -n "$VERSION" ]]; then
  OUTPUT_DIR="${BASE_OUTPUT_DIR}/${NAME}-${VERSION}"
else
  OUTPUT_DIR="${BASE_OUTPUT_DIR}/${NAME}"
fi

echo "OUTPUT_DIR: ${OUTPUT_DIR}"

# mkdir -p succeeds silently when the directory already exists, so no
# separate -d test is needed. Quoting keeps paths with whitespace intact.
mkdir -p -- "$PREFIX"

# printf "%s %s> Logging to ${PREFIX}.log"

mkdir -p -- "$OUTPUT_DIR"

# Runs ./validator on ${MARC_DIR}/${MASK} with the detail/summary CSV options
# below; the tool's stderr is written to ${PREFIX}/validate.log.
# Globals read: OUTPUT_DIR, TYPE_PARAMS, MARC_DIR, MASK, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./validator
do_validate() {
  local -a general_params=(--details --trimId --summary --format csv --defaultRecordType BOOKS)
  local -a output_params=(
    --outputDir "${OUTPUT_DIR}"
    --detailsFileName issue-details.csv
    --summaryFileName issue-summary.csv
  )

  # Variable text is passed as a printf argument, never inside the format
  # string, so a '%' or '\' in a path cannot corrupt the log line.
  printf '%s> %s\n' "$(date +'%F %T')" "[validator]"
  # The logged command mirrors exactly what is executed below.
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./validator ${general_params[*]} ${output_params[*]} ${TYPE_PARAMS} ${MARC_DIR}/${MASK} 2> ${PREFIX}/validate.log"

  # TYPE_PARAMS is deliberately unquoted so it splits into separate options;
  # MASK is unquoted so the shell can expand it (presumably a file glob).
  ./validator "${general_params[@]}" "${output_params[@]}" ${TYPE_PARAMS} "${MARC_DIR}"/${MASK} 2> "${PREFIX}/validate.log"
}

# Runs ./prepare-solr for the core named by NAME; the tool's stderr is
# written to ${PREFIX}/solr.log (the file is truncated first).
# Globals read: NAME, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./prepare-solr
do_prepare_solr() {
  # Variable text goes in as a printf argument, not into the format string,
  # so a '%' in NAME or PREFIX is printed literally.
  printf '%s> %s\n' "$(date +'%F %T')" "[prepare-solr]"
  printf '%s> %s\n' "$(date +'%F %T')" "./prepare-solr ${NAME} 2> ${PREFIX}/solr.log"
  ./prepare-solr "$NAME" 2> "${PREFIX}/solr.log"
}

# Runs ./index for the core named by NAME; the tool's stderr is appended to
# ${PREFIX}/solr.log.
# Globals read: TYPE_PARAMS, NAME, MARC_DIR, MASK, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./index
do_index() {
  # --emptyLargeCollectors is stripped from the type parameters before they
  # are handed to ./index.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[index]"
  # The log shows '2>>' because the command below appends to solr.log.
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./index --db ${NAME} --file-path ${MARC_DIR} --file-mask ${MASK} ${params} --trimId 2>> ${PREFIX}/solr.log"

  # MASK is quoted here: it is passed as the value of --file-mask, so it must
  # not be glob-expanded against the current working directory.
  # params stays unquoted so it splits into separate options.
  ./index --db "$NAME" --file-path "${MARC_DIR}" --file-mask "$MASK" ${params} --trimId 2>> "${PREFIX}/solr.log"
}

# Runs ./completeness on ${MARC_DIR}/${MASK}; the tool's stderr is written to
# ${PREFIX}/completeness.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./completeness
do_completeness() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string,
  # so a '%' or '\' in a path cannot corrupt the log line.
  printf '%s> %s\n' "$(date +'%F %T')" "[completeness]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./completeness --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/completeness.log"

  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./completeness --defaultRecordType BOOKS ${params} --outputDir "${OUTPUT_DIR}/" "${MARC_DIR}"/${MASK} 2> "${PREFIX}/completeness.log"
}

# Runs ./classifications on ${MARC_DIR}/${MASK}, then the
# classifications-type.R script on OUTPUT_DIR. stderr of both steps ends up
# in ${PREFIX}/classifications.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      three timestamped log lines on stdout
# Returns:      the exit status of the Rscript call
do_classifications() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[classifications]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./classifications --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/classifications.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./classifications --defaultRecordType BOOKS ${params} --outputDir "${OUTPUT_DIR}/" "${MARC_DIR}"/${MASK} 2> "${PREFIX}/classifications.log"

  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/classifications-type.R ${OUTPUT_DIR} 2>> ${PREFIX}/classifications.log"
  # The redirection announced in the log line is now actually applied.
  Rscript scripts/classifications-type.R "${OUTPUT_DIR}" 2>> "${PREFIX}/classifications.log"
}

# Runs ./authorities on ${MARC_DIR}/${MASK}; the tool's stderr is written to
# ${PREFIX}/authorities.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./authorities
do_authorities() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string,
  # so a '%' or '\' in a path cannot corrupt the log line.
  printf '%s> %s\n' "$(date +'%F %T')" "[authorities]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./authorities --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/authorities.log"

  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./authorities --defaultRecordType BOOKS ${params} --outputDir "${OUTPUT_DIR}/" "${MARC_DIR}"/${MASK} 2> "${PREFIX}/authorities.log"
}

# Runs ./tt-completeness on ${MARC_DIR}/${MASK}, then the tt-histogram.R
# script on OUTPUT_DIR. Both steps log to ${PREFIX}/tt-completeness.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      three timestamped log lines on stdout
# Returns:      the exit status of the Rscript call
do_tt_completeness() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[tt-completeness]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./tt-completeness --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/tt-completeness.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./tt-completeness --defaultRecordType BOOKS ${params} --outputDir "${OUTPUT_DIR}/" --trimId "${MARC_DIR}"/${MASK} 2> "${PREFIX}/tt-completeness.log"

  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/tt-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/tt-completeness.log"
  Rscript scripts/tt-histogram.R "${OUTPUT_DIR}" &>> "${PREFIX}/tt-completeness.log"
}

# Runs ./shelf-ready-completeness on ${MARC_DIR}/${MASK}, then the
# shelf-ready-histogram.R script on OUTPUT_DIR. Both steps log to
# ${PREFIX}/shelf-ready-completeness.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      three timestamped log lines on stdout
# Returns:      the exit status of the Rscript call
do_shelf_ready_completeness() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[shelf-ready-completeness]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./shelf-ready-completeness --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/shelf-ready-completeness.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./shelf-ready-completeness \
    --defaultRecordType BOOKS \
    ${params} \
    --outputDir "${OUTPUT_DIR}/" \
    --trimId "${MARC_DIR}"/${MASK} 2> "${PREFIX}/shelf-ready-completeness.log"

  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/shelf-ready-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/shelf-ready-completeness.log"
  Rscript scripts/shelf-ready-histogram.R "${OUTPUT_DIR}" &>> "${PREFIX}/shelf-ready-completeness.log"
}

# Runs ./serial-score on ${MARC_DIR}/${MASK}, then the
# serial-score-histogram.R script on OUTPUT_DIR. Both steps log to
# ${PREFIX}/serial-score.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      three timestamped log lines on stdout
# Returns:      the exit status of the Rscript call
do_serial_score() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[serial-score]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./serial-score --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${PREFIX}/serial-score.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./serial-score --defaultRecordType BOOKS \
    ${params} \
    --outputDir "${OUTPUT_DIR}/" \
    --trimId "${MARC_DIR}"/${MASK} 2> "${PREFIX}/serial-score.log"

  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/serial-score-histogram.R ${OUTPUT_DIR} &>> ${PREFIX}/serial-score.log"
  Rscript scripts/serial-score-histogram.R "${OUTPUT_DIR}" &>> "${PREFIX}/serial-score.log"
}

# Runs ./formatter with BOOKS as the default record type on
# ${MARC_DIR}/${MASK}. Nothing is redirected, so the tool's output goes to
# the caller's stdout/stderr.
# Globals read: MARC_DIR, MASK
# Returns:      the exit status of ./formatter
do_format() {
  # The path is left unquoted inside the array literal so it is word-split
  # and glob-expanded exactly as it would be on a plain command line.
  local -a formatter_cmd=(./formatter --defaultRecordType BOOKS ${MARC_DIR}/${MASK})
  "${formatter_cmd[@]}"
}

# Runs ./functional-analysis on ${MARC_DIR}/${MASK}; the tool's stderr is
# written to ${PREFIX}/functional-analysis.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      two timestamped log lines on stdout
# Returns:      the exit status of ./functional-analysis
do_functional_analysis() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string,
  # so a '%' or '\' in a path cannot corrupt the log line.
  printf '%s> %s\n' "$(date +'%F %T')" "[functional-analysis]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./functional-analysis --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/functional-analysis.log"

  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./functional-analysis --defaultRecordType BOOKS \
    ${params} \
    --outputDir "${OUTPUT_DIR}/" "${MARC_DIR}"/${MASK} 2> "${PREFIX}/functional-analysis.log"
}

# Runs the network analysis pipeline: ./network-analysis on the input files,
# the network-transform.R script, a second ./network-analysis pass with
# "--action pairing", a sort/uniq aggregation of network-pairs.csv, then zips
# selected CSV files inside OUTPUT_DIR and uploads the archive with scp.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      timestamped log lines on stdout; tool output goes to
#               ${PREFIX}/network-analysis.log
# Returns:      the exit status of scp
do_network_analysis() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[network-analysis]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./network-analysis --defaultRecordType BOOKS ${params} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${PREFIX}/network-analysis.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./network-analysis --defaultRecordType BOOKS \
    ${params} \
    --outputDir "${OUTPUT_DIR}/" \
    "${MARC_DIR}"/${MASK} 2> "${PREFIX}/network-analysis.log"

  # network.csv (concept, id) ->
  #   network-by-concepts.csv (concept, count, ids)
  #   network-by-record.csv (id, count, concepts)
  #   network-statistics.csv (type, total, single, multi)
  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/network-transform.R ${OUTPUT_DIR} &>> ${PREFIX}/network-analysis.log"
  Rscript scripts/network-transform.R "${OUTPUT_DIR}" &>> "${PREFIX}/network-analysis.log"

  # network-by-concepts (concept, count, ids) ->
  #   network-pairs.csv (id1 id2)
  #   network-nodes.csv (id, id)
  # NOTE(review): the log line advertises "--group-limit 2000" but the command
  # below does not pass it. Confirm which is intended; behavior is unchanged.
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./network-analysis --outputDir ${OUTPUT_DIR} --action pairing --group-limit 2000 &>> ${PREFIX}/network-analysis.log"
  ./network-analysis --outputDir "${OUTPUT_DIR}" \
    --action pairing \
    &>> "${PREFIX}/network-analysis.log"

  # NOTE(review): these files are read and written in the current working
  # directory, not in OUTPUT_DIR. Confirm that is intended.
  sort network-pairs.csv | uniq -c | sort -nr > network-pairs-uniq-with-count.csv
  awk '{print $2 " " $3}' network-pairs-uniq-with-count.csv > network-pairs-all.csv

  printf '%s> %s\n' "$(date +'%F %T')" "ziping output"
  # Zip inside a subshell so the caller's working directory is never changed.
  # The previous save/restore used a mistyped command and overwrote the
  # shell-managed PWD variable, leaving the script stuck in OUTPUT_DIR.
  (
    cd "${OUTPUT_DIR}" || exit
    zip network-input network-nodes.csv network-nodes-???.csv network-pairs-???.csv network-by-concepts-tags.csv
  )

  printf '%s> %s\n' "$(date +'%F %T')" "upload output"
  scp "${OUTPUT_DIR}/network-input.zip" pkiraly@roedel.etrap.eu:/roedel/pkiraly/network/input

  # spark-shell -I scripts/network.scala --conf spark.driver.metadata.qa.dir="${OUTPUT_DIR}"
  # ./network-export.sh ${OUTPUT_DIR}
}

# Runs the frequency-range.R script on OUTPUT_DIR, then sources
# ./common-variables and, when WEB_DIR is non-empty afterwards, links
# ${OUTPUT_DIR}/img to ${WEB_DIR}/images/${NAME}.
# Globals read: OUTPUT_DIR, PREFIX, NAME, WEB_DIR (presumably defined by
#               ./common-variables; verify)
# Outputs:      two timestamped log lines on stdout; Rscript output is
#               appended to ${PREFIX}/pareto.log
do_pareto() {
  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[pareto]"
  # The log shows '&>>' because the command below appends to pareto.log.
  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/frequency-range.R ${OUTPUT_DIR} &>> ${PREFIX}/pareto.log"
  Rscript scripts/frequency-range.R "${OUTPUT_DIR}" &>> "${PREFIX}/pareto.log"

  . ./common-variables
  if [[ -n "$WEB_DIR" ]]; then
    mkdir -p "$WEB_DIR/images"
    # -f replaces a link left by a previous run; -n stops ln from following
    # that existing link and creating a nested link inside its target.
    ln -sfn "${OUTPUT_DIR}/img" "$WEB_DIR/images/${NAME}"
  fi
}

# Runs ./formatter with the selector "008~7-10;008~0-5" on
# ${MARC_DIR}/${MASK}, then the marc-history.R script on OUTPUT_DIR. Both
# steps log to ${PREFIX}/marc-history.log.
# Globals read: TYPE_PARAMS, OUTPUT_DIR, MARC_DIR, MASK, PREFIX
# Outputs:      three timestamped log lines on stdout
# Returns:      the exit status of the Rscript call
do_marc_history() {
  # Strip BOTH flags wherever they occur. A single non-global sed
  # substitution with an alternation removes only the first match.
  local params=${TYPE_PARAMS//--emptyLargeCollectors/}
  params=${params//--with-delete/}

  # Variable text goes in as a printf argument, not into the format string.
  printf '%s> %s\n' "$(date +'%F %T')" "[marc-history]"
  printf '%s> %s\n' "$(date +'%F %T')" \
    "./formatter --selector \"008~7-10;008~0-5\" --defaultRecordType BOOKS ${params} --separator \",\" --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} &> ${PREFIX}/marc-history.log"
  # params is unquoted so it splits into separate options; MASK is unquoted
  # so the shell can expand it (presumably a file glob).
  ./formatter --selector "008~7-10;008~0-5" --defaultRecordType BOOKS ${params} --separator "," --outputDir "${OUTPUT_DIR}/" "${MARC_DIR}"/${MASK} &> "${PREFIX}/marc-history.log"

  printf '%s> %s\n' "$(date +'%F %T')" \
    "Rscript scripts/marc-history.R ${OUTPUT_DIR} &>> ${PREFIX}/marc-history.log"
  Rscript scripts/marc-history.R "${OUTPUT_DIR}" &>> "${PREFIX}/marc-history.log"
}

# When VERSION is non-empty, points ${BASE_OUTPUT_DIR}/${NAME} at OUTPUT_DIR
# with a symbolic link, replacing a link or file left by an earlier run.
# Globals read: VERSION, BASE_OUTPUT_DIR, NAME, OUTPUT_DIR
# Outputs:      one timestamped log line on stdout; a warning on stderr when
#               the link path is a real directory
# Returns:      1 when the link path is a real directory (left untouched),
#               otherwise the status of the last command run
do_version_link() {
  printf '%s> %s\n' "$(date +'%F %T')" "[version-link]"
  if [[ -n "$VERSION" ]]; then
    local output_link="${BASE_OUTPUT_DIR}/${NAME}"

    if [[ -d "$output_link" && ! -L "$output_link" ]]; then
      # A real directory cannot be removed with rm, and ln would create the
      # link inside it. Refuse rather than guess.
      printf '%s> %s\n' "$(date +'%F %T')" \
        "cannot create version link: ${output_link} is a directory" >&2
      return 1
    fi

    # -e is false for a dangling symlink, so -L is tested as well. Otherwise
    # a stale link would survive and make ln fail.
    if [[ -e "$output_link" || -L "$output_link" ]]; then
      rm -- "$output_link"
    fi
    ln -s "${OUTPUT_DIR}" "$output_link"
  fi
}

# Runs every analysis step in a fixed order by calling the matching do_<step>
# function. The return status is that of the last step (do_marc_history).
do_all_analyses() {
  # network_analysis is intentionally left out (it was disabled in the
  # original step list).
  local -a steps=(
    validate
    completeness
    classifications
    authorities
    tt_completeness
    shelf_ready_completeness
    serial_score
    functional_analysis
    pareto
    marc_history
  )
  local step
  for step in "${steps[@]}"; do
    "do_${step}"
  done
}

# Runs the two Solr steps in order: do_prepare_solr, then do_index.
# The return status is that of do_index.
do_all_solr() {
  local stage
  for stage in prepare_solr index; do
    "do_${stage}"
  done
}

# Dispatch on the first command-line argument. Every task name except "all"
# maps directly onto a do_<task> function once '-' is replaced by '_'.
# An unknown or missing argument runs nothing.
case "$1" in
  validate | prepare-solr | index | completeness | classifications \
    | authorities | tt-completeness | shelf-ready-completeness \
    | serial-score | format | functional-analysis | network-analysis \
    | pareto | marc-history | all-analyses | all-solr)
    "do_${1//-/_}"
    ;;
  all)
    # Full run: analyses, then Solr indexing, then the version link.
    do_all_analyses
    do_all_solr
    do_version_link
    ;;
esac

# Report the elapsed wall-clock time as HH:MM:SS. SECONDS is bash's built-in
# counter, which this script reset to 0 at the top.
duration=$SECONDS
hours=$(( duration / 3600 ))
mins=$(( (duration % 3600) / 60 ))
secs=$(( duration % 60 ))

# The date output is left unquoted on purpose: it splits into the date and
# the time, which fill the two leading %s fields.
printf "%s %s> DONE. %02d:%02d:%02d elapsed.\n" $(date +"%F %T") "$hours" "$mins" "$secs"
