#!/usr/bin/perl

use strict;
use warnings;

package Jls::TapMedianEvalue;

use Exporter;
our @ISA = 'Exporter';
our @EXPORT = 'median_threshold_for_retrieval_lists';

=head 1

# The routines in this file are based on the following object:
#
# The $retrieval_lists object can be found in the 'TapInput.pm' module.
#

# Calculates the score (or E-value) threshold for given quantile_ (median) k EPQ-s.
# Requires that Jls::Tap::precompute_for_taps ($retrieval_lists_) be called first.
# $pad_0_ = 0 : Requires quantile_ (k) $epq_ appear among the retrieval lists.
# $pad_0_ = 1 : Returns 'NaN' if insufficient $epq_ appear among the retrieval lists.

sub median_threshold_for_retrieval_lists
# \@retrieval_lists_
# $epq_
# $quantile_ // (0.5)
# $unweighted_ // (0) ? Treat the lists as though unweighted ?
# $pad_0_ // (0) ? Pad insufficient retrieval lists with 0's ?

=cut

sub is_epq 
# $epq_
{
    my $epq_ = $_ [0];
    
    use Jls::File;
    
    unless (Jls::File::is_integer ($epq_)) { return 0; }
    unless ($epq_ > 0) { return 0; }

    return 1;
}

sub msg_epq_positive_integer 
# $epq_
{
    my $epq_ = $_ [0];
    
    if (is_epq ($epq_)) { return ''; }
    
    my $msg = "\nThe median errors per query ";
    $msg .= $epq_;
    $msg .= " must be a positive integer.";
    $msg .= "\nDied";

    return $msg;
}

sub msg_epq_false_positives 
# $id_
# $epq_
# $fp_retrieved_
{
    (my $id_, my $epq_, my $fp_retrieved_) = @_;
    
    my $msg = "\nThe retrieval list with id ";
    $msg .= $id_;
    $msg = " has only ";
    $msg .= $fp_retrieved_;
    $msg = " false positives."; 
    $msg = "\nFor the calculation, every retrieval list must have ";
    $msg .= $epq_;
    $msg = " false positives."; 
    $msg = "\nDied";

    return $msg;
}

# Calculates the score (or E-value) threshold for given quantile_ (median) k EPQ-s.
# Requires that Jls::Tap::precompute_for_taps ($retrieval_lists_) be called first.
# $pad_0_ = 0 : Requires quantile_ (k) $epq_ appear among the retrieval lists.
# $pad_0_ = 1 : Returns 'NaN' if insufficient $epq_ appear among the retrieval lists.

sub median_threshold_for_retrieval_lists
# \@retrieval_lists_
# $epq_
# $quantile_ // (0.5)
# $unweighted_ // (0) ? Treat the lists as though unweighted ?
# $pad_0_ // (0) ? Pad insufficient retrieval lists with 0's ?
{
    (my $retrieval_lists_, my $epq_, my $quantile_, my $unweighted_, my $pad_0_) = @_;

    my $msg = msg_epq_positive_integer ($epq_); 
    if ($msg) { die $msg; }

    if (! $quantile_) { $quantile_ = 0.5; }
    if (! $unweighted_) { $unweighted_ = 0; }
    if (! $pad_0_) { $pad_0_ = 0; }
    
    my @scores = ();
    my $i = 0;
    my $is_descending;

    foreach my $retrieval_list (@$retrieval_lists_)
    {
        if ($i == 0) 
        { 
            use Jls::TapCalculate;
    
            $is_descending = 
                $retrieval_list->{'monotonicity'} == Jls::TapCalculate::descending () ? 1 : 0;
            $i++;
        }
         
        my $list_r = $retrieval_list->{'list'};

        my $fp_retrieved = 0;

        foreach my $hash_r (@$list_r)
        {
            if ($hash_r->{'is_relevant'} == 0) 
            {
                $fp_retrieved++;
                
                if ($epq_ <= $fp_retrieved) 
                { 
                    my %hash;
                    $hash{'score'} = $hash_r->{'score'};
                    $hash{'weight'} = $retrieval_list->{'normalized_weight'};
                    push (@scores, \%hash);
                    
                    last; 
                }
            }
        }
    }
    
    if (! $pad_0_ && $#scores == -1)
    {
        die "\nNo retrieval list has $epq_ errors." .
            "\nDied";
    }

    my @sorted = sort { $a->{'score'} <=> $b->{'score'} } @scores;
    
    if ($is_descending == 1) { @sorted = reverse (@sorted); }
    
    $i = 0;
    my $weight;
    my $sum_weight = 0;
    my $DOUBLE_EPSILON = 1.0e-12;
    
    foreach my $hash_r (@sorted)
    {
        if (! $unweighted_) { $weight = $hash_r->{'weight'}; }
        elsif ($i == 0) 
        { 
            $weight = 1.0 / scalar (@$retrieval_lists_); 
            $i++;
        }

        $sum_weight += $weight;
        
        if ($quantile_ <= $sum_weight * (1.0 + $DOUBLE_EPSILON)) 
        { 
            return $hash_r->{'score'}; 
        }
    }

    if (! $pad_0_)
    {
        die "\nFewer than $quantile_ of the retrieval lists have $epq_ errors." .
            "\nDied";
    }
    
    return 'NaN'; 
}

1;
