#!/usr/bin/perl

use strict;
use warnings;

package Jls::TapCheck;

use Exporter;
our @ISA = 'Exporter';
our @EXPORT = 'check_retrieval_lists';

=head 1

# Is the string a set of retrieval lists separated by blank lines?
# There must be at least one retrieval list.
# Blank lines may precede and follow the retrieval lists.
# The check permits :
#     duplicate 'unique' identifiers.
#     the relevant records to exceed the stated number in the complete retrieval list.
# Returns an appropriate error message, or '' if no error.

sub check_retrieval_lists 
# $string_ // Contains the retrieval lists.

=cut

sub msg_id_line_id
{
    my $msg = "\nThe line for a unique identifier should not have been blank.";
    $msg .= "\nDied";

    return $msg;
}

sub msg_id_line_weight
# $line_
{
    my $line_ = $_ [0];
    
    my $msg = "\nColumn 2 in the line for a unique identifier should be a positive weight.";
    $msg .= "\nLINE:\n";
    $msg .= $line_;
    $msg .= "\nDied";

    return $msg;
}

sub msg_id_line_too_many_cols
# $line_
{
    my $line_ = $_ [0];
    
    my $msg = "\nThe line for a unique identifier should have at most 2 columns.";
    $msg .= "\nLINE:\n";
    $msg .= $line_;
    $msg .= "\nDied";

    return $msg;
}

# Is a line is a valid unique identifier plus an optional weight?

sub check_id_and_weight
# $line_
{
    use Jls::File;

    my $line_ = $_ [0];
    if (Jls::File::is_blank ($line_)) { return msg_id_line_id (); }
    
    my @array = split (/\s+/, $line_);
    my @lines = ();
    
    foreach my $str (@array)
    {
        if (! Jls::File::is_blank ($str)) { push (@lines, $str); } 
    }
    
    if ($#lines >= 2) { return msg_id_line_too_many_cols ($line_); }
    elsif ($#lines == 1) 
    { 
        if (! Jls::File::is_float ($lines [1])) { return msg_id_line_weight ($line_); }
        elsif ($lines [1] <= 0) { return msg_id_line_weight ($line_); }
    }

    return '';
}

sub msg_relevant_records
# $line_
{
    my $line_ = $_ [0];
    
    my $msg = "\nColumn 1 should have been the number of relevant records.";
    $msg .= "\nLINE:\n";
    $msg .= $line_;
    $msg .= "\nDied";

    return $msg;
}

# Is a line a valid number of true positives?
# A valid number of true positives is of the form /integer.*/.

sub check_relevant_records
# $line_
{
    use Jls::File;

    my $line_ = $_ [0];
    
    my @datum = split (/\s+/, $line_);
    unless (0 <= $#datum) { return msg_relevant_records ($line_); }
    unless (Jls::File::is_integer ($datum [0])) { return msg_relevant_records ($line_); }
    unless (1 <= $datum [0]) { return msg_relevant_records ($line_); }

    return '';
}

sub msg_retrieval_item 
# $line_
{
    my $line_ = $_ [0];
    
    my $msg = "\nColumn 1 should have shown record relevancy as a 0 or 1.";
    $msg .= "\nColumn 2 should have shown the record score as a float.";
    $msg .= "\nLINE:\n";
    $msg .= $line_;
    $msg .= "\nDied";

    return $msg;
}

# Is a line a valid retrieval item?
# A valid retrieval item is of the form /[01]\s+float.*/.

sub check_retrieval_item 
# $line_
{
    use Jls::File;

    my $line_ = $_ [0];

    my @datum = split (/\s+/, $line_);

    unless (1 <= $#datum) { return msg_retrieval_item ($line_); }
    unless (Jls::File::is_01 ($datum [0])) { return msg_retrieval_item ($line_); }
    unless (Jls::File::is_float ($datum [1])) { return msg_retrieval_item ($line_); }

    return '';
}

# Is the array of lines a single retrieval list?
# No line can be blank.
# Returns an appropriate error message, or '' if no error.

sub check_single_retrieval_list
# \@lines_ // Contains a retrieval list as a sequence of lines.
{
    my $lines_ = $_ [0]; 
    my $i = 0;
    
    if (scalar (@$lines_) == 0) 
    { 
        return "\nThe first line should have been the unique identifier of the retrieval list.\nDied"; 
    }
    
    if (! $lines_->[$i])  
    { 
        return 'The line containing the id and weight was missing.'; 
    }
    
    my $msg = check_id_and_weight ($lines_->[$i++]);
    if ($msg) { return $msg; }
    
    if (! $lines_->[$i])  
    { 
        return 'The line containing the number of relevant records was missing.'; 
    }
    
    $msg = Jls::TapCheck::check_relevant_records ($lines_->[$i++]);
    if ($msg) { return $msg; }
    
    for ( ; $i < scalar (@$lines_); $i++)
    {
        $msg = Jls::TapCheck::check_retrieval_item ($lines_->[$i]);
        if ($msg) { return $msg; }
    }

    return '';
}

# Is the string a set of retrieval lists separated by blank lines?
# There must be at least one retrieval list.
# Returns an appropriate error message, or '' if no error.

sub check_retrieval_lists 
# $string_ // Contains the retrieval lists.
{
    my $string_ = $_ [0];

    my @lines_ = split ('\n', $string_); # no blank lines are permitted
    my $number_of_lists = 0;

    for (my $i = 0; $i <= $#lines_; $i++)
    {
        for ( ; $i <= $#lines_ && Jls::File::is_blank ($lines_ [$i]); $i++) {}
        
        if ($#lines_ < $i) { last; }
        
        my @array = ();
        
        for ( ; $i <= $#lines_ && ! Jls::File::is_blank ($lines_ [$i]); $i++) 
        {
            push (@array, $lines_ [$i]);
        }
        
        $number_of_lists++;
        my $msg = check_single_retrieval_list (\@array);
        if ($msg) { return $msg; }
    }

    if ($number_of_lists == 0) 
    { 
        return "\nThere should have been at least one retrieval list.\nDied"; 
    }

    return '';
}

1;
