/*-------------------------------------------------------------------------
 Copyright 2009 Olivier Berlanger

 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at

 http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 -------------------------------------------------------------------------*/
package net.sf.sfac.string;


import java.io.IOException;
import java.io.Reader;


/**
 * Char iterator fetching chars from a {@link Reader}.
 * <p>
 * The char <code>'\0'</code> is used as the end-of-sequence marker: once an iteration method has returned it, every
 * following call returns it again until {@link #setData(Reader)} or {@link #reset()} is called.
 */
public class ReaderCharIterator implements CharIterator {

    /** the reader providing the chars to iterate. */
    private Reader data;
    /** true if there is no more available char to iterate. */
    private boolean finished;
    /** true if the last returned normalized char was a space (also true before the first char, so leading separators are skipped). */
    private boolean lastWhite;
    /** the count of chars actually read from the reader so far. */
    private long count;


    /**
     * Creates an iterator over the chars supplied by the given reader.
     * 
     * @param rdr
     *            the reader to fetch the chars from.
     */
    public ReaderCharIterator(Reader rdr) {
        setData(rdr);
    }


    /**
     * Replaces the underlying reader and restarts the iteration state (end flag, white-space tracking and processed
     * char count).
     * 
     * @param rdr
     *            the new reader to fetch the chars from.
     */
    public void setData(Reader rdr) {
        data = rdr;
        finished = false;
        lastWhite = true;
        count = 0;
    }


    /**
     * Consumes all the remaining chars of the reader and returns them as one normalized string (see
     * {@link #nextNormalizedChar()}).
     * <p>
     * Warning: the whole result is held in memory, avoid to use this with very big files.
     * 
     * @return the concatenation of all the remaining normalized chars.
     */
    public String getNormalizedString() {
        // Local accumulator only: no need for the synchronization of StringBuffer.
        StringBuilder sb = new StringBuilder();
        char ch;
        while ((ch = nextNormalizedChar()) != '\0') {
            sb.append(ch);
        }
        return sb.toString();
    }


    /**
     * Reads the next raw char from the reader.
     * 
     * @return the next char, or <code>'\0'</code> when the end of the reader is reached. A <code>'\0'</code> char
     *         present in the data is returned as <code>'\1'</code> so it cannot be confused with the end marker.
     * @throws IllegalStateException
     *             if the underlying reader fails to read.
     */
    public char nextChar() {
        if (finished) {
            return '\0';
        }
        try {
            int ch = data.read();
            if (ch < 0) {
                finished = true;
                return '\0';
            }
            // Count only chars really consumed from the reader (not calls made at or after the end).
            count++;
            if (ch == 0) {
                // '\0' is the end-of-character-sequence marker, so we cannot return it.
                // We return '\1' instead because, most likely, this kind of control char will not be interpreted.
                return '\1';
            }
            return (char) ch;
        } catch (IOException ioe) {
            throw new IllegalStateException("Unable to read next character", ioe);
        }
    }


    /**
     * Reads the next normalized char: the result of <code>StringUtils.removeDiacritic</code> is lower-cased when it
     * is a letter, returned unchanged when it is a digit, and any run of other chars is collapsed to a single space
     * (runs at the very beginning are skipped entirely).
     * 
     * @return the next normalized char, or <code>'\0'</code> when there is no more char.
     * @throws IllegalStateException
     *             if the underlying reader fails to read.
     */
    public char nextNormalizedChar() {
        if (finished) {
            return '\0';
        }
        while (true) {
            // nextChar() does the counting, so the processed count is not touched here.
            char ch = StringUtils.removeDiacritic(nextChar());
            if (ch == '\0') {
                // NOTE(review): this also ends the iteration if removeDiacritic ever maps a real char to '\0'
                // (behavior kept from the original code) - confirm against StringUtils.
                finished = true;
                return '\0';
            }
            if (Character.isLetter(ch)) {
                lastWhite = false;
                return Character.toLowerCase(ch);
            }
            if (Character.isDigit(ch)) {
                lastWhite = false;
                return ch;
            }
            if (!lastWhite) {
                // First separator after a letter or digit: emit one space for the whole run.
                lastWhite = true;
                return ' ';
            }
            // Separator following another separator (or leading the data): skip it and read on.
        }
    }


    /**
     * @return the count of chars read from the reader since the last {@link #setData(Reader)} or {@link #reset()}.
     */
    public long getProcessedCharCount() {
        return count;
    }


    /**
     * Resets the underlying reader (see {@link Reader#reset()}) and restarts the iteration state, including the
     * processed char count.
     * 
     * @throws IllegalStateException
     *             if the underlying reader cannot be reset; in that case the iteration state is left untouched.
     */
    public void reset() {
        try {
            // Reset the reader first so a failure does not leave this iterator half-reset.
            data.reset();
        } catch (IOException ioe) {
            throw new IllegalStateException("Unable to reset reader", ioe);
        }
        finished = false;
        lastWhite = true;
        count = 0;
    }

}
