001 package org.apache.fulcrum.parser;
002
003
004 /*
005 * Licensed to the Apache Software Foundation (ASF) under one
006 * or more contributor license agreements. See the NOTICE file
007 * distributed with this work for additional information
008 * regarding copyright ownership. The ASF licenses this file
009 * to you under the Apache License, Version 2.0 (the
010 * "License"); you may not use this file except in compliance
011 * with the License. You may obtain a copy of the License at
012 *
013 * http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing,
016 * software distributed under the License is distributed on an
017 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
018 * KIND, either express or implied. See the License for the
019 * specific language governing permissions and limitations
020 * under the License.
021 */
022
023
024 import java.io.Reader;
025 import java.io.StreamTokenizer;
026 import java.util.List;
027
028 /**
029 * CSVParser is used to parse a stream with comma-separated values and
030 * generate ParameterParser objects which can be used to
031 * extract the values in the desired type.
032 *
033 * <p>The class extends the abstract class DataStreamParser and implements
034 * initTokenizer with suitable values for CSV files to provide this
035 * functionality.
036 *
037 * <p>The class (indirectly through DataStreamParser) implements the
038 * java.util.Iterator interface for convenience.
039 * This allows simple use in a Velocity template for example:
040 *
041 * <pre>
042 * #foreach ($row in $csvfile)
043 * Name: $row.Name
044 * Description: $row.Description
045 * #end
046 * </pre>
047 *
048 * @author <a href="mailto:sean@informage.net">Sean Legassick</a>
049 * @version $Id: CSVParser.java 535465 2007-05-05 06:58:06Z tv $
050 */
051 public class CSVParser extends DataStreamParser
052 {
053 /**
054 * Create a new CSVParser instance. Requires a Reader to read the
055 * comma-separated values from. The column headers must be set
056 * independently either explicitly, or by reading the first line
057 * of the CSV values.
058 *
059 * @param in the input reader.
060 */
061 public CSVParser(Reader in)
062 {
063 super(in, null, null);
064 }
065
066 /**
067 * Create a new CSVParser instance. Requires a Reader to read the
068 * comma-separated values from, and a list of column names.
069 *
070 * @param in the input reader.
071 * @param columnNames a list of column names.
072 */
073 public CSVParser(Reader in, List columnNames)
074 {
075 super(in, columnNames, null);
076 }
077
078 /**
079 * Create a new CSVParser instance. Requires a Reader to read the
080 * comma-separated values from, a list of column names and a
081 * character encoding.
082 *
083 * @param in the input reader.
084 * @param columnNames a list of column names.
085 * @param characterEncoding the character encoding of the input.
086 */
087 public CSVParser(Reader in, List columnNames, String characterEncoding)
088 {
089 super(in, columnNames, characterEncoding);
090 }
091
092 /**
093 * Initialize the StreamTokenizer instance used to read the lines
094 * from the input reader.
095 */
096 protected void initTokenizer(StreamTokenizer tokenizer)
097 {
098 // set all numeric characters as ordinary characters
099 // (switches off number parsing)
100 tokenizer.ordinaryChars('0', '9');
101 tokenizer.ordinaryChars('-', '-');
102 tokenizer.ordinaryChars('.', '.');
103
104 // set all printable characters to be treated as word chars
105 tokenizer.wordChars(' ', Integer.MAX_VALUE);
106
107 // now set comma as the whitespace character
108 tokenizer.whitespaceChars(',', ',');
109
110 // and set the quote mark as the quoting character
111 tokenizer.quoteChar('"');
112
113 // and finally say that end of line is significant
114 tokenizer.eolIsSignificant(true);
115 }
116 }