Parser / Formatter building

The CloverETL engine provides support for reader and writer development. Developers can use the parser and formatter interfaces to create their own components faster and more efficiently. These interfaces declare many functions that other Clover classes use. The easiest way to understand how to use the parser and formatter is to look at the Clover DataReader or DataWriter source code. This document introduces some examples and suggests a variety of formats to use.

Parser

Most Clover readers use MultiFileReader, which needs an instance of the Parser interface. Once it has a parser, the multifile reader provides several features on top of it: it prepares input streams from the URL for the parser and supports 'incremental reading' and 'autofilling'. The developer only implements a set of functions. Some of these functions can be left empty, while others are more important:

  • init/reset - you can initialize/reset your parser, the reset method is used in CloverServer.
  • setDataSource - the multifile reader passes ReadableByteChannel as an input
  • getNext - the parser has to return an output record, or null if there is no more input
  • close - the parser releases its resources


The parser interface

   public interface Parser {
 
	/**
	 *  Initialization of data parser by given metadata.
	 *
	 * @param  _metadata  Description of Parameter
	 */
	public void init(DataRecordMetadata _metadata) throws ComponentNotReadyException;
 
        /**
	 * Reset parser for next graph execution. 
         */
	public void reset() throws ComponentNotReadyException;
 
        /**
         * Sets input data source. Some of parsers allow to call this method repeatedly.
         * 
         * @param inputDataSource
         */
        public void setDataSource(Object inputDataSource) throws ComponentNotReadyException;
 
	/**
	 *  An operation that produces next record from Input data or null. The method can be used for multi output port.
	 *
	 * @return                     The Next value
	 */
	public DataRecord getNext() throws JetelException;
 
	/**
	 *  An operation that produces next record from Input data or null. The method is used for one output port.
         *
	 * @return                  The Next value
	 * @param record
	 */
	public DataRecord getNext(DataRecord record) throws JetelException;
 
	/**
	 * Skips specified number of records.
         *
	 * @param nRec Number of records to be skipped. 
	 * @return Number of skipped records.
	 * @throws JetelException
	 */
	public int skip(int nRec) throws JetelException;
 
       /**
         * If releaseInputSource is false, the previous input data source is not released (input stream is not closed).
         * The input data source release is performing into the method 'setDataSource'. Default value is true.
         * 
         * @param releaseInputSource
         */
       public void setReleaseDataSource(boolean releaseInputSource);
 
	/**
	 *  Closing/deinitialization of parser
	 */
	public void close();
 
	/**
	 * Sets exception handler for exceptions.
         *
	 * @param handler
	 */
	public void setExceptionHandler(IParserExceptionHandler handler);
 
	/**
	 * Gets exception handler for exceptions.
	 */
        public IParserExceptionHandler getExceptionHandler();
 
	/**
	 * How to handle an exception.
	 */
        public PolicyType getPolicyType();
 
	/**
	 * Gets current position of source file (used for incremental reading).
	 * 
	 * @return position
	 */
	public Object getPosition();
 
	/**
	 * Sets position for increamental reading.
	 * 
	 * @param position
	 */
	public void movePosition(Object position) throws IOException;
   }

Examples

These examples show how to use the parser in a reader.

  /**
    * Example of integrating a parser into a general reader component.
    *
    * The component never calls the parser directly: it hands a DataParser to a
    * MultiFileReader and drives init/reset/getNext/close through that reader.
    * The "..." lines stand for code that is omitted from this example.
    */
  public class DataReader extends Node {
        // multifile reader that wraps the parser; every parser call goes through it
        private MultiFileReader reader;
 
        // Creates the parser, wraps it in a multifile reader and initializes the reader
        // with the metadata of the output port.
        @Override
        public void init() throws ComponentNotReadyException {
                DataParser parser = new DataParser();
                reader = new MultiFileReader(parser, getGraph().getProjectURL(), fileURL);
                ...
                reader.init(getOutputPort(OUTPUT_PORT).getMetadata()); // multifile reader initializes the parser
        }
 
        // Resets the component and then the multifile reader.
        @Override
	public synchronized void reset() throws ComponentNotReadyException {
                super.reset();
                reader.reset();           // multifile reader calls parser.reset()
        }
 
        // Creates a record from the output port metadata and asks the multifile reader to fill it.
        // Only a single read is shown; the rest of the method is omitted ("...").
        @Override
        public Result execute() throws Exception {
                DataRecord record = new DataRecord(getOutputPort(OUTPUT_PORT).getMetadata());
                ...
                reader.getNext(record);   // multifile reader calls parser.getNext(record)
        }
 
        // Frees the component and closes the multifile reader.
        @Override
        public synchronized void free() {
                super.free();
                reader.close();           // multifile reader calls parser.close()
        }
  }
 
  /**
    * An example of the parser.
    */
  public class DataParser implements Parser {
 
	private IParserExceptionHandler exceptionHandler;
	private DataRecordMetadata metadata;
	private ReadableByteChannel reader;
	private ByteBuffer byteBuffer;
	private boolean releaseInputSource = true;
 
	/*
	 * @see org.jetel.data.parser.Parser#init(org.jetel.metadata.DataRecordMetadata)
	 */
	public void init(DataRecordMetadata metadata) throws ComponentNotReadyException {
		if (metadata == null) {
			throw new ComponentNotReadyException("Metadata are null");
		}
		this.metadata = metadata;
		byteBuffer = ByteBuffer.allocateDirect(Defaults.Record.MAX_RECORD_SIZE);
	}
 
	/*
	 * @see org.jetel.data.parser.Parser#reset()
	 */
	public void reset() {
		if (releaseInputSource)
			releaseDataSource();
		byteBuffer.reset();// reset CharsetDecoder
	}
 
	/*
	 * @see org.jetel.data.parser.Parser#getNext(org.jetel.data.DataRecord)
	 */
	public DataRecord getNext(DataRecord record) throws JetelException {
		record = parseNext(record);
		if (exceptionHandler != null) { // use handler only if configured
			while (exceptionHandler.isExceptionThrowed()) {
				exceptionHandler.setRawRecord(getLastRawRecord());
				exceptionHandler.handleException();
				record = parseNext(record);
			}
		}
		return record;
	}
 
	/*
	 * @see org.jetel.data.parser.Parser#setDataSource(java.lang.Object)
	 */
	public void setDataSource(Object inputDataSource) {
		if (releaseInputSource)
			releaseDataSource(); // close resource or something like that
		byteBuffer.clear();
		byteBuffer.flip();
	}
 
	/*
	 * @see org.jetel.data.parser.Parser#close()
	 */
	public void close() {
		if (reader != null && reader.isOpen()) {
			try {
				reader.close();
			} catch (IOException ex) {
				ex.printStackTrace();
			}
		}
	}
 
        ...
  }


Formatter

Just as the parser is used by MultiFileReader, the formatter is used by MultiFileWriter. The multifile writer prepares output streams from a URL for the formatter and also supports 'partition'. The developer only needs to implement a set of functions. Some of these functions can be left empty, while others are more important:

  • init/reset - you can initialize/reset your formatter, the reset method is used in CloverServer.
  • setDataTarget - the multifile writer passes WritableByteChannel as an output
  • write - the formatter writes output records to the output
  • close - the formatter releases its resources


The formatter interface

  public interface Formatter {
 
        /**
         * Initialization of data formatter by given metadata.
         *
         *@param  _metadata  Description of the Parameter
         */
        public void init(DataRecordMetadata _metadata) throws ComponentNotReadyException;
 
        /**
         * Reset formatter by given metadata.
         *
         *@param  _metadata  Description of the Parameter
         */
        public void reset();
 
        /**
         * Sets output data destination. Some of formatters allow to call this method repeatedly.
         * 
         * @param outputDataTarget
         */
        public void setDataTarget(Object outputDataTarget);
 
        /**
         * Formats data record based on provided metadata
         *
         *@param  record           Data record to format and send to output stream
         *@exception  IOException  Description of the Exception
         */
        public int write(DataRecord record) throws IOException;
 
        /**
         * Formats header based on provided metadata
         * 
         * @throws IOException
         */
        public int writeHeader() throws IOException;
 
        /**
         * Formats footer based on provided metadata
         * 
         * @throws IOException
         */
        public int writeFooter() throws IOException;
 
        /**
         * Flush any unwritten data into output stream
         * 
         * @throws IOException
         */
        public void flush() throws IOException;
 
        /**
         * This method writes all data (header, body, footer) which are to write, but doesn't close underlying streams.
         * 
         * @throws IOException
         */
        public void finish() throws IOException;
 
        /**
         *  Closing/deinitialization of formatter
         */
        public void close();
  }
 
  /**
    * The multifile writer needs FormatterProvider that cretes data formatters.
    */
  public interface FormatterProvider {
 
	/**
	 * Creates new data formatter.
	 * 
	 * @return data formatter
	 */
	public Formatter getNewFormatter();
  }


Examples

These examples show how to use the formatter in a writer.

  /**
    * An example how to integrate the formatter to general writer.
    */
  public class DataWriter extends Node {
        private DataFormatterProvider formatterProvider;
        private MultiFileWriter writer;
        private WritableByteChannel writableByteChannel;
 
        private final static int READ_FROM_PORT = 0;
 
	@Override
        public Result execute() throws Exception {
                InputPort inPort = getInputPort(READ_FROM_PORT);
                DataRecord record = new DataRecord(inPort.getMetadata());
                record.init();
                while (record != null && runIt) {
                        record = inPort.readRecord(record);
                        if (record != null) {
                                writer.write(record);        // multifile writer calls formatter.write(record)
                        }
                        SynchronizeUtils.cloverYield();
                }
                writer.finish();                             // multifile writer calls formatter.finish(record)
                return runIt ? Result.FINISHED_OK : Result.ABORTED;
        }
 
        @Override
        public void init() throws ComponentNotReadyException {
                if(isInitialized()) return;
                super.init();
                writer = new MultiFileWriter(formatterProvider, getGraph().getProjectURL(), fileURL);
                writer.init(getInputPort(READ_FROM_PORT).getMetadata()); // multifile writer calls formatter.init()
        }
 
        @Override
        public synchronized void reset() throws ComponentNotReadyException {
                super.reset();
                writer.reset();                    // multifile writer calls formatter.reset()
        }
 
        @Override
        public synchronized void free() {
                super.free();
 
                if(writer != null) {
                        writer.close();            // multifile writer calls formatter.close()
        }
        ...
  }
 
  /**
    * An example of the formatter provider.
    */
  public class DataFormatterProvider implements FormatterProvider {
 
	/**
	 * Creates new data formatter.
	 * 
	 * @return data formatter
	 */
	public Formatter getNewFormatter() {
		return new DataFormatter();
	}
  }
 
  /**
    * An example of the formatter.
    */
  public class DataFormatter implements Formatter {
        private DataRecordMetadata metadata;
        private WritableByteChannel writer;
        private ByteBuffer dataBuffer;
 
	/* (non-Javadoc)
	 * @see org.jetel.data.formatter.Formatter#init(org.jetel.metadata.DataRecordMetadata)
	 */
        public void init(DataRecordMetadata _metadata) {
                metadata = _metadata;
        }
 
        /* (non-Javadoc)
         * @see org.jetel.data.formatter.Formatter#reset()
         */
        public void reset() {
                if (writer != null && writer.isOpen()) {
                        try {
                                flush();
                                writer.close();
                        } catch (IOException ex){
                                ex.printStackTrace();
                        }
                }
                encoder.reset();
        }
 
        /* (non-Javadoc)
         * @see org.jetel.data.formatter.Formatter#setDataTarget(java.lang.Object)
         */
        public void setDataTarget(Object out) {
                close();
 
                // create buffered output stream reader 
                if (out == null) {
                        writer = null;
                } else if (out instanceof WritableByteChannel) {
                        writer = (WritableByteChannel) out;
                } else {
                        riter = Channels.newChannel((OutputStream) out);
                }
        }
 
        /* (non-Javadoc)
         * @see org.jetel.data.formatter.Formatter#write(org.jetel.data.DataRecord)
         */
        public int write(DataRecord record) throws IOException {
                return 0;
        }
 
        /* (non-Javadoc)
         * @see org.jetel.data.formatter.Formatter#close()
         */
        public void close() {
                if (writer == null || !writer.isOpen()) {
                        return;
                }
                try {
                        flush();
                        writer.close();
                } catch(IOException ex) {
                        ex.printStackTrace();
                }
                writer = null;
        }
 
        /* (non-Javadoc)
         * @see org.jetel.data.formatter.Formatter#finish()
         */
        public void finish() throws IOException {
                dataBuffer.flip();
                writer.write(dataBuffer);
                dataBuffer.clear();
        }
        ...
  }

parser_formatter_building.txt · Last modified: 2009/09/16 12:27 (external edit)
Back to top
chimeric.de = chi`s home Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0