001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.scheme.hadoop;
023
024import java.beans.ConstructorProperties;
025import java.io.IOException;
026
027import cascading.flow.FlowProcess;
028import cascading.scheme.SinkCall;
029import cascading.scheme.SourceCall;
030import cascading.tap.Tap;
031import cascading.tuple.Fields;
032import cascading.tuple.Tuple;
033import cascading.tuple.TupleEntry;
034import org.apache.hadoop.conf.Configuration;
035import org.apache.hadoop.io.NullWritable;
036import org.apache.hadoop.io.Writable;
037import org.apache.hadoop.mapred.OutputCollector;
038import org.apache.hadoop.mapred.OutputFormat;
039import org.apache.hadoop.mapred.RecordReader;
040import org.apache.hadoop.mapred.SequenceFileOutputFormat;
041
042/**
043 * Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes values of the given
044 * {@code writableType} {@code Class}, instead of {@link Tuple} instances used by default in SequenceFile.
045 * <p>
046 * This Class is a convenience for those who need to read/write specific types from existing sequence files without
047 * them being wrapped in a Tuple instance.
048 * <p>
049 * Note due to the nature of sequence files, only one type can be stored in the key and value positions, they they can be
050 * uniquely different types (LongWritable, Text).
051 * <p>
052 * If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value.
053 * <p>
054 * {@link NullWritable} is used as the empty type for either a null keyType or valueType.
055 */
056public class WritableSequenceFile extends SequenceFile
057  {
058  protected final Class<? extends Writable> keyType;
059  protected final Class<? extends Writable> valueType;
060
061  /**
062   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance.
063   *
064   * @param fields    of type Fields
065   * @param valueType of type Class, may not be null
066   */
067  @ConstructorProperties({"fields", "valueType"})
068  public WritableSequenceFile( Fields fields, Class<? extends Writable> valueType )
069    {
070    this( fields, null, valueType );
071    }
072
073  /**
074   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance.
075   *
076   * @param fields    of type Fields
077   * @param keyType   of type Class
078   * @param valueType of type Class
079   */
080  @ConstructorProperties({"fields", "keyType", "valueType"})
081  public WritableSequenceFile( Fields fields, Class<? extends Writable> keyType, Class<? extends Writable> valueType )
082    {
083    super( fields );
084    this.keyType = keyType;
085    this.valueType = valueType;
086
087    if( keyType == null && valueType == null )
088      throw new IllegalArgumentException( "both keyType and valueType may not be null" );
089
090    if( keyType == null && fields.size() != 1 )
091      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'keys' from a sequence file" );
092    else if( valueType == null && fields.size() != 1 )
093      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'values' from a sequence file" );
094    else if( keyType != null && valueType != null && fields.size() != 2 )
095      throw new IllegalArgumentException( "fields must declare exactly two fields when only reading/writing 'keys' and 'values' from a sequence file" );
096    }
097
098  @Override
099  public void sinkConfInit( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf )
100    {
101    if( keyType != null )
102      conf.setClass( "mapred.output.key.class", keyType, Object.class );
103    else
104      conf.setClass( "mapred.output.key.class", NullWritable.class, Object.class );
105
106    if( valueType != null )
107      conf.setClass( "mapred.output.value.class", valueType, Object.class );
108    else
109      conf.setClass( "mapred.output.value.class", NullWritable.class, Object.class );
110
111    conf.setClass( "mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class );
112    }
113
114  @Override
115  public boolean source( FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[], RecordReader> sourceCall ) throws IOException
116    {
117    Object key = sourceCall.getContext()[ 0 ];
118    Object value = sourceCall.getContext()[ 1 ];
119    boolean result = sourceCall.getInput().next( key, value );
120
121    if( !result )
122      return false;
123
124    int count = 0;
125    TupleEntry entry = sourceCall.getIncomingEntry();
126
127    if( keyType != null )
128      entry.setObject( count++, key );
129
130    if( valueType != null )
131      entry.setObject( count, value );
132
133    return true;
134    }
135
136  @Override
137  public void sink( FlowProcess<? extends Configuration> flowProcess, SinkCall<Void, OutputCollector> sinkCall ) throws IOException
138    {
139    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();
140
141    Writable keyValue = NullWritable.get();
142    Writable valueValue = NullWritable.get();
143
144    if( keyType == null )
145      {
146      valueValue = (Writable) tupleEntry.getObject( 0 );
147      }
148    else if( valueType == null )
149      {
150      keyValue = (Writable) tupleEntry.getObject( 0 );
151      }
152    else
153      {
154      keyValue = (Writable) tupleEntry.getObject( 0 );
155      valueValue = (Writable) tupleEntry.getObject( 1 );
156      }
157
158    sinkCall.getOutput().collect( keyValue, valueValue );
159    }
160
161  @Override
162  public boolean equals( Object object )
163    {
164    if( this == object )
165      return true;
166    if( !( object instanceof WritableSequenceFile ) )
167      return false;
168    if( !super.equals( object ) )
169      return false;
170
171    WritableSequenceFile that = (WritableSequenceFile) object;
172
173    if( keyType != null ? !keyType.equals( that.keyType ) : that.keyType != null )
174      return false;
175    if( valueType != null ? !valueType.equals( that.valueType ) : that.valueType != null )
176      return false;
177
178    return true;
179    }
180
181  @Override
182  public int hashCode()
183    {
184    int result = super.hashCode();
185    result = 31 * result + ( keyType != null ? keyType.hashCode() : 0 );
186    result = 31 * result + ( valueType != null ? valueType.hashCode() : 0 );
187    return result;
188    }
189  }