/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.scheme.hadoop;

import java.beans.ConstructorProperties;
import java.io.IOException;

import cascading.flow.FlowProcess;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.OutputFormat;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/**
 * Class WritableSequenceFile is a sub-class of {@link SequenceFile} that reads and writes values of the given
 * {@code writableType} {@code Class}, instead of {@link Tuple} instances used by default in SequenceFile.
 * <p>
 * This Class is a convenience for those who need to read/write specific types from existing sequence files without
 * them being wrapped in a Tuple instance.
 * <p>
 * Note due to the nature of sequence files, only one type can be stored in the key and value positions, though they
 * can be uniquely different types (LongWritable, Text).
 * <p>
 * If keyType is null, valueType must not be null, and vice versa, assuming you only wish to store a single value.
 * <p>
 * {@link NullWritable} is used as the empty type for either a null keyType or valueType.
 */
public class WritableSequenceFile extends SequenceFile
  {
  /** Writable type stored in the key position, or {@code null} when only values are read/written. */
  protected final Class<? extends Writable> keyType;
  /** Writable type stored in the value position, or {@code null} when only keys are read/written. */
  protected final Class<? extends Writable> valueType;

  /**
   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance that only reads/writes
   * the value position of a sequence file.
   *
   * @param fields    of type Fields, must declare exactly one field
   * @param valueType of type Class, may not be null
   * @throws IllegalArgumentException if valueType is null or fields does not declare exactly one field
   */
  @ConstructorProperties({"fields", "valueType"})
  public WritableSequenceFile( Fields fields, Class<? extends Writable> valueType )
    {
    this( fields, null, valueType );
    }

  /**
   * Constructor WritableSequenceFile creates a new WritableSequenceFile instance.
   * <p>
   * At most one of keyType and valueType may be null. When one of them is null, fields must declare exactly
   * one field; when both are given, fields must declare exactly two fields (key first, value second).
   *
   * @param fields    of type Fields
   * @param keyType   of type Class, may be null if valueType is not null
   * @param valueType of type Class, may be null if keyType is not null
   * @throws IllegalArgumentException if both types are null, or if the number of declared fields does not match
   *                                  the number of non-null types
   */
  @ConstructorProperties({"fields", "keyType", "valueType"})
  public WritableSequenceFile( Fields fields, Class<? extends Writable> keyType, Class<? extends Writable> valueType )
    {
    super( fields );
    this.keyType = keyType;
    this.valueType = valueType;

    if( keyType == null && valueType == null )
      throw new IllegalArgumentException( "both keyType and valueType may not be null" );

    // a null keyType means only the value position is used, and vice versa
    if( keyType == null && fields.size() != 1 )
      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'values' from a sequence file" );
    else if( valueType == null && fields.size() != 1 )
      throw new IllegalArgumentException( "fields must declare exactly one field when only reading/writing 'keys' from a sequence file" );
    else if( keyType != null && valueType != null && fields.size() != 2 )
      throw new IllegalArgumentException( "fields must declare exactly two fields when reading/writing both 'keys' and 'values' from a sequence file" );
    }

  /**
   * Sets the output key class, output value class and output format class on the given configuration.
   * <p>
   * {@link NullWritable} is registered for whichever of keyType or valueType is null, and
   * {@link SequenceFileOutputFormat} is registered as the output format.
   *
   * @param flowProcess of type FlowProcess, not used by this method
   * @param tap         of type Tap, not used by this method
   * @param conf        of type Configuration, the configuration to update
   */
  @Override
  public void sinkConfInit( FlowProcess<? extends Configuration> flowProcess, Tap<Configuration, RecordReader, OutputCollector> tap, Configuration conf )
    {
    if( keyType != null )
      conf.setClass( "mapred.output.key.class", keyType, Object.class );
    else
      conf.setClass( "mapred.output.key.class", NullWritable.class, Object.class );

    if( valueType != null )
      conf.setClass( "mapred.output.value.class", valueType, Object.class );
    else
      conf.setClass( "mapred.output.value.class", NullWritable.class, Object.class );

    conf.setClass( "mapred.output.format.class", SequenceFileOutputFormat.class, OutputFormat.class );
    }

  /**
   * Reads the next key/value pair from the input and copies the configured positions into the incoming entry.
   * <p>
   * The key and value holder objects are taken from positions 0 and 1 of the source call context
   * (presumably populated by the parent class before sourcing begins). The key, if keyType is not null,
   * is stored at position 0 of the entry, followed by the value if valueType is not null.
   *
   * @param flowProcess of type FlowProcess, not used by this method
   * @param sourceCall  of type SourceCall, provides the context, the input reader and the incoming entry
   * @return false if the input has no more records, true otherwise
   * @throws IOException if the underlying reader fails
   */
  @Override
  public boolean source( FlowProcess<? extends Configuration> flowProcess, SourceCall<Object[], RecordReader> sourceCall ) throws IOException
    {
    Object key = sourceCall.getContext()[ 0 ];
    Object value = sourceCall.getContext()[ 1 ];

    if( !sourceCall.getInput().next( key, value ) )
      return false;

    TupleEntry entry = sourceCall.getIncomingEntry();
    int pos = 0;

    if( keyType != null )
      entry.setObject( pos++, key );

    // pos is 0 when no key is read, 1 otherwise
    if( valueType != null )
      entry.setObject( pos, value );

    return true;
    }

  /**
   * Writes the outgoing entry to the output collector as a key/value pair.
   * <p>
   * Whichever of key or value has a null type is emitted as {@link NullWritable}; the remaining position(s)
   * are read from the outgoing entry (position 0, or positions 0 and 1 when both types are set) and cast to
   * {@link Writable}.
   *
   * @param flowProcess of type FlowProcess, not used by this method
   * @param sinkCall    of type SinkCall, provides the outgoing entry and the output collector
   * @throws IOException if the underlying collector fails
   */
  @Override
  public void sink( FlowProcess<? extends Configuration> flowProcess, SinkCall<Void, OutputCollector> sinkCall ) throws IOException
    {
    TupleEntry tupleEntry = sinkCall.getOutgoingEntry();

    Writable keyValue = NullWritable.get();
    Writable valueValue = NullWritable.get();

    if( keyType == null )
      {
      valueValue = (Writable) tupleEntry.getObject( 0 );
      }
    else if( valueType == null )
      {
      keyValue = (Writable) tupleEntry.getObject( 0 );
      }
    else
      {
      keyValue = (Writable) tupleEntry.getObject( 0 );
      valueValue = (Writable) tupleEntry.getObject( 1 );
      }

    sinkCall.getOutput().collect( keyValue, valueValue );
    }

  /**
   * Two instances are equal when the parent class considers them equal and both keyType and valueType match.
   *
   * @param object the object to compare with
   * @return true if the given object is an equal WritableSequenceFile
   */
  @Override
  public boolean equals( Object object )
    {
    if( this == object )
      return true;
    if( !( object instanceof WritableSequenceFile ) )
      return false;
    if( !super.equals( object ) )
      return false;

    WritableSequenceFile that = (WritableSequenceFile) object;

    if( keyType != null ? !keyType.equals( that.keyType ) : that.keyType != null )
      return false;
    if( valueType != null ? !valueType.equals( that.valueType ) : that.valueType != null )
      return false;

    return true;
    }

  /**
   * Combines the parent hash code with the hash codes of keyType and valueType (0 for a null type).
   *
   * @return the hash code of this instance
   */
  @Override
  public int hashCode()
    {
    int result = super.hashCode();
    result = 31 * result + ( keyType != null ? keyType.hashCode() : 0 );
    result = 31 * result + ( valueType != null ? valueType.hashCode() : 0 );
    return result;
    }
  }