001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.tuple; 023 024import java.io.Closeable; 025import java.io.IOException; 026import java.util.Collections; 027import java.util.Set; 028import java.util.function.Supplier; 029 030import cascading.flow.FlowProcess; 031import cascading.scheme.ConcreteCall; 032import cascading.scheme.Scheme; 033import cascading.tap.Tap; 034import cascading.util.CloseableIterator; 035import cascading.util.SingleCloseableInputIterator; 036import cascading.util.Util; 037import org.slf4j.Logger; 038import org.slf4j.LoggerFactory; 039 040/** 041 * Class TupleEntrySchemeIterator is a helper class for wrapping a {@link Scheme} instance, calling 042 * {@link Scheme#source(cascading.flow.FlowProcess, cascading.scheme.SourceCall)} on every call to 043 * {@link #next()}. The behavior can be controlled via properties defined in {@link TupleEntrySchemeIteratorProps}. 044 * <p> 045 * Use this class inside a custom {@link cascading.tap.Tap} when overriding the 046 * {@link cascading.tap.Tap#openForRead(cascading.flow.FlowProcess)} method. 047 */ 048public class TupleEntrySchemeIterator<Config, Input> extends TupleEntryIterator 049 { 050 /** Field LOG */ 051 private static final Logger LOG = LoggerFactory.getLogger( TupleEntrySchemeIterator.class ); 052 053 private final FlowProcess<? extends Config> flowProcess; 054 private final Scheme<Config, Input, ?, Object, ?> scheme; 055 private final CloseableIterator<Input> inputIterator; 056 private final Set<Class<? extends Exception>> permittedExceptions; 057 private ConcreteCall sourceCall; 058 059 private Supplier<String> loggableIdentifier = () -> "'unknown'"; 060 private boolean isComplete = false; 061 private boolean hasWaiting = false; 062 private TupleException currentException; 063 064 @Deprecated 065 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, Input input ) 066 { 067 this( flowProcess, scheme, input, null ); 068 } 069 070 @Deprecated 071 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, Input input, String loggableIdentifier ) 072 { 073 this( flowProcess, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier ); 074 } 075 076 @Deprecated 077 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, CloseableIterator<Input> inputIterator ) 078 { 079 this( flowProcess, scheme, inputIterator, null ); 080 } 081 082 @Deprecated 083 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, CloseableIterator<Input> inputIterator, String loggableIdentifier ) 084 { 085 this( flowProcess, null, scheme, inputIterator, loggableIdentifier ); 086 } 087 088 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input ) 089 { 090 this( flowProcess, tap, scheme, input, (Supplier<String>) null ); 091 } 092 093 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input, String loggableIdentifier ) 094 { 095 this( flowProcess, tap, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier ); 096 } 097 098 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input, Supplier<String> loggableIdentifier ) 099 { 100 this( flowProcess, tap, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier ); 101 } 102 103 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator ) 104 { 105 this( flowProcess, tap, scheme, inputIterator, (Supplier<String>) null ); 106 } 107 108 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator, String loggableIdentifier ) 109 { 110 this( flowProcess, tap, scheme, inputIterator, loggableIdentifier == null ? null : () -> loggableIdentifier ); 111 } 112 113 public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator, Supplier<String> loggableIdentifier ) 114 { 115 super( scheme.getSourceFields() ); 116 this.flowProcess = flowProcess; 117 this.scheme = scheme; 118 this.inputIterator = inputIterator; 119 120 Object permittedExceptions = flowProcess.getProperty( TupleEntrySchemeIteratorProps.PERMITTED_EXCEPTIONS ); 121 122 if( permittedExceptions != null ) 123 this.permittedExceptions = Util.asClasses( permittedExceptions.toString(), "unable to load permitted exception class" ); 124 else 125 this.permittedExceptions = Collections.emptySet(); 126 127 // honor provided loggableIdentifier value 128 if( tap != null && loggableIdentifier == null ) 129 this.loggableIdentifier = tap::getIdentifier; 130 else if( loggableIdentifier != null ) 131 this.loggableIdentifier = loggableIdentifier; 132 133 if( !inputIterator.hasNext() ) 134 { 135 isComplete = true; 136 return; 137 } 138 139 sourceCall = new ConcreteCall(); 140 141 sourceCall.setTap( tap ); 142 sourceCall.setIncomingEntry( getTupleEntry() ); 143 sourceCall.setInput( wrapInput( inputIterator.next() ) ); 144 145 try 146 { 147 this.scheme.sourcePrepare( flowProcess, sourceCall ); 148 } 149 catch( IOException exception ) 150 { 151 throw new TupleException( "unable to prepare source for input identifier: " + this.loggableIdentifier.get(), exception ); 152 } 153 } 154 155 protected FlowProcess<? extends Config> getFlowProcess() 156 { 157 return flowProcess; 158 } 159 160 protected Input wrapInput( Input input ) 161 { 162 try 163 { 164 return scheme.sourceWrap( flowProcess, input ); 165 } 166 catch( IOException exception ) 167 { 168 throw new TupleException( "unable to wrap source for input identifier: " + this.loggableIdentifier.get(), exception ); 169 } 170 } 171 172 @Override 173 public boolean hasNext() 174 { 175 if( currentException != null ) 176 return true; 177 178 if( isComplete ) 179 return false; 180 181 if( hasWaiting ) 182 return true; 183 184 try 185 { 186 getNext(); 187 } 188 catch( Exception exception ) 189 { 190 if( permittedExceptions.contains( exception.getClass() ) ) 191 { 192 LOG.warn( "Caught permitted exception while reading {}", loggableIdentifier.get(), exception ); 193 return false; 194 } 195 196 currentException = new TupleException( "unable to read from input identifier: " + loggableIdentifier.get(), exception ); 197 198 return true; 199 } 200 201 if( !hasWaiting ) 202 isComplete = true; 203 204 return !isComplete; 205 } 206 207 private TupleEntry getNext() throws IOException 208 { 209 Tuples.asModifiable( sourceCall.getIncomingEntry().getTuple() ); 210 hasWaiting = scheme.source( flowProcess, sourceCall ); 211 212 while( !hasWaiting && inputIterator.hasNext() ) 213 { 214 sourceCall.setInput( wrapInput( inputIterator.next() ) ); 215 216 try 217 { 218 scheme.sourceRePrepare( flowProcess, sourceCall ); 219 } 220 catch( IOException exception ) 221 { 222 throw new TupleException( "unable to prepare source for input identifier: " + loggableIdentifier.get(), exception ); 223 } 224 225 Tuples.asModifiable( sourceCall.getIncomingEntry().getTuple() ); 226 hasWaiting = scheme.source( flowProcess, sourceCall ); 227 } 228 229 return getTupleEntry(); 230 } 231 232 @Override 233 public TupleEntry next() 234 { 235 try 236 { 237 if( currentException != null ) 238 throw currentException; 239 } 240 finally 241 { 242 currentException = null; // data may be trapped 243 } 244 245 if( isComplete ) 246 throw new IllegalStateException( "no next element" ); 247 248 try 249 { 250 if( hasWaiting ) 251 return getTupleEntry(); 252 253 return getNext(); 254 } 255 catch( Exception exception ) 256 { 257 throw new TupleException( "unable to source from input identifier: " + loggableIdentifier.get(), exception ); 258 } 259 finally 260 { 261 hasWaiting = false; 262 } 263 } 264 265 @Override 266 public void remove() 267 { 268 throw new UnsupportedOperationException( "may not remove elements from this iterator" ); 269 } 270 271 @Override 272 public void close() throws IOException 273 { 274 try 275 { 276 if( sourceCall != null ) 277 scheme.sourceCleanup( flowProcess, sourceCall ); 278 } 279 finally 280 { 281 inputIterator.close(); 282 } 283 } 284 }