001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tuple;
023
024import java.io.Closeable;
025import java.io.IOException;
026import java.util.Collections;
027import java.util.Set;
028import java.util.function.Supplier;
029
030import cascading.flow.FlowProcess;
031import cascading.scheme.ConcreteCall;
032import cascading.scheme.Scheme;
033import cascading.tap.Tap;
034import cascading.util.CloseableIterator;
035import cascading.util.SingleCloseableInputIterator;
036import cascading.util.Util;
037import org.slf4j.Logger;
038import org.slf4j.LoggerFactory;
039
040/**
041 * Class TupleEntrySchemeIterator is a helper class for wrapping a {@link Scheme} instance, calling
042 * {@link Scheme#source(cascading.flow.FlowProcess, cascading.scheme.SourceCall)} on every call to
043 * {@link #next()}. The behavior can be controlled via properties defined in {@link TupleEntrySchemeIteratorProps}.
044 * <p>
045 * Use this class inside a custom {@link cascading.tap.Tap} when overriding the
046 * {@link cascading.tap.Tap#openForRead(cascading.flow.FlowProcess)} method.
047 */
048public class TupleEntrySchemeIterator<Config, Input> extends TupleEntryIterator
049  {
050  /** Field LOG */
051  private static final Logger LOG = LoggerFactory.getLogger( TupleEntrySchemeIterator.class );
052
053  private final FlowProcess<? extends Config> flowProcess;
054  private final Scheme<Config, Input, ?, Object, ?> scheme;
055  private final CloseableIterator<Input> inputIterator;
056  private final Set<Class<? extends Exception>> permittedExceptions;
057  private ConcreteCall sourceCall;
058
059  private Supplier<String> loggableIdentifier = () -> "'unknown'";
060  private boolean isComplete = false;
061  private boolean hasWaiting = false;
062  private TupleException currentException;
063
064  @Deprecated
065  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, Input input )
066    {
067    this( flowProcess, scheme, input, null );
068    }
069
070  @Deprecated
071  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, Input input, String loggableIdentifier )
072    {
073    this( flowProcess, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier );
074    }
075
076  @Deprecated
077  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, CloseableIterator<Input> inputIterator )
078    {
079    this( flowProcess, scheme, inputIterator, null );
080    }
081
082  @Deprecated
083  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Scheme scheme, CloseableIterator<Input> inputIterator, String loggableIdentifier )
084    {
085    this( flowProcess, null, scheme, inputIterator, loggableIdentifier );
086    }
087
088  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input )
089    {
090    this( flowProcess, tap, scheme, input, (Supplier<String>) null );
091    }
092
093  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input, String loggableIdentifier )
094    {
095    this( flowProcess, tap, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier );
096    }
097
098  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, Input input, Supplier<String> loggableIdentifier )
099    {
100    this( flowProcess, tap, scheme, (CloseableIterator<Input>) new SingleCloseableInputIterator( (Closeable) input ), loggableIdentifier );
101    }
102
103  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator )
104    {
105    this( flowProcess, tap, scheme, inputIterator, (Supplier<String>) null );
106    }
107
108  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator, String loggableIdentifier )
109    {
110    this( flowProcess, tap, scheme, inputIterator, loggableIdentifier == null ? null : () -> loggableIdentifier );
111    }
112
113  public TupleEntrySchemeIterator( FlowProcess<? extends Config> flowProcess, Tap tap, Scheme scheme, CloseableIterator<Input> inputIterator, Supplier<String> loggableIdentifier )
114    {
115    super( scheme.getSourceFields() );
116    this.flowProcess = flowProcess;
117    this.scheme = scheme;
118    this.inputIterator = inputIterator;
119
120    Object permittedExceptions = flowProcess.getProperty( TupleEntrySchemeIteratorProps.PERMITTED_EXCEPTIONS );
121
122    if( permittedExceptions != null )
123      this.permittedExceptions = Util.asClasses( permittedExceptions.toString(), "unable to load permitted exception class" );
124    else
125      this.permittedExceptions = Collections.emptySet();
126
127    // honor provided loggableIdentifier value
128    if( tap != null && loggableIdentifier == null )
129      this.loggableIdentifier = tap::getIdentifier;
130    else if( loggableIdentifier != null )
131      this.loggableIdentifier = loggableIdentifier;
132
133    if( !inputIterator.hasNext() )
134      {
135      isComplete = true;
136      return;
137      }
138
139    sourceCall = new ConcreteCall();
140
141    sourceCall.setTap( tap );
142    sourceCall.setIncomingEntry( getTupleEntry() );
143    sourceCall.setInput( wrapInput( inputIterator.next() ) );
144
145    try
146      {
147      this.scheme.sourcePrepare( flowProcess, sourceCall );
148      }
149    catch( IOException exception )
150      {
151      throw new TupleException( "unable to prepare source for input identifier: " + this.loggableIdentifier.get(), exception );
152      }
153    }
154
155  protected FlowProcess<? extends Config> getFlowProcess()
156    {
157    return flowProcess;
158    }
159
160  protected Input wrapInput( Input input )
161    {
162    try
163      {
164      return scheme.sourceWrap( flowProcess, input );
165      }
166    catch( IOException exception )
167      {
168      throw new TupleException( "unable to wrap source for input identifier: " + this.loggableIdentifier.get(), exception );
169      }
170    }
171
172  @Override
173  public boolean hasNext()
174    {
175    if( currentException != null )
176      return true;
177
178    if( isComplete )
179      return false;
180
181    if( hasWaiting )
182      return true;
183
184    try
185      {
186      getNext();
187      }
188    catch( Exception exception )
189      {
190      if( permittedExceptions.contains( exception.getClass() ) )
191        {
192        LOG.warn( "Caught permitted exception while reading {}", loggableIdentifier.get(), exception );
193        return false;
194        }
195
196      currentException = new TupleException( "unable to read from input identifier: " + loggableIdentifier.get(), exception );
197
198      return true;
199      }
200
201    if( !hasWaiting )
202      isComplete = true;
203
204    return !isComplete;
205    }
206
207  private TupleEntry getNext() throws IOException
208    {
209    Tuples.asModifiable( sourceCall.getIncomingEntry().getTuple() );
210    hasWaiting = scheme.source( flowProcess, sourceCall );
211
212    while( !hasWaiting && inputIterator.hasNext() )
213      {
214      sourceCall.setInput( wrapInput( inputIterator.next() ) );
215
216      try
217        {
218        scheme.sourceRePrepare( flowProcess, sourceCall );
219        }
220      catch( IOException exception )
221        {
222        throw new TupleException( "unable to prepare source for input identifier: " + loggableIdentifier.get(), exception );
223        }
224
225      Tuples.asModifiable( sourceCall.getIncomingEntry().getTuple() );
226      hasWaiting = scheme.source( flowProcess, sourceCall );
227      }
228
229    return getTupleEntry();
230    }
231
232  @Override
233  public TupleEntry next()
234    {
235    try
236      {
237      if( currentException != null )
238        throw currentException;
239      }
240    finally
241      {
242      currentException = null; // data may be trapped
243      }
244
245    if( isComplete )
246      throw new IllegalStateException( "no next element" );
247
248    try
249      {
250      if( hasWaiting )
251        return getTupleEntry();
252
253      return getNext();
254      }
255    catch( Exception exception )
256      {
257      throw new TupleException( "unable to source from input identifier: " + loggableIdentifier.get(), exception );
258      }
259    finally
260      {
261      hasWaiting = false;
262      }
263    }
264
265  @Override
266  public void remove()
267    {
268    throw new UnsupportedOperationException( "may not remove elements from this iterator" );
269    }
270
271  @Override
272  public void close() throws IOException
273    {
274    try
275      {
276      if( sourceCall != null )
277        scheme.sourceCleanup( flowProcess, sourceCall );
278      }
279    finally
280      {
281      inputIterator.close();
282      }
283    }
284  }