001/*
002 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tuple;
023
024import java.io.Closeable;
025import java.io.IOException;
026import java.util.function.Supplier;
027
028/**
029 * Interface TupleEntryCollector is used to allow {@link cascading.operation.BaseOperation} instances to emit
030 * one or more result {@link Tuple} values.
031 * <p>
032 * The general rule in Cascading is if you are handed a Tuple, you cannot change or cache it. Attempts at modifying
033 * such a Tuple will result in an Exception. Preventing caching is harder, see below.
034 * <p>
035 * If you create the Tuple, you can re-use or modify it.
036 * <p>
037 * When calling {@link #add(Tuple)} or {@link #add(TupleEntry)}, you are passing a Tuple to the down stream pipes and
038 * operations. Since no downstream operation may modify or cache the Tuple instance, it is safe to re-use the Tuple
039 * instance when {@code add()} returns.
040 * <p>
041 * That said, Tuple copies do get cached in order to perform specific operations in the underlying platforms. Currently
042 * only a shallow copy is made (via the {@link Tuple} copy constructor). Thus, any mutable type or collection
043 * placed inside a Tuple will not be copied, but will likely be cached if a copy of the Tuple passed downstream is
044 * copied.
045 * <p>
046 * So any subsequent changes to that nested type or collection will be reflected in the cached copy, a likely
047 * source of hard to find errors.
048 * <p>
049 * There is currently no way to specify that a deep copy must be performed when making a Tuple copy.
050 */
051public abstract class TupleEntryCollector implements Closeable
052  {
053  protected TupleEntry tupleEntry = new TupleEntry( Fields.UNKNOWN, null, true );
054
055  protected TupleEntryCollector()
056    {
057    }
058
059  /**
060   * Constructor TupleCollector creates a new TupleCollector instance.
061   *
062   * @param declared of type Fields
063   */
064  public TupleEntryCollector( Fields declared )
065    {
066    setFields( declared );
067    }
068
069  public void setFields( Fields declared )
070    {
071    if( declared == null )
072      throw new IllegalArgumentException( "declared fields must not be null" );
073
074    if( declared.isUnknown() || declared.isAll() )
075      return;
076
077    // if operation declared ARGS, then the arguments are a selector and must be forced to declared
078    declared = Fields.asDeclaration( declared );
079
080    this.tupleEntry = new TupleEntry( declared, Tuple.size( declared.size() ), true );
081    }
082
083  /**
084   * Method addTupleEntry inserts the given {@link TupleEntry} into the outgoing stream. Note the method {@link #add(Tuple)} is
085   * more efficient as it simply calls {@link TupleEntry#getTuple()};
086   * <p>
087   * See {@link cascading.tuple.TupleEntryCollector} on when and how to re-use a Tuple instance.
088   *
089   * @param supplier of type Supplier
090   */
091  public void addTupleEntry( Supplier<TupleEntry> supplier )
092    {
093    add( supplier.get() );
094    }
095
096  /**
097   * Method add inserts the given {@link TupleEntry} into the outgoing stream. Note the method {@link #add(Tuple)} is
098   * more efficient as it simply calls {@link TupleEntry#getTuple()};
099   * <p>
100   * See {@link cascading.tuple.TupleEntryCollector} on when and how to re-use a Tuple instance.
101   *
102   * @param tupleEntry of type TupleEntry
103   */
104  public void add( TupleEntry tupleEntry )
105    {
106    Fields expectedFields = this.tupleEntry.getFields();
107    TupleEntry outgoingEntry = this.tupleEntry;
108
109    if( expectedFields.isUnknown() || expectedFields.equals( tupleEntry.getFields() ) )
110      outgoingEntry = tupleEntry;
111    else
112      outgoingEntry.setTuple( selectTupleFrom( tupleEntry, expectedFields ) );
113
114    safeCollect( outgoingEntry );
115    }
116
117  private Tuple selectTupleFrom( TupleEntry tupleEntry, Fields expectedFields )
118    {
119    try
120      {
121      return tupleEntry.selectTuple( expectedFields );
122      }
123    catch( TupleException exception )
124      {
125      Fields givenFields = tupleEntry.getFields();
126      String string = "given TupleEntry fields: " + givenFields.printVerbose();
127      string += " do not match the operation declaredFields: " + expectedFields.printVerbose();
128      string += ", operations must emit tuples that match the fields they declare as output";
129
130      throw new TupleException( string, exception );
131      }
132    }
133
134  /**
135   * Method addTuple inserts the given {@link Tuple} into the outgoing stream.
136   * <p>
137   * See {@link cascading.tuple.TupleEntryCollector} on when and how to re-use a Tuple instance.
138   *
139   * @param supplier of type Supplier
140   */
141  public void addTuple( Supplier<Tuple> supplier )
142    {
143    add( supplier.get() );
144    }
145
146  /**
147   * Method add inserts the given {@link Tuple} into the outgoing stream.
148   * <p>
149   * See {@link cascading.tuple.TupleEntryCollector} on when and how to re-use a Tuple instance.
150   *
151   * @param tuple of type Tuple
152   */
153  public void add( Tuple tuple )
154    {
155    if( !tupleEntry.getFields().isUnknown() && tupleEntry.getFields().size() != tuple.size() )
156      throw new TupleException( "operation added the wrong number of fields, expected: " + tupleEntry.getFields().print() + ", got result size: " + tuple.size() );
157
158    boolean isUnmodifiable = tuple.isUnmodifiable();
159
160    tupleEntry.setTuple( tuple );
161
162    try
163      {
164      safeCollect( tupleEntry );
165      }
166    finally
167      {
168      Tuples.setUnmodifiable( tuple, isUnmodifiable );
169      }
170    }
171
172  private void safeCollect( TupleEntry tupleEntry )
173    {
174    try
175      {
176      collect( tupleEntry );
177      }
178    catch( IOException exception )
179      {
180      throw new TupleException( "unable to collect tuple", exception );
181      }
182    }
183
184  protected abstract void collect( TupleEntry tupleEntry ) throws IOException;
185
186  /**
187   * Method close closes the underlying resource being written to.
188   * <p>
189   * This method should be called when when an instance is returned via
190   * {@link cascading.tap.Tap#openForWrite(cascading.flow.FlowProcess)}
191   * and no more {@link Tuple} instances will be written out.
192   * <p>
193   * This method must not be called when an instance is returned from {@code getOutputCollector()} from any of
194   * the relevant {@link cascading.operation.OperationCall} implementations (inside a Function, Aggregator, or Buffer).
195   */
196  @Override
197  public void close()
198    {
199    // do nothing
200    }
201  }