001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tap.local;
023
024import java.io.File;
025import java.io.FileInputStream;
026import java.io.IOException;
027import java.io.InputStream;
028import java.io.OutputStream;
029import java.nio.file.Files;
030import java.nio.file.Path;
031import java.nio.file.Paths;
032import java.util.LinkedHashSet;
033import java.util.Properties;
034import java.util.Set;
035import java.util.concurrent.TimeUnit;
036
037import cascading.flow.FlowProcess;
038import cascading.scheme.Scheme;
039import cascading.tap.SinkMode;
040import cascading.tap.Tap;
041import cascading.tap.local.io.TapFileOutputStream;
042import cascading.tap.type.FileType;
043import cascading.tuple.TupleEntryCollector;
044import cascading.tuple.TupleEntryIterator;
045import cascading.tuple.TupleEntrySchemeCollector;
046import cascading.tuple.TupleEntrySchemeIterator;
047
048/**
049 * Class FileTap is a {@link Tap} sub-class that allows for direct local file access.
050 * <p>
051 * This class can only open an single file, see {@link DirTap} for reading from a directory tree.
052 * <p>
053 * FileTap must be used with the {@link cascading.flow.local.LocalFlowConnector} to create
054 * {@link cascading.flow.Flow} instances that run in "local" mode.
055 */
056public class FileTap extends Tap<Properties, InputStream, OutputStream> implements FileType<Properties>
057  {
058  private final Path path;
059
060  /**
061   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme} and file {@code path}.
062   *
063   * @param scheme of type Scheme
064   * @param path   of type String
065   */
066  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String path )
067    {
068    this( scheme, path, SinkMode.KEEP );
069    }
070
071  /**
072   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme} and file {@code path}.
073   *
074   * @param scheme of type Scheme
075   * @param path   of type Path
076   */
077  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, Path path )
078    {
079    this( scheme, path, SinkMode.KEEP );
080    }
081
082  /**
083   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme},
084   * file {@code path}, and {@code SinkMode}.
085   *
086   * @param scheme   of type Scheme
087   * @param path     of type String
088   * @param sinkMode of type SinkMode
089   */
090  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String path, SinkMode sinkMode )
091    {
092    super( scheme, sinkMode );
093    this.path = Paths.get( path ); // cleans path information
094
095    verify();
096    }
097
098  /**
099   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme},
100   * file {@code path}, and {@code SinkMode}.
101   *
102   * @param scheme   of type Scheme
103   * @param path     of type String
104   * @param sinkMode of type SinkMode
105   */
106  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, Path path, SinkMode sinkMode )
107    {
108    super( scheme, sinkMode );
109    this.path = path;
110
111    verify();
112    }
113
114  protected void verify()
115    {
116    if( getPath() == null )
117      throw new IllegalArgumentException( "path may not be null" );
118    }
119
120  protected Path getPath()
121    {
122    return path;
123    }
124
125  @Override
126  public String getIdentifier()
127    {
128    return path.toString();
129    }
130
131  @Override
132  public String getFullIdentifier( Properties conf )
133    {
134    return getPath().toAbsolutePath().toUri().toString();
135    }
136
137  private String fullyQualifyIdentifier( String identifier )
138    {
139    return new File( identifier ).getAbsoluteFile().toURI().toString();
140    }
141
142  @Override
143  public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException
144    {
145    if( input == null )
146      input = new FileInputStream( getIdentifier() );
147
148    flowProcess.getFlowProcessContext().setSourcePath( getFullIdentifier( flowProcess ) );
149
150    return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), input, getIdentifier() );
151    }
152
153  @Override
154  public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream output ) throws IOException
155    {
156    if( output == null )
157      output = new TapFileOutputStream( getOutputIdentifier(), isUpdate() ); // append if we are in update mode
158
159    return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), output, getIdentifier() );
160    }
161
162  /**
163   * Only used with {@link #openForWrite(FlowProcess, OutputStream)} calls.
164   */
165  protected String getOutputIdentifier()
166    {
167    return getIdentifier();
168    }
169
170  @Override
171  public boolean createResource( Properties conf ) throws IOException
172    {
173    File parentFile = new File( getIdentifier() ).getParentFile(); // parent dir
174
175    return parentFile.exists() || parentFile.mkdirs();
176    }
177
178  @Override
179  public boolean deleteResource( Properties conf ) throws IOException
180    {
181    return Files.deleteIfExists( getPath() );
182    }
183
184  @Override
185  public boolean commitResource( Properties conf ) throws IOException
186    {
187    return true;
188    }
189
190  @Override
191  public boolean resourceExists( Properties conf ) throws IOException
192    {
193    return Files.exists( getPath() );
194    }
195
196  @Override
197  public long getModifiedTime( Properties conf ) throws IOException
198    {
199    return Files.getLastModifiedTime( getPath() ).to( TimeUnit.MILLISECONDS );
200    }
201
202  @Override
203  public boolean isDirectory( FlowProcess<? extends Properties> flowProcess ) throws IOException
204    {
205    return isDirectory( flowProcess.getConfig() );
206    }
207
208  @Override
209  public boolean isDirectory( Properties conf ) throws IOException
210    {
211    return Files.isDirectory( getPath() );
212    }
213
214  @Override
215  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException
216    {
217    return getChildIdentifiers( flowProcess.getConfig() );
218    }
219
220  @Override
221  public String[] getChildIdentifiers( Properties conf ) throws IOException
222    {
223    return getChildIdentifiers( conf, 1, false );
224    }
225
226  @Override
227  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException
228    {
229    return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified );
230    }
231
232  @Override
233  public String[] getChildIdentifiers( Properties conf, int depth, boolean fullyQualified ) throws IOException
234    {
235    if( !resourceExists( conf ) )
236      return new String[ 0 ];
237
238    Set<String> results = new LinkedHashSet<String>();
239
240    getChildPaths( results, getIdentifier(), depth );
241
242    String[] allPaths = results.toArray( new String[ results.size() ] );
243
244    if( !fullyQualified )
245      return allPaths;
246
247    for( int i = 0; i < allPaths.length; i++ )
248      allPaths[ i ] = fullyQualifyIdentifier( allPaths[ i ] );
249
250    return allPaths;
251    }
252
253  @Override
254  public long getSize( FlowProcess<? extends Properties> flowProcess ) throws IOException
255    {
256    return getSize( flowProcess.getConfig() );
257    }
258
259  @Override
260  public long getSize( Properties conf ) throws IOException
261    {
262    File file = new File( getIdentifier() );
263
264    if( file.isDirectory() )
265      return 0;
266
267    return file.length();
268    }
269
270  private boolean getChildPaths( Set<String> results, String identifier, int depth )
271    {
272    File file = new File( identifier );
273
274    if( depth == 0 || file.isFile() )
275      {
276      results.add( identifier );
277      return true;
278      }
279
280    String[] paths = file.list();
281
282    if( paths == null )
283      return false;
284
285    boolean result = false;
286
287    for( String path : paths )
288      result |= getChildPaths( results, new File( file, path ).getPath(), depth - 1 );
289
290    return result;
291    }
292  }