/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.tap.local;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.LinkedHashSet;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.TimeUnit;

import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.tap.SinkMode;
import cascading.tap.Tap;
import cascading.tap.local.io.TapFileOutputStream;
import cascading.tap.type.FileType;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;
import cascading.tuple.TupleEntrySchemeCollector;
import cascading.tuple.TupleEntrySchemeIterator;

/**
 * Class FileTap is a {@link Tap} sub-class that allows for direct local file access.
 * <p>
 * This class can only open a single file, see {@link DirTap} for reading from a directory tree.
 * <p>
 * FileTap must be used with the {@link cascading.flow.local.LocalFlowConnector} to create
 * {@link cascading.flow.Flow} instances that run in "local" mode.
 */
public class FileTap extends Tap<Properties, InputStream, OutputStream> implements FileType<Properties>
  {
  /** Location of the file this tap reads from or writes to. */
  private final Path path;

  /**
   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme} and file {@code path}.
   * <p>
   * The sink mode is {@link SinkMode#KEEP}.
   *
   * @param scheme of type Scheme
   * @param path   of type String
   */
  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String path )
    {
    this( scheme, path, SinkMode.KEEP );
    }

  /**
   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme} and file {@code path}.
   * <p>
   * The sink mode is {@link SinkMode#KEEP}.
   *
   * @param scheme of type Scheme
   * @param path   of type Path
   */
  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, Path path )
    {
    this( scheme, path, SinkMode.KEEP );
    }

  /**
   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme},
   * file {@code path}, and {@code SinkMode}.
   *
   * @param scheme   of type Scheme
   * @param path     of type String
   * @param sinkMode of type SinkMode
   * @throws IllegalArgumentException if {@code path} is null
   */
  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, String path, SinkMode sinkMode )
    {
    super( scheme, sinkMode );

    // Paths.get( null ) would throw a NullPointerException before verify() runs; pass the null
    // through so a null String is rejected the same way as a null Path
    this.path = path == null ? null : Paths.get( path ); // cleans path information

    verify();
    }

  /**
   * Constructor FileTap creates a new FileTap instance using the given {@link cascading.scheme.Scheme},
   * file {@code path}, and {@code SinkMode}.
   *
   * @param scheme   of type Scheme
   * @param path     of type Path
   * @param sinkMode of type SinkMode
   * @throws IllegalArgumentException if {@code path} is null
   */
  public FileTap( Scheme<Properties, InputStream, OutputStream, ?, ?> scheme, Path path, SinkMode sinkMode )
    {
    super( scheme, sinkMode );
    this.path = path;

    verify();
    }

  /**
   * Method verify is called at the end of construction to validate the configured path.
   *
   * @throws IllegalArgumentException if {@link #getPath()} returns null
   */
  protected void verify()
    {
    if( getPath() == null )
      throw new IllegalArgumentException( "path may not be null" );
    }

  /**
   * Method getPath returns the path given at construction.
   *
   * @return the Path of this tap
   */
  protected Path getPath()
    {
    return path;
    }

  /**
   * Method getIdentifier returns the path of this tap as a String, as given at construction
   * (it is not made absolute).
   *
   * @return the path String
   */
  @Override
  public String getIdentifier()
    {
    return path.toString();
    }

  /**
   * Method getFullIdentifier returns the absolute form of the path as a URI String.
   *
   * @param conf of type Properties, not used by this implementation
   * @return the absolute URI String
   */
  @Override
  public String getFullIdentifier( Properties conf )
    {
    return getPath().toAbsolutePath().toUri().toString();
    }

  /**
   * Converts the given, possibly relative, file identifier into an absolute URI String.
   */
  private String fullyQualifyIdentifier( String identifier )
    {
    return new File( identifier ).getAbsoluteFile().toURI().toString();
    }

  /**
   * Method openForRead opens this tap for reading. If no {@code input} stream is given, a new
   * {@link FileInputStream} is opened on {@link #getIdentifier()}.
   *
   * @param flowProcess of type FlowProcess
   * @param input       of type InputStream, may be null
   * @return a TupleEntryIterator over the stream
   * @throws IOException when the file cannot be opened
   */
  @Override
  public TupleEntryIterator openForRead( FlowProcess<? extends Properties> flowProcess, InputStream input ) throws IOException
    {
    if( input == null )
      input = new FileInputStream( getIdentifier() );

    flowProcess.getFlowProcessContext().setSourcePath( getFullIdentifier( flowProcess ) );

    return new TupleEntrySchemeIterator<Properties, InputStream>( flowProcess, this, getScheme(), input, getIdentifier() );
    }

  /**
   * Method openForWrite opens this tap for writing. If no {@code output} stream is given, a new
   * {@link TapFileOutputStream} is opened on {@link #getOutputIdentifier()}.
   *
   * @param flowProcess of type FlowProcess
   * @param output      of type OutputStream, may be null
   * @return a TupleEntryCollector writing to the stream
   * @throws IOException when the file cannot be opened
   */
  @Override
  public TupleEntryCollector openForWrite( FlowProcess<? extends Properties> flowProcess, OutputStream output ) throws IOException
    {
    if( output == null )
      output = new TapFileOutputStream( getOutputIdentifier(), isUpdate() ); // append if we are in update mode

    return new TupleEntrySchemeCollector<Properties, OutputStream>( flowProcess, this, getScheme(), output, getIdentifier() );
    }

  /**
   * Only used with {@link #openForWrite(FlowProcess, OutputStream)} calls.
   * <p>
   * This implementation returns the same value as {@link #getIdentifier()}.
   *
   * @return the identifier of the file to write to
   */
  protected String getOutputIdentifier()
    {
    return getIdentifier();
    }

  /**
   * Method createResource makes sure the parent directory of this file exists, creating it and any
   * missing ancestors if necessary. The file itself is not created.
   *
   * @param conf of type Properties, not used by this implementation
   * @return true if the parent directory exists or was created
   * @throws IOException declared by the overridden method
   */
  @Override
  public boolean createResource( Properties conf ) throws IOException
    {
    // resolve against the working directory first: a bare file name like "out.txt" has no parent
    // component and File#getParentFile() would return null
    File parentFile = new File( getIdentifier() ).getAbsoluteFile().getParentFile(); // parent dir

    // only a filesystem root has no parent once absolute, there is nothing to create
    if( parentFile == null )
      return true;

    return parentFile.exists() || parentFile.mkdirs();
    }

  /**
   * Method deleteResource deletes the file at {@link #getPath()} if it exists.
   *
   * @param conf of type Properties, not used by this implementation
   * @return true if the file was deleted, false if it did not exist
   * @throws IOException when the delete fails
   */
  @Override
  public boolean deleteResource( Properties conf ) throws IOException
    {
    return Files.deleteIfExists( getPath() );
    }

  /**
   * Method commitResource performs no work in this implementation.
   *
   * @param conf of type Properties, not used by this implementation
   * @return always true
   */
  @Override
  public boolean commitResource( Properties conf ) throws IOException
    {
    return true;
    }

  /**
   * Method resourceExists tests whether anything exists at {@link #getPath()}.
   *
   * @param conf of type Properties, not used by this implementation
   * @return true if the path exists
   */
  @Override
  public boolean resourceExists( Properties conf ) throws IOException
    {
    return Files.exists( getPath() );
    }

  /**
   * Method getModifiedTime returns the last modified time of {@link #getPath()} in milliseconds.
   *
   * @param conf of type Properties, not used by this implementation
   * @return the last modified time in milliseconds
   * @throws IOException when the modified time cannot be read
   */
  @Override
  public long getModifiedTime( Properties conf ) throws IOException
    {
    return Files.getLastModifiedTime( getPath() ).to( TimeUnit.MILLISECONDS );
    }

  /**
   * Method isDirectory delegates to {@link #isDirectory(Properties)} using the flowProcess config.
   */
  @Override
  public boolean isDirectory( FlowProcess<? extends Properties> flowProcess ) throws IOException
    {
    return isDirectory( flowProcess.getConfig() );
    }

  /**
   * Method isDirectory tests whether {@link #getPath()} is a directory.
   *
   * @param conf of type Properties, not used by this implementation
   * @return true if the path is a directory
   */
  @Override
  public boolean isDirectory( Properties conf ) throws IOException
    {
    return Files.isDirectory( getPath() );
    }

  /**
   * Method getChildIdentifiers delegates to {@link #getChildIdentifiers(Properties)} using the flowProcess config.
   */
  @Override
  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess ) throws IOException
    {
    return getChildIdentifiers( flowProcess.getConfig() );
    }

  /**
   * Method getChildIdentifiers returns the child identifiers to a depth of 1, not fully qualified.
   */
  @Override
  public String[] getChildIdentifiers( Properties conf ) throws IOException
    {
    return getChildIdentifiers( conf, 1, false );
    }

  /**
   * Method getChildIdentifiers delegates to {@link #getChildIdentifiers(Properties, int, boolean)}
   * using the flowProcess config.
   */
  @Override
  public String[] getChildIdentifiers( FlowProcess<? extends Properties> flowProcess, int depth, boolean fullyQualified ) throws IOException
    {
    return getChildIdentifiers( flowProcess.getConfig(), depth, fullyQualified );
    }

  /**
   * Method getChildIdentifiers collects the identifiers found by descending from
   * {@link #getIdentifier()} up to {@code depth} levels.
   *
   * @param conf           of type Properties
   * @param depth          the number of directory levels to descend, 0 returns this identifier itself
   * @param fullyQualified when true each identifier is returned as an absolute URI String
   * @return the identifiers found, empty if this resource does not exist
   */
  @Override
  public String[] getChildIdentifiers( Properties conf, int depth, boolean fullyQualified ) throws IOException
    {
    if( !resourceExists( conf ) )
      return new String[ 0 ];

    // insertion ordered, duplicates dropped
    Set<String> results = new LinkedHashSet<String>();

    getChildPaths( results, getIdentifier(), depth );

    String[] allPaths = results.toArray( new String[ results.size() ] );

    if( !fullyQualified )
      return allPaths;

    for( int i = 0; i < allPaths.length; i++ )
      allPaths[ i ] = fullyQualifyIdentifier( allPaths[ i ] );

    return allPaths;
    }

  /**
   * Method getSize delegates to {@link #getSize(Properties)} using the flowProcess config.
   */
  @Override
  public long getSize( FlowProcess<? extends Properties> flowProcess ) throws IOException
    {
    return getSize( flowProcess.getConfig() );
    }

  /**
   * Method getSize returns the length of the file at {@link #getIdentifier()}.
   *
   * @param conf of type Properties, not used by this implementation
   * @return the file length as reported by {@link File#length()}, or 0 if the identifier is a directory
   */
  @Override
  public long getSize( Properties conf ) throws IOException
    {
    File file = new File( getIdentifier() );

    if( file.isDirectory() )
      return 0;

    return file.length();
    }

  /**
   * Recursively adds to {@code results} the paths reachable from {@code identifier}.
   * <p>
   * A path is added when it is a regular file or when {@code depth} has reached 0, otherwise its
   * children are visited with {@code depth - 1}.
   *
   * @return true if at least one path was added by this call or the calls below it
   */
  private boolean getChildPaths( Set<String> results, String identifier, int depth )
    {
    File file = new File( identifier );

    if( depth == 0 || file.isFile() )
      {
      results.add( identifier );
      return true;
      }

    String[] paths = file.list();

    // File#list() returns null when the path is not a directory or cannot be listed
    if( paths == null )
      return false;

    boolean result = false;

    for( String path : paths )
      result |= getChildPaths( results, new File( file, path ).getPath(), depth - 1 );

    return result;
    }
  }