/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.tap.hadoop;

import java.io.IOException;
import java.net.URI;

import cascading.flow.FlowProcess;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;

/**
 * Class DistCacheTap decorates an {@link Hfs} Tap so that, when the Tap is read cluster side, the underlying
 * file can be moved to the {@link org.apache.hadoop.filecache.DistributedCache}.
 * <p>
 * This is useful for {@link cascading.pipe.HashJoin}s.
 * <p>
 * Only source usage goes through the distributed cache. When a DistCacheTap is used as a sink, it delegates to
 * the wrapped parent instance and the DistributedCache is not involved.
 */
public class DistCacheTap extends BaseDistCacheTap
  {
  /**
   * Creates a DistCacheTap that wraps the given Hfs.
   *
   * @param parent an Hfs or GlobHfs instance representing a small file.
   */
  public DistCacheTap( Hfs parent )
    {
    super( parent );
    }

  /**
   * Looks up the local cache files by calling
   * {@link DistributedCache#getLocalCacheFiles(Configuration)} with the configuration of the given flow process.
   *
   * @param flowProcess the flow process whose configuration is passed to the DistributedCache
   * @return whatever {@link DistributedCache#getLocalCacheFiles(Configuration)} returns for that configuration
   * @throws IOException if the DistributedCache lookup fails
   */
  @Override
  protected Path[] getLocalCacheFiles( FlowProcess<? extends Configuration> flowProcess ) throws IOException
    {
    Configuration currentConfig = flowProcess.getConfig();

    return DistributedCache.getLocalCacheFiles( currentConfig );
    }

  /**
   * Registers the given URI as a cache file on the given configuration by calling
   * {@link DistributedCache#addCacheFile(URI, Configuration)}.
   *
   * @param conf the configuration the cache file is added to
   * @param uri  the URI of the file to add
   */
  @Override
  protected void addLocalCacheFiles( Configuration conf, URI uri )
    {
    // note the argument order: the DistributedCache API takes the URI first, then the configuration
    DistributedCache.addCacheFile( uri, conf );
    }
  }