001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tap.hadoop;
023
024import java.io.IOException;
025import java.net.URI;
026
027import cascading.flow.FlowProcess;
028import org.apache.hadoop.conf.Configuration;
029import org.apache.hadoop.filecache.DistributedCache;
030import org.apache.hadoop.fs.Path;
031
032/**
033 * Class DistCacheTap is a Tap decorator for Hfs and can be used to move a file to the
034 * {@link org.apache.hadoop.filecache.DistributedCache} on read when accessed cluster side.
035 * <p>
036 * This is useful for {@link cascading.pipe.HashJoin}s.
037 * <p>
038 * The distributed cache is only used when the Tap is used as a source. If the DistCacheTap is used as a sink,
039 * it will delegate to the provided parent instance and not use the DistributedCache.
040 */
041public class DistCacheTap extends BaseDistCacheTap
042  {
043  /**
044   * Constructs a new DistCacheTap instance with the given Hfs.
045   *
046   * @param parent an Hfs or GlobHfs instance representing a small file.
047   */
048  public DistCacheTap( Hfs parent )
049    {
050    super( parent );
051    }
052
053  @Override
054  protected Path[] getLocalCacheFiles( FlowProcess<? extends Configuration> flowProcess ) throws IOException
055    {
056    return DistributedCache.getLocalCacheFiles( flowProcess.getConfig() );
057    }
058
059  @Override
060  protected void addLocalCacheFiles( Configuration conf, URI uri )
061    {
062    DistributedCache.addCacheFile( uri, conf );
063    }
064  }