001/*
002 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.platform.hadoop2;
022
023import java.io.File;
024import java.io.IOException;
025import java.util.Map;
026
027import cascading.flow.FlowConnector;
028import cascading.flow.FlowProcess;
029import cascading.flow.FlowProps;
030import cascading.flow.FlowSession;
031import cascading.flow.hadoop.HadoopFlowProcess;
032import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
033import cascading.flow.hadoop2.Hadoop2MR1Planner;
034import cascading.platform.hadoop.BaseHadoopPlatform;
035import cascading.tap.Tap;
036import cascading.tap.hadoop.DistCacheTap;
037import cascading.tap.hadoop.Hfs;
038import cascading.util.Util;
039import org.apache.hadoop.fs.FileSystem;
040import org.apache.hadoop.hdfs.MiniDFSCluster;
041import org.apache.hadoop.mapred.JobConf;
042import org.apache.hadoop.mapred.MiniMRClientCluster;
043import org.apache.hadoop.mapred.MiniMRClientClusterFactory;
044import org.slf4j.Logger;
045import org.slf4j.LoggerFactory;
046
047/**
048 * Class Hadoop2Platform is automatically loaded and injected into a {@link cascading.PlatformTestCase} instance
049 * so that all *PlatformTest classes can be tested against Apache Hadoop 2.x.
050 */
051public class Hadoop2MR1Platform extends BaseHadoopPlatform<JobConf>
052  {
053  private static final Logger LOG = LoggerFactory.getLogger( Hadoop2MR1Platform.class );
054  private transient static MiniDFSCluster dfs;
055  private transient static MiniMRClientCluster mr;
056
057  public Hadoop2MR1Platform()
058    {
059    }
060
061  @Override
062  public String getName()
063    {
064    return "hadoop2-mr1";
065    }
066
067  @Override
068  public FlowConnector getFlowConnector( Map<Object, Object> properties )
069    {
070    return new Hadoop2MR1FlowConnector( properties );
071    }
072
073  @Override
074  public void setNumMapTasks( Map<Object, Object> properties, int numMapTasks )
075    {
076    properties.put( "mapreduce.job.maps", Integer.toString( numMapTasks ) );
077    }
078
079  @Override
080  public void setNumReduceTasks( Map<Object, Object> properties, int numReduceTasks )
081    {
082    properties.put( "mapreduce.job.reduces", Integer.toString( numReduceTasks ) );
083    }
084
085  @Override
086  public Integer getNumMapTasks( Map<Object, Object> properties )
087    {
088    if( properties.get( "mapreduce.job.maps" ) == null )
089      return null;
090
091    return Integer.parseInt( properties.get( "mapreduce.job.maps" ).toString() );
092    }
093
094  @Override
095  public Integer getNumReduceTasks( Map<Object, Object> properties )
096    {
097    if( properties.get( "mapreduce.job.reduces" ) == null )
098      return null;
099
100    return Integer.parseInt( properties.get( "mapreduce.job.reduces" ).toString() );
101    }
102
103  public JobConf getConfiguration()
104    {
105    return new JobConf( configuration );
106    }
107
108  @Override
109  public Tap getDistCacheTap( Hfs parent )
110    {
111    return new DistCacheTap( parent );
112    }
113
114  @Override
115  public FlowProcess getFlowProcess()
116    {
117    return new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );
118    }
119
120  @Override
121  public synchronized void setUp() throws IOException
122    {
123    if( configuration != null )
124      return;
125
126    if( !isUseCluster() )
127      {
128      LOG.info( "not using cluster" );
129      configuration = new JobConf();
130
131      // enforce settings to make local mode behave the same across distributions
132      configuration.set( "fs.defaultFS", "file:///" );
133      configuration.set( "mapreduce.framework.name", "local" );
134      configuration.set( "mapreduce.jobtracker.staging.root.dir", System.getProperty( "user.dir" ) + "/" + "build/tmp/cascading/staging" );
135
136      String stagingDir = configuration.get( "mapreduce.jobtracker.staging.root.dir" );
137
138      if( Util.isEmpty( stagingDir ) )
139        configuration.set( "mapreduce.jobtracker.staging.root.dir", System.getProperty( "user.dir" ) + "/build/tmp/cascading/staging" );
140
141      fileSys = FileSystem.get( configuration );
142      }
143    else
144      {
145      LOG.info( "using cluster" );
146
147      if( Util.isEmpty( System.getProperty( "hadoop.log.dir" ) ) )
148        System.setProperty( "hadoop.log.dir", "build/test/log" );
149
150      if( Util.isEmpty( System.getProperty( "hadoop.tmp.dir" ) ) )
151        System.setProperty( "hadoop.tmp.dir", "build/test/tmp" );
152
153      new File( System.getProperty( "hadoop.log.dir" ) ).mkdirs(); // ignored
154
155      JobConf conf = new JobConf();
156
157      if( getApplicationJar() != null )
158        {
159        LOG.info( "using a remote cluster with jar: {}", getApplicationJar() );
160
161        configuration = conf;
162
163        ( (JobConf) configuration ).setJar( getApplicationJar() );
164
165        if( !Util.isEmpty( System.getProperty( "fs.default.name" ) ) )
166          {
167          LOG.info( "using {}={}", "fs.default.name", System.getProperty( "fs.default.name" ) );
168          configuration.set( "fs.default.name", System.getProperty( "fs.default.name" ) );
169          }
170
171        if( !Util.isEmpty( System.getProperty( "mapred.job.tracker" ) ) )
172          {
173          LOG.info( "using {}={}", "mapred.job.tracker", System.getProperty( "mapred.job.tracker" ) );
174          configuration.set( "mapred.job.tracker", System.getProperty( "mapred.job.tracker" ) );
175          }
176
177        if( !Util.isEmpty( System.getProperty( "fs.defaultFS" ) ) )
178          {
179          LOG.info( "using {}={}", "fs.defaultFS", System.getProperty( "fs.defaultFS" ) );
180          configuration.set( "fs.defaultFS", System.getProperty( "fs.defaultFS" ) );
181          }
182
183        if( !Util.isEmpty( System.getProperty( "yarn.resourcemanager.address" ) ) )
184          {
185          LOG.info( "using {}={}", "yarn.resourcemanager.address", System.getProperty( "yarn.resourcemanager.address" ) );
186          configuration.set( "yarn.resourcemanager.address", System.getProperty( "yarn.resourcemanager.address" ) );
187          }
188
189        if( !Util.isEmpty( System.getProperty( "mapreduce.jobhistory.address" ) ) )
190          {
191          LOG.info( "using {}={}", "mapreduce.jobhistory.address", System.getProperty( "mapreduce.jobhistory.address" ) );
192          configuration.set( "mapreduce.jobhistory.address", System.getProperty( "mapreduce.jobhistory.address" ) );
193          }
194
195        configuration.set( "mapreduce.job.user.classpath.first", "true" ); // use test dependencies
196        configuration.set( "mapreduce.user.classpath.first", "true" ); // use test dependencies
197        configuration.set( "mapreduce.framework.name", "yarn" );
198
199        fileSys = FileSystem.get( configuration );
200        }
201      else
202        {
203        conf.setBoolean( "yarn.is.minicluster", true );
204//      conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
205//      conf.set( "yarn.scheduler.capacity.root.queues", "default" );
206//      conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
207        // disable blacklisting hosts not to fail localhost during unit tests
208        conf.setBoolean( "yarn.app.mapreduce.am.job.node-blacklisting.enable", false );
209
210        dfs = new MiniDFSCluster( conf, 4, true, null );
211        fileSys = dfs.getFileSystem();
212
213        FileSystem.setDefaultUri( conf, fileSys.getUri() );
214
215        mr = MiniMRClientClusterFactory.create( this.getClass(), 4, conf );
216
217        configuration = mr.getConfig();
218        }
219
220      configuration.set( "mapred.child.java.opts", "-Xmx512m" );
221      configuration.setInt( "mapreduce.job.jvm.numtasks", -1 );
222      configuration.setInt( "mapreduce.client.completion.pollinterval", 50 );
223      configuration.setInt( "mapreduce.client.progressmonitor.pollinterval", 50 );
224      configuration.setBoolean( "mapreduce.map.speculative", false );
225      configuration.setBoolean( "mapreduce.reduce.speculative", false );
226      }
227
228    configuration.setInt( "mapreduce.job.maps", numMappers );
229    configuration.setInt( "mapreduce.job.reduces", numReducers );
230
231    Map<Object, Object> globalProperties = getGlobalProperties();
232
233    if( logger != null )
234      globalProperties.put( "log4j.logger", logger );
235
236    FlowProps.setJobPollingInterval( globalProperties, 10 ); // should speed up tests
237
238    Hadoop2MR1Planner.copyProperties( configuration, globalProperties ); // copy any external properties
239
240    Hadoop2MR1Planner.copyConfiguration( properties, configuration ); // put all properties on the jobconf
241    }
242  }