/*
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.platform.hadoop2;

import java.io.File;
import java.io.IOException;
import java.util.Map;

import cascading.flow.FlowConnector;
import cascading.flow.FlowProcess;
import cascading.flow.FlowProps;
import cascading.flow.FlowSession;
import cascading.flow.hadoop.HadoopFlowProcess;
import cascading.flow.hadoop2.Hadoop2MR1FlowConnector;
import cascading.flow.hadoop2.Hadoop2MR1Planner;
import cascading.platform.hadoop.BaseHadoopPlatform;
import cascading.tap.Tap;
import cascading.tap.hadoop.DistCacheTap;
import cascading.tap.hadoop.Hfs;
import cascading.util.Util;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MiniMRClientCluster;
import org.apache.hadoop.mapred.MiniMRClientClusterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class Hadoop2MR1Platform is automatically loaded and injected into a {@link cascading.PlatformTestCase} instance
 * so that all *PlatformTest classes can be tested against Apache Hadoop 2.x.
 */
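/*
 * Usage sketch (illustrative only; ExamplePlatformTest is a hypothetical name):
 * test classes are typically bound to one or more platforms through the
 * PlatformRunner JUnit runner, for example:
 *
 *   @RunWith( PlatformRunner.class )
 *   @PlatformRunner.Platform({ Hadoop2MR1Platform.class })
 *   public class ExamplePlatformTest extends PlatformTestCase
 *     {
 *     // test methods access the injected platform via getPlatform()
 *     }
 */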
public class Hadoop2MR1Platform extends BaseHadoopPlatform<JobConf>
  {
  private static final Logger LOG = LoggerFactory.getLogger( Hadoop2MR1Platform.class );
  private transient static MiniDFSCluster dfs;
  private transient static MiniMRClientCluster mr;

  public Hadoop2MR1Platform()
    {
    }

  @Override
  public String getName()
    {
    return "hadoop2-mr1";
    }

  @Override
  public FlowConnector getFlowConnector( Map<Object, Object> properties )
    {
    return new Hadoop2MR1FlowConnector( properties );
    }

  @Override
  public void setNumMapTasks( Map<Object, Object> properties, int numMapTasks )
    {
    properties.put( "mapreduce.job.maps", Integer.toString( numMapTasks ) );
    }

  @Override
  public void setNumReduceTasks( Map<Object, Object> properties, int numReduceTasks )
    {
    properties.put( "mapreduce.job.reduces", Integer.toString( numReduceTasks ) );
    }

  @Override
  public Integer getNumMapTasks( Map<Object, Object> properties )
    {
    if( properties.get( "mapreduce.job.maps" ) == null )
      return null;

    return Integer.parseInt( properties.get( "mapreduce.job.maps" ).toString() );
    }

  @Override
  public Integer getNumReduceTasks( Map<Object, Object> properties )
    {
    if( properties.get( "mapreduce.job.reduces" ) == null )
      return null;

    return Integer.parseInt( properties.get( "mapreduce.job.reduces" ).toString() );
    }

  public JobConf getConfiguration()
    {
    return new JobConf( configuration );
    }

  @Override
  public Tap getDistCacheTap( Hfs parent )
    {
    return new DistCacheTap( parent );
    }

  @Override
  public FlowProcess getFlowProcess()
    {
    return new HadoopFlowProcess( FlowSession.NULL, getConfiguration(), true );
    }

  @Override
  public synchronized void setUp() throws IOException
    {
    if( configuration != null )
      return;

    if( !isUseCluster() )
      {
      LOG.info( "not using cluster" );
      configuration = new JobConf();

      // enforce settings to make local mode behave the same across distributions
      configuration.set( "fs.defaultFS", "file:///" );
      configuration.set( "mapreduce.framework.name", "local" );
      configuration.set( "mapreduce.jobtracker.staging.root.dir", System.getProperty( "user.dir" ) + "/build/tmp/cascading/staging" );

      fileSys = FileSystem.get( configuration );
      }
    else
      {
      LOG.info( "using cluster" );

      if( Util.isEmpty( System.getProperty( "hadoop.log.dir" ) ) )
        System.setProperty( "hadoop.log.dir", "build/test/log" );

      if( Util.isEmpty( System.getProperty( "hadoop.tmp.dir" ) ) )
        System.setProperty( "hadoop.tmp.dir", "build/test/tmp" );

      new File( System.getProperty( "hadoop.log.dir" ) ).mkdirs(); // return value ignored

      JobConf conf = new JobConf();

      // an application jar signals a remote cluster; otherwise spin up in-process mini clusters
      if( getApplicationJar() != null )
        {
        LOG.info( "using a remote cluster with jar: {}", getApplicationJar() );

        configuration = conf;

        ( (JobConf) configuration ).setJar( getApplicationJar() );

        if( !Util.isEmpty( System.getProperty( "fs.default.name" ) ) )
          {
          LOG.info( "using {}={}", "fs.default.name", System.getProperty( "fs.default.name" ) );
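          // "fs.default.name" is the deprecated Hadoop 1.x spelling of "fs.defaultFS";
          // it is honored here so older system-property based setups keep working
          // (the "fs.defaultFS" spelling is handled separately below)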
          configuration.set( "fs.default.name", System.getProperty( "fs.default.name" ) );
          }

        if( !Util.isEmpty( System.getProperty( "mapred.job.tracker" ) ) )
          {
          LOG.info( "using {}={}", "mapred.job.tracker", System.getProperty( "mapred.job.tracker" ) );
          configuration.set( "mapred.job.tracker", System.getProperty( "mapred.job.tracker" ) );
          }

        if( !Util.isEmpty( System.getProperty( "fs.defaultFS" ) ) )
          {
          LOG.info( "using {}={}", "fs.defaultFS", System.getProperty( "fs.defaultFS" ) );
          configuration.set( "fs.defaultFS", System.getProperty( "fs.defaultFS" ) );
          }

        if( !Util.isEmpty( System.getProperty( "yarn.resourcemanager.address" ) ) )
          {
          LOG.info( "using {}={}", "yarn.resourcemanager.address", System.getProperty( "yarn.resourcemanager.address" ) );
          configuration.set( "yarn.resourcemanager.address", System.getProperty( "yarn.resourcemanager.address" ) );
          }

        if( !Util.isEmpty( System.getProperty( "mapreduce.jobhistory.address" ) ) )
          {
          LOG.info( "using {}={}", "mapreduce.jobhistory.address", System.getProperty( "mapreduce.jobhistory.address" ) );
          configuration.set( "mapreduce.jobhistory.address", System.getProperty( "mapreduce.jobhistory.address" ) );
          }

        configuration.set( "mapreduce.job.user.classpath.first", "true" ); // use test dependencies
        configuration.set( "mapreduce.user.classpath.first", "true" ); // use test dependencies
        configuration.set( "mapreduce.framework.name", "yarn" );

        fileSys = FileSystem.get( configuration );
        }
      else
        {
        conf.setBoolean( "yarn.is.minicluster", true );
//        conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
//        conf.set( "yarn.scheduler.capacity.root.queues", "default" );
//        conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
        // disable node blacklisting so localhost is not blacklisted during unit tests
        conf.setBoolean( "yarn.app.mapreduce.am.job.node-blacklisting.enable", false );

        // in-process HDFS with four data nodes, formatted on startup
        dfs = new MiniDFSCluster( conf, 4, true, null );
        fileSys = dfs.getFileSystem();

        FileSystem.setDefaultUri( conf, fileSys.getUri() );

        // in-process MR cluster with four node managers
        mr = MiniMRClientClusterFactory.create( this.getClass(), 4, conf );

        configuration = mr.getConfig();
        }

      // keep test tasks small and polling tight so tests complete quickly and deterministically
      configuration.set( "mapred.child.java.opts", "-Xmx512m" );
      configuration.setInt( "mapreduce.job.jvm.numtasks", -1 );
      configuration.setInt( "mapreduce.client.completion.pollinterval", 50 );
      configuration.setInt( "mapreduce.client.progressmonitor.pollinterval", 50 );
      configuration.setBoolean( "mapreduce.map.speculative", false );
      configuration.setBoolean( "mapreduce.reduce.speculative", false );
      }

    configuration.setInt( "mapreduce.job.maps", numMappers );
    configuration.setInt( "mapreduce.job.reduces", numReducers );

    Map<Object, Object> globalProperties = getGlobalProperties();

    if( logger != null )
      globalProperties.put( "log4j.logger", logger );

    FlowProps.setJobPollingInterval( globalProperties, 10 ); // should speed up tests

    Hadoop2MR1Planner.copyProperties( configuration, globalProperties ); // copy any external properties

    Hadoop2MR1Planner.copyConfiguration( properties, configuration ); // put all properties on the jobconf
    }
  }