/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow.hadoop.planner;

import java.net.URI;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

import cascading.flow.FlowConnector;
import cascading.flow.FlowConnectorProps;
import cascading.flow.FlowDef;
import cascading.flow.FlowStep;
import cascading.flow.hadoop.HadoopFlow;
import cascading.flow.hadoop.HadoopFlowStep;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.flow.planner.BaseFlowStepFactory;
import cascading.flow.planner.FlowPlanner;
import cascading.flow.planner.PlannerInfo;
import cascading.flow.planner.PlatformInfo;
import cascading.flow.planner.graph.ElementGraph;
import cascading.flow.planner.process.FlowNodeGraph;
import cascading.flow.planner.process.FlowStepFactory;
import cascading.flow.planner.rule.RuleRegistry;
import cascading.flow.planner.rule.transformer.IntermediateTapElementFactory;
import cascading.property.AppProps;
import cascading.property.PropertyUtil;
import cascading.tap.Tap;
import cascading.tap.hadoop.DistCacheTap;
import cascading.tap.hadoop.Hfs;
import cascading.tap.hadoop.util.TempHfs;
import cascading.util.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class HadoopPlanner is the core Hadoop MapReduce planner used by default through a {@link cascading.flow.FlowConnector}
 * sub-class.
 * <p>
 * Notes:
 * <p>
 * <strong>Custom JobConf properties</strong><br>
 * A custom JobConf instance can be passed to this planner by calling {@link #copyJobConf(java.util.Map, org.apache.hadoop.mapred.JobConf)}
 * on a map properties object before constructing a new {@link cascading.flow.FlowConnector} sub-class.
 * <p>
 * A better practice would be to set Hadoop properties directly on the map properties object handed to the FlowConnector.
 * All values in the map will be passed to a new default JobConf instance to be used as defaults for all resulting
 * Flow instances.
 * <p>
 * For example, {@code properties.put("mapred.child.java.opts","-Xmx512m");} would convince Hadoop
 * to spawn all child jvms with a heap of 512MB.
 */
public class HadoopPlanner extends FlowPlanner<HadoopFlow, JobConf>
  {
  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( HadoopPlanner.class );

  /** Field PLATFORM_NAME is the platform name reported by {@link #getPlannerInfo(String)}. */
  public static final String PLATFORM_NAME = "hadoop";

  /** Field defaultJobConf is created in {@link #initialize(FlowConnector, Map)} and returned by {@link #getDefaultConfig()}. */
  private JobConf defaultJobConf;
  /** Field intermediateSchemeClass is the scheme class handed to every temporary tap created by {@link #makeTempTap(String, String)}. */
  private Class intermediateSchemeClass;

  /**
   * Method copyJobConf adds the given JobConf values to the given properties object. Use this method to pass
   * custom default Hadoop JobConf properties to Hadoop.
   *
   * @param properties of type Map, receives one entry per JobConf key/value pair
   * @param jobConf    of type JobConf, the configuration to read from
   */
  public static void copyJobConf( Map<Object, Object> properties, JobConf jobConf )
    {
    for( Map.Entry<String, String> entry : jobConf )
      properties.put( entry.getKey(), entry.getValue() );
    }

  /**
   * Method createJobConf returns a new JobConf instance using the values in the given properties argument.
   * <p>
   * A {@code null} properties argument results in a JobConf holding only its own defaults.
   *
   * @param properties of type Map, may be {@code null}
   * @return a JobConf instance
   */
  public static JobConf createJobConf( Map<Object, Object> properties )
    {
    JobConf conf = new JobConf();

    copyProperties( conf, properties );

    return conf;
    }

  /**
   * Method copyProperties adds the given Map values to the given JobConf object.
   * <p>
   * If the map is a {@link Properties} instance, only the names reported by
   * {@link Properties#stringPropertyNames()} are copied, each resolved through
   * {@link Properties#getProperty(String)}. For any other map, keys and values are converted
   * with {@code toString()}; entries with a {@code null} key or a {@code null} value are skipped.
   * A {@code null} map is treated as empty.
   *
   * @param jobConf    of type JobConf, the configuration to write to
   * @param properties of type Map, may be {@code null}
   */
  public static void copyProperties( JobConf jobConf, Map<Object, Object> properties )
    {
    // nothing to copy, leave the JobConf untouched rather than fail
    if( properties == null )
      return;

    if( properties instanceof Properties )
      {
      Properties props = (Properties) properties;
      Set<String> keys = props.stringPropertyNames();

      for( String key : keys )
        jobConf.set( key, props.getProperty( key ) );

      return;
      }

    for( Map.Entry<Object, Object> entry : properties.entrySet() )
      {
      Object key = entry.getKey();
      Object value = entry.getValue();

      // neither side can be converted with toString() when null, so such entries are ignored
      if( key == null || value == null )
        continue;

      jobConf.set( key.toString(), value.toString() );
      }
    }

  /**
   * Method getPlannerInfo returns a PlannerInfo built from this class's simple name,
   * {@link #PLATFORM_NAME} and the given registry name.
   *
   * @param registryName of type String
   * @return a new PlannerInfo instance
   */
  @Override
  public PlannerInfo getPlannerInfo( String registryName )
    {
    return new PlannerInfo( getClass().getSimpleName(), PLATFORM_NAME, registryName );
    }

  /**
   * Method getDefaultConfig returns the default JobConf created by {@link #initialize(FlowConnector, Map)}.
   *
   * @return the default JobConf, {@code null} if this planner has not been initialized
   */
  @Override
  public JobConf getDefaultConfig()
    {
    return defaultJobConf;
    }

  /**
   * Method getPlatformInfo returns the PlatformInfo resolved by {@link HadoopUtil} for the JobConf class.
   *
   * @return a PlatformInfo instance
   */
  @Override
  public PlatformInfo getPlatformInfo()
    {
    return HadoopUtil.getPlatformInfo( JobConf.class, "org/apache/hadoop", "Hadoop MR" );
    }

  /**
   * Method initialize prepares this planner: it builds the default JobConf from the given properties,
   * checks the platform, looks up the intermediate scheme class, and resolves the application jar.
   * <p>
   * The application jar is taken from the first of the following that yields a value: a jar already set
   * on the default JobConf, the application jar class from the properties, the application jar path from
   * the properties, and finally the class returned by {@link HadoopUtil#findMainClass(Class)}. The resolved
   * jar is written back into the given properties.
   *
   * @param flowConnector of type FlowConnector
   * @param properties    of type Map
   */
  @Override
  public void initialize( FlowConnector flowConnector, Map<Object, Object> properties )
    {
    super.initialize( flowConnector, properties );

    defaultJobConf = HadoopUtil.createJobConf( properties, createJobConf( properties ) );
    checkPlatform( defaultJobConf );
    intermediateSchemeClass = flowConnector.getIntermediateSchemeClass( properties );

    // each step below only applies while no jar has been set by an earlier one
    Class type = AppProps.getApplicationJarClass( properties );
    if( defaultJobConf.getJar() == null && type != null )
      defaultJobConf.setJarByClass( type );

    String path = AppProps.getApplicationJarPath( properties );
    if( defaultJobConf.getJar() == null && path != null )
      defaultJobConf.setJar( path );

    if( defaultJobConf.getJar() == null )
      defaultJobConf.setJarByClass( HadoopUtil.findMainClass( HadoopPlanner.class ) );

    // publish the resolved jar (possibly still null) back to the caller supplied properties
    AppProps.setApplicationJarPath( properties, defaultJobConf.getJar() );

    LOG.info( "using application jar: {}", defaultJobConf.getJar() );
    }

  /**
   * Method configRuleRegistryDefaults registers the default element factories used by this planner, after
   * the super-class registered its own. A {@link TempTapElementFactory} is always registered for
   * {@link IntermediateTapElementFactory#TEMP_TAP}. Unless
   * {@link FlowConnectorProps#ENABLE_DECORATE_ACCUMULATED_TAP} is set to false in the default properties,
   * a factory configured with the {@link DistCacheTap} class name is registered for
   * {@link IntermediateTapElementFactory#ACCUMULATED_TAP}.
   *
   * @param ruleRegistry of type RuleRegistry
   */
  @Override
  public void configRuleRegistryDefaults( RuleRegistry ruleRegistry )
    {
    super.configRuleRegistryDefaults( ruleRegistry );

    ruleRegistry.addDefaultElementFactory( IntermediateTapElementFactory.TEMP_TAP, new TempTapElementFactory() );

    if( PropertyUtil.getBooleanProperty( getDefaultProperties(), FlowConnectorProps.ENABLE_DECORATE_ACCUMULATED_TAP, true ) )
      ruleRegistry.addDefaultElementFactory( IntermediateTapElementFactory.ACCUMULATED_TAP, new TempTapElementFactory( DistCacheTap.class.getName() ) );
    }

  /**
   * Method checkPlatform logs a warning if {@link HadoopUtil#isYARN(Configuration)} reports the given
   * configuration as YARN based. It never fails.
   *
   * @param conf of type Configuration
   */
  protected void checkPlatform( Configuration conf )
    {
    if( HadoopUtil.isYARN( conf ) )
      LOG.warn( "running YARN based flows on Hadoop 1.x may cause problems, please use the 'cascading-hadoop2-mr1' dependencies" );
    }

  /**
   * Method createFlow returns a new HadoopFlow for the given FlowDef, using this planner's platform info,
   * default properties and default JobConf.
   *
   * @param flowDef of type FlowDef
   * @return a new HadoopFlow instance
   */
  @Override
  protected HadoopFlow createFlow( FlowDef flowDef )
    {
    return new HadoopFlow( getPlatformInfo(), getDefaultProperties(), getDefaultConfig(), flowDef );
    }

  /**
   * Method getFlowStepFactory returns a factory that creates a {@link HadoopFlowStep} for every
   * step element graph and flow node graph pair it is given.
   *
   * @return a new FlowStepFactory instance
   */
  @Override
  public FlowStepFactory<JobConf> getFlowStepFactory()
    {
    return new BaseFlowStepFactory<JobConf>( getFlowNodeFactory() )
      {
      @Override
      public FlowStep<JobConf> createFlowStep( ElementGraph stepElementGraph, FlowNodeGraph flowNodeGraph )
        {
        return new HadoopFlowStep( stepElementGraph, flowNodeGraph );
        }
      };
    }

  /**
   * Method getDefaultURIScheme returns the default file system URI scheme of the given tap, resolved
   * against the default JobConf.
   *
   * @param tap of type Tap, must be an {@link Hfs} instance
   * @return the URI scheme reported by the tap
   * @throws ClassCastException if the given tap is not an {@link Hfs}
   */
  public URI getDefaultURIScheme( Tap tap )
    {
    return ( (Hfs) tap ).getDefaultFileSystemURIScheme( defaultJobConf );
    }

  /**
   * Method getURIScheme returns the URI scheme of the given tap, resolved against the default JobConf.
   *
   * @param tap of type Tap, must be an {@link Hfs} instance
   * @return the URI scheme reported by the tap
   * @throws ClassCastException if the given tap is not an {@link Hfs}
   */
  public URI getURIScheme( Tap tap )
    {
    return ( (Hfs) tap ).getURIScheme( defaultJobConf );
    }

  /**
   * Method makeTempTap returns a new {@link TempHfs} named by joining the given prefix and name with
   * {@link Util#makePath(String, String)}, using the default JobConf and the intermediate scheme class.
   *
   * @param prefix of type String, may be {@code null}; the last TempHfs constructor argument is true only in that case
   * @param name   of type String
   * @return a new temporary Tap
   */
  @Override
  protected Tap makeTempTap( String prefix, String name )
    {
    // must give Taps unique names
    return new TempHfs( defaultJobConf, Util.makePath( prefix, name ), intermediateSchemeClass, prefix == null );
    }
  }