/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.flow.hadoop2;

import java.util.Map;
import java.util.Properties;
import java.util.Set;

import cascading.flow.hadoop.planner.HadoopPlanner;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.flow.planner.PlannerInfo;
import org.apache.hadoop.conf.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class Hadoop2MR1Planner is the core Hadoop MapReduce planner used by default through the {@link cascading.flow.hadoop2.Hadoop2MR1FlowConnector}.
 * <p>
 * Notes:
 * <p>
 * <strong>Custom Configuration properties</strong><br>
 * A custom Configuration instance can be passed to this planner by calling
 * {@link #copyConfiguration(java.util.Map, org.apache.hadoop.conf.Configuration)}
 * on a map properties object before constructing a new {@link cascading.flow.hadoop2.Hadoop2MR1FlowConnector}.
 * <p>
 * A better practice would be to set Hadoop properties directly on the map properties object handed to the FlowConnector.
 * All values in the map will be passed to a new default JobConf instance to be used as defaults for all resulting
 * Flow instances.
 * <p>
 * For example, {@code properties.set("mapred.child.java.opts","-Xmx512m");} would convince Hadoop
 * to spawn all child jvms with a heap of 512MB.
 */
public class Hadoop2MR1Planner extends HadoopPlanner
  {
  /** Field LOG */
  private static final Logger LOG = LoggerFactory.getLogger( Hadoop2MR1Planner.class );

  /** Platform identifier reported through {@link #getPlannerInfo(String)}. */
  public static final String PLATFORM_NAME = "hadoop2-mr1";

  /**
   * Method copyConfiguration adds the given Configuration values to the given properties object. Use this method to pass
   * custom default Hadoop Configuration properties to Hadoop.
   *
   * @param properties    of type Map
   * @param configuration of type Configuration
   */
  public static void copyConfiguration( Map<Object, Object> properties, Configuration configuration )
    {
    // Configuration is Iterable<Map.Entry<String, String>>, so this copies every key/value pair
    for( Map.Entry<String, String> entry : configuration )
      properties.put( entry.getKey(), entry.getValue() );
    }

  /**
   * Method copyProperties adds the given Map values to the given Configuration object.
   *
   * @param configuration of type Configuration
   * @param properties    of type Map
   */
  public static void copyProperties( Configuration configuration, Map<Object, Object> properties )
    {
    if( properties instanceof Properties )
      {
      // use stringPropertyNames() so keys inherited from a chained defaults Properties are included
      Properties props = (Properties) properties;
      Set<String> keys = props.stringPropertyNames();

      for( String key : keys )
        configuration.set( key, props.getProperty( key ) );
      }
    else
      {
      for( Map.Entry<Object, Object> entry : properties.entrySet() )
        {
        // Configuration#set rejects null values, so skip them
        if( entry.getValue() != null )
          configuration.set( entry.getKey().toString(), entry.getValue().toString() );
        }
      }
    }

  @Override
  public PlannerInfo getPlannerInfo( String registryName )
    {
    return new PlannerInfo( getClass().getSimpleName(), PLATFORM_NAME, registryName );
    }

  @Override
  protected void checkPlatform( Configuration conf )
    {
    // this planner drives MR1-style flows; warn when running on a YARN cluster with the wrong dependency set
    if( !HadoopUtil.isYARN( conf ) )
      LOG.warn( "running Hadoop 1.x based flows on YARN may cause problems, please use the 'cascading-hadoop' dependencies" );
    }
  }