001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.tap.hadoop; 023 024import java.util.Map; 025import java.util.Properties; 026 027import cascading.property.Props; 028 029/** 030 * Class HfsProps is a fluent helper for setting various Hadoop FS level properties that some 031 * {@link cascading.flow.Flow} may or may not be required to have set. These properties are typically passed to a Flow 032 * via a {@link cascading.flow.FlowConnector}. 033 */ 034public class HfsProps extends Props 035 { 036 /** Field TEMPORARY_DIRECTORY */ 037 public static final String TEMPORARY_DIRECTORY = "cascading.tmp.dir"; 038 /** Fields LOCAL_MODE_SCHEME * */ 039 public static final String LOCAL_MODE_SCHEME = "cascading.hadoop.localmode.scheme"; 040 /** Field COMBINE_INPUT_FILES */ 041 public static final String COMBINE_INPUT_FILES = "cascading.hadoop.hfs.combine.files"; 042 /** Field COMBINE_INPUT_FILES_SAFEMODE */ 043 public static final String COMBINE_INPUT_FILES_SAFE_MODE = "cascading.hadoop.hfs.combine.safemode"; 044 /** Field COMBINE_INPUT_FILES_SIZE_MAX */ 045 public static final String COMBINE_INPUT_FILES_SIZE_MAX = "cascading.hadoop.hfs.combine.max.size"; 046 047 protected String temporaryDirectory; 048 protected String localModeScheme; 049 protected Boolean useCombinedInput; 050 protected Long combinedInputMaxSize; 051 protected Boolean combinedInputSafeMode; 052 053 /** 054 * Method setTemporaryDirectory sets the temporary directory on the given properties object. 055 * 056 * @param properties of type Map 057 * @param temporaryDirectory of type String 058 */ 059 public static void setTemporaryDirectory( Map<Object, Object> properties, String temporaryDirectory ) 060 { 061 properties.put( TEMPORARY_DIRECTORY, temporaryDirectory ); 062 } 063 064 /** 065 * Method setLocalModeScheme provides a means to change the scheme value used to detect when a 066 * MapReduce job should be run in Hadoop local mode. By default the value is {@code "file"}, set to 067 * {@code "none"} to disable entirely. 068 * 069 * @param properties of type Map 070 * @param scheme a String 071 */ 072 public static void setLocalModeScheme( Map<Object, Object> properties, String scheme ) 073 { 074 properties.put( LOCAL_MODE_SCHEME, scheme ); 075 } 076 077 /** 078 * Method setUseCombinedInput provides a means to indicate whether to leverage 079 * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} for the input format. By default it is false. 080 * <p> 081 * Use {@link #setCombinedInputMaxSize(long)} to set the max split/combined input size. Other specific 082 * properties must be specified directly if needed. Specifically "mapred.min.split.size.per.node" and 083 * "mapred.min.split.size.per.rack", which are 0 by default. 084 * 085 * @param properties of type Map 086 * @param combine a boolean 087 */ 088 public static void setUseCombinedInput( Map<Object, Object> properties, Boolean combine ) 089 { 090 if( combine != null ) 091 properties.put( COMBINE_INPUT_FILES, Boolean.toString( combine ) ); 092 } 093 094 /** 095 * Method setUseCombinedInputSafeMode toggles safe mode when using 096 * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat}. Safe mode will throw an exception if the underlying 097 * InputFormat is not of type {@link org.apache.hadoop.mapred.FileInputFormat}. If safeMode is off a warning will 098 * be logged instead. safeMode is on by default. 099 * <p> 100 * Setting this property when not setting {@link #setUseCombinedInput(boolean)} to true has no effect. 101 * 102 * @param properties of type Map 103 * @param safeMode a boolean 104 */ 105 public static void setUseCombinedInputSafeMode( Map<Object, Object> properties, Boolean safeMode ) 106 { 107 if( safeMode != null ) 108 properties.put( COMBINE_INPUT_FILES_SAFE_MODE, Boolean.toString( safeMode ) ); 109 } 110 111 /** 112 * Method setCombinedInputMaxSize sets the maximum input split size to be used. 113 * <p> 114 * This property is an alias for the Hadoop property "mapred.max.split.size". 115 * 116 * @param properties of type Map 117 * @param size of type long 118 */ 119 public static void setCombinedInputMaxSize( Map<Object, Object> properties, Long size ) 120 { 121 if( size != null ) 122 properties.put( COMBINE_INPUT_FILES_SIZE_MAX, Long.toString( size ) ); 123 } 124 125 /** 126 * Creates a new HfsProps instance. 127 * 128 * @return HfsProps instance 129 */ 130 public static HfsProps hfsProps() 131 { 132 return new HfsProps(); 133 } 134 135 public HfsProps() 136 { 137 } 138 139 public String getTemporaryDirectory() 140 { 141 return temporaryDirectory; 142 } 143 144 /** 145 * Method setTemporaryDirectory sets the temporary directory for use on the underlying filesystem. 146 * 147 * @param temporaryDirectory of type String 148 * @return returns this instance 149 */ 150 public HfsProps setTemporaryDirectory( String temporaryDirectory ) 151 { 152 this.temporaryDirectory = temporaryDirectory; 153 154 return this; 155 } 156 157 public String getLocalModeScheme() 158 { 159 return localModeScheme; 160 } 161 162 /** 163 * Method setLocalModeScheme provides a means to change the scheme value used to detect when a 164 * MapReduce job should be run in Hadoop local mode. By default the value is {@code "file"}, set to 165 * {@code "none"} to disable entirely. 166 * 167 * @param localModeScheme of type String 168 * @return returns this instance 169 */ 170 public HfsProps setLocalModeScheme( String localModeScheme ) 171 { 172 this.localModeScheme = localModeScheme; 173 174 return this; 175 } 176 177 public boolean isUseCombinedInput() 178 { 179 return useCombinedInput; 180 } 181 182 /** 183 * Method setUseCombinedInput provides a means to indicate whether to leverage 184 * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} for the input format. By default it is false. 185 * 186 * @param useCombinedInput boolean 187 * @return returns this instance 188 */ 189 public HfsProps setUseCombinedInput( boolean useCombinedInput ) 190 { 191 this.useCombinedInput = useCombinedInput; 192 193 return this; 194 } 195 196 public Long getCombinedInputMaxSize() 197 { 198 return combinedInputMaxSize; 199 } 200 201 /** 202 * Method setCombinedInputMaxSize sets the maximum input split size to be used. 203 * <p> 204 * This value is not honored unless {@link #setUseCombinedInput(boolean)} is {@code true}. 205 * 206 * @param combinedInputMaxSize of type long 207 * @return returns this instance 208 */ 209 public HfsProps setCombinedInputMaxSize( long combinedInputMaxSize ) 210 { 211 this.combinedInputMaxSize = combinedInputMaxSize; 212 213 return this; 214 } 215 216 public boolean isUseCombinedInputSafeMode() 217 { 218 return combinedInputSafeMode; 219 } 220 221 /** 222 * Method setUseCombinedInputSafeMode toggles safe mode when using 223 * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat}. Safe mode will throw an exception if the underlying 224 * InputFormat is not of type {@link org.apache.hadoop.mapred.FileInputFormat}. If safeMode is off a warning will 225 * be logged instead. safeMode is on by default. 226 * <p> 227 * Setting this property when not setting {@link #setUseCombinedInput(boolean)} to true has no effect. 228 * 229 * @param combinedInputSafeMode boolean 230 * @return returns this instance 231 */ 232 public HfsProps setUseCombinedInputSafeMode( boolean combinedInputSafeMode ) 233 { 234 this.combinedInputSafeMode = combinedInputSafeMode; 235 236 return this; 237 } 238 239 @Override 240 protected void addPropertiesTo( Properties properties ) 241 { 242 setTemporaryDirectory( properties, temporaryDirectory ); 243 setLocalModeScheme( properties, localModeScheme ); 244 setUseCombinedInput( properties, useCombinedInput ); 245 setCombinedInputMaxSize( properties, combinedInputMaxSize ); 246 setUseCombinedInputSafeMode( properties, combinedInputSafeMode ); 247 } 248 }