001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tap.hadoop;
023
024import java.util.Map;
025import java.util.Properties;
026
027import cascading.property.Props;
028
029/**
030 * Class HfsProps is a fluent helper for setting various Hadoop FS level properties that some
031 * {@link cascading.flow.Flow} may or may not be required to have set. These properties are typically passed to a Flow
032 * via a {@link cascading.flow.FlowConnector}.
033 */
034public class HfsProps extends Props
035  {
036  /** Field TEMPORARY_DIRECTORY */
037  public static final String TEMPORARY_DIRECTORY = "cascading.tmp.dir";
038  /** Fields LOCAL_MODE_SCHEME * */
039  public static final String LOCAL_MODE_SCHEME = "cascading.hadoop.localmode.scheme";
040  /** Field COMBINE_INPUT_FILES */
041  public static final String COMBINE_INPUT_FILES = "cascading.hadoop.hfs.combine.files";
042  /** Field COMBINE_INPUT_FILES_SAFEMODE */
043  public static final String COMBINE_INPUT_FILES_SAFE_MODE = "cascading.hadoop.hfs.combine.safemode";
044  /** Field COMBINE_INPUT_FILES_SIZE_MAX */
045  public static final String COMBINE_INPUT_FILES_SIZE_MAX = "cascading.hadoop.hfs.combine.max.size";
046
047  protected String temporaryDirectory;
048  protected String localModeScheme;
049  protected Boolean useCombinedInput;
050  protected Long combinedInputMaxSize;
051  protected Boolean combinedInputSafeMode;
052
053  /**
054   * Method setTemporaryDirectory sets the temporary directory on the given properties object.
055   *
056   * @param properties         of type Map
057   * @param temporaryDirectory of type String
058   */
059  public static void setTemporaryDirectory( Map<Object, Object> properties, String temporaryDirectory )
060    {
061    properties.put( TEMPORARY_DIRECTORY, temporaryDirectory );
062    }
063
064  /**
065   * Method setLocalModeScheme provides a means to change the scheme value used to detect when a
066   * MapReduce job should be run in Hadoop local mode. By default the value is {@code "file"}, set to
067   * {@code "none"} to disable entirely.
068   *
069   * @param properties of type Map
070   * @param scheme     a String
071   */
072  public static void setLocalModeScheme( Map<Object, Object> properties, String scheme )
073    {
074    properties.put( LOCAL_MODE_SCHEME, scheme );
075    }
076
077  /**
078   * Method setUseCombinedInput provides a means to indicate whether to leverage
079   * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} for the input format. By default it is false.
080   * <p>
081   * Use {@link #setCombinedInputMaxSize(long)} to set the max split/combined input size. Other specific
082   * properties must be specified directly if needed. Specifically "mapred.min.split.size.per.node" and
083   * "mapred.min.split.size.per.rack", which are 0 by default.
084   *
085   * @param properties of type Map
086   * @param combine    a boolean
087   */
088  public static void setUseCombinedInput( Map<Object, Object> properties, Boolean combine )
089    {
090    if( combine != null )
091      properties.put( COMBINE_INPUT_FILES, Boolean.toString( combine ) );
092    }
093
094  /**
095   * Method setUseCombinedInputSafeMode toggles safe mode when using
096   * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat}. Safe mode will throw an exception if the underlying
097   * InputFormat is not of type {@link org.apache.hadoop.mapred.FileInputFormat}. If safeMode is off a warning will
098   * be logged instead. safeMode is on by default.
099   * <p>
100   * Setting this property when not setting {@link #setUseCombinedInput(boolean)} to true has no effect.
101   *
102   * @param properties of type Map
103   * @param safeMode   a boolean
104   */
105  public static void setUseCombinedInputSafeMode( Map<Object, Object> properties, Boolean safeMode )
106    {
107    if( safeMode != null )
108      properties.put( COMBINE_INPUT_FILES_SAFE_MODE, Boolean.toString( safeMode ) );
109    }
110
111  /**
112   * Method setCombinedInputMaxSize sets the maximum input split size to be used.
113   * <p>
114   * This property is an alias for the Hadoop property "mapred.max.split.size".
115   *
116   * @param properties of type Map
117   * @param size       of type long
118   */
119  public static void setCombinedInputMaxSize( Map<Object, Object> properties, Long size )
120    {
121    if( size != null )
122      properties.put( COMBINE_INPUT_FILES_SIZE_MAX, Long.toString( size ) );
123    }
124
125  /**
126   * Creates a new HfsProps instance.
127   *
128   * @return HfsProps instance
129   */
130  public static HfsProps hfsProps()
131    {
132    return new HfsProps();
133    }
134
135  public HfsProps()
136    {
137    }
138
139  public String getTemporaryDirectory()
140    {
141    return temporaryDirectory;
142    }
143
144  /**
145   * Method setTemporaryDirectory sets the temporary directory for use on the underlying filesystem.
146   *
147   * @param temporaryDirectory of type String
148   * @return returns this instance
149   */
150  public HfsProps setTemporaryDirectory( String temporaryDirectory )
151    {
152    this.temporaryDirectory = temporaryDirectory;
153
154    return this;
155    }
156
157  public String getLocalModeScheme()
158    {
159    return localModeScheme;
160    }
161
162  /**
163   * Method setLocalModeScheme provides a means to change the scheme value used to detect when a
164   * MapReduce job should be run in Hadoop local mode. By default the value is {@code "file"}, set to
165   * {@code "none"} to disable entirely.
166   *
167   * @param localModeScheme of type String
168   * @return returns this instance
169   */
170  public HfsProps setLocalModeScheme( String localModeScheme )
171    {
172    this.localModeScheme = localModeScheme;
173
174    return this;
175    }
176
177  public boolean isUseCombinedInput()
178    {
179    return useCombinedInput;
180    }
181
182  /**
183   * Method setUseCombinedInput provides a means to indicate whether to leverage
184   * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat} for the input format. By default it is false.
185   *
186   * @param useCombinedInput boolean
187   * @return returns this instance
188   */
189  public HfsProps setUseCombinedInput( boolean useCombinedInput )
190    {
191    this.useCombinedInput = useCombinedInput;
192
193    return this;
194    }
195
196  public Long getCombinedInputMaxSize()
197    {
198    return combinedInputMaxSize;
199    }
200
201  /**
202   * Method setCombinedInputMaxSize sets the maximum input split size to be used.
203   * <p>
204   * This value is not honored unless {@link #setUseCombinedInput(boolean)} is {@code true}.
205   *
206   * @param combinedInputMaxSize of type long
207   * @return returns this instance
208   */
209  public HfsProps setCombinedInputMaxSize( long combinedInputMaxSize )
210    {
211    this.combinedInputMaxSize = combinedInputMaxSize;
212
213    return this;
214    }
215
216  public boolean isUseCombinedInputSafeMode()
217    {
218    return combinedInputSafeMode;
219    }
220
221  /**
222   * Method setUseCombinedInputSafeMode toggles safe mode when using
223   * {@link org.apache.hadoop.mapred.lib.CombineFileInputFormat}. Safe mode will throw an exception if the underlying
224   * InputFormat is not of type {@link org.apache.hadoop.mapred.FileInputFormat}. If safeMode is off a warning will
225   * be logged instead. safeMode is on by default.
226   * <p>
227   * Setting this property when not setting {@link #setUseCombinedInput(boolean)} to true has no effect.
228   *
229   * @param combinedInputSafeMode boolean
230   * @return returns this instance
231   */
232  public HfsProps setUseCombinedInputSafeMode( boolean combinedInputSafeMode )
233    {
234    this.combinedInputSafeMode = combinedInputSafeMode;
235
236    return this;
237    }
238
239  @Override
240  protected void addPropertiesTo( Properties properties )
241    {
242    setTemporaryDirectory( properties, temporaryDirectory );
243    setLocalModeScheme( properties, localModeScheme );
244    setUseCombinedInput( properties, useCombinedInput );
245    setCombinedInputMaxSize( properties, combinedInputMaxSize );
246    setUseCombinedInputSafeMode( properties, combinedInputSafeMode );
247    }
248  }