001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.stats.hadoop;
023
024import java.io.IOException;
025
026import cascading.flow.FlowException;
027import cascading.flow.FlowNode;
028import cascading.flow.FlowStep;
029import cascading.flow.planner.BaseFlowStep;
030import cascading.management.state.ClientState;
031import cascading.stats.BaseCachedStepStats;
032import cascading.util.Util;
033import org.apache.hadoop.conf.Configuration;
034import org.apache.hadoop.mapred.JobClient;
035import org.apache.hadoop.mapred.JobConf;
036import org.apache.hadoop.mapred.RunningJob;
037import org.apache.hadoop.mapreduce.Counters;
038import org.apache.hadoop.mapreduce.Job;
039import org.apache.hadoop.mapreduce.TaskCompletionEvent;
040import org.slf4j.Logger;
041import org.slf4j.LoggerFactory;
042
043/** Class HadoopStepStats provides Hadoop specific statistics and methods to underlying Hadoop facilities. */
044public abstract class HadoopStepStats extends BaseCachedStepStats<Configuration, RunningJob, Counters>
045  {
046  private static final Logger LOG = LoggerFactory.getLogger( HadoopStepStats.class );
047
048  private HadoopNodeStats mapperNodeStats;
049  private HadoopNodeStats reducerNodeStats;
050
051  protected static Job getJob( RunningJob runningJob )
052    {
053    if( runningJob == null ) // if null, job hasn't been submitted
054      return null;
055
056    Job job = Util.returnInstanceFieldIfExistsSafe( runningJob, "job" );
057
058    if( job == null )
059      {
060      LOG.warn( "unable to get underlying org.apache.hadoop.mapreduce.Job from org.apache.hadoop.mapred.RunningJob, task level task counters will be unavailable" );
061      return null;
062      }
063
064    return job;
065    }
066
067  protected HadoopStepStats( FlowStep<JobConf> flowStep, ClientState clientState )
068    {
069    super( flowStep, clientState );
070
071    BaseFlowStep<JobConf> step = (BaseFlowStep<JobConf>) flowStep;
072
073    // don't rely on the iterator topological sort to identify mapper or reducer
074    for( FlowNode current : step.getFlowNodeGraph().vertexSet() )
075      {
076      if( step.getFlowNodeGraph().inDegreeOf( current ) == 0 )
077        {
078        if( mapperNodeStats != null )
079          throw new IllegalStateException( "mapper node already found" );
080
081        mapperNodeStats = new HadoopNodeStats( this, getConfig(), HadoopSliceStats.Kind.MAPPER, current, clientState );
082        addNodeStats( mapperNodeStats );
083        }
084      else
085        {
086        if( reducerNodeStats != null )
087          throw new IllegalStateException( "reducer node already found" );
088
089        reducerNodeStats = new HadoopNodeStats( this, getConfig(), HadoopSliceStats.Kind.REDUCER, current, clientState );
090        addNodeStats( reducerNodeStats );
091        }
092      }
093
094    if( mapperNodeStats == null )
095      throw new IllegalStateException( "mapper node not found" );
096
097    counterCache = new HadoopStepCounterCache( this, (Configuration) getConfig() )
098      {
099      @Override
100      protected RunningJob getJobStatusClient()
101        {
102        return HadoopStepStats.this.getJobStatusClient();
103        }
104      };
105    }
106
107  private Configuration getConfig()
108    {
109    return (Configuration) this.getFlowStep().getConfig();
110    }
111
112  /**
113   * Method getNumMapTasks returns the numMapTasks from the Hadoop job file.
114   *
115   * @return the numMapTasks (type int) of this HadoopStepStats object.
116   */
117  public int getNumMapTasks()
118    {
119    return mapperNodeStats.getChildren().size();
120    }
121
122  /**
123   * Method getNumReduceTasks returns the numReducerTasks from the Hadoop job file.
124   *
125   * @return the numReducerTasks (type int) of this HadoopStepStats object.
126   */
127  public int getNumReduceTasks()
128    {
129    return reducerNodeStats == null ? 0 : reducerNodeStats.getChildren().size();
130    }
131
132  @Override
133  public String getProcessStepID()
134    {
135    if( getJobStatusClient() == null )
136      return null;
137
138    return getJobStatusClient().getJobID().toString();
139    }
140
141  /**
142   * Method getJobClient returns the Hadoop {@link JobClient} managing this Hadoop job.
143   *
144   * @return the jobClient (type JobClient) of this HadoopStepStats object.
145   */
146  public abstract JobClient getJobClient();
147
148  /**
149   * Returns the underlying Map tasks progress percentage.
150   * <p>
151   * This method is experimental.
152   *
153   * @return float
154   */
155  public float getMapProgress()
156    {
157    Job runningJob = getJob( getJobStatusClient() );
158
159    if( runningJob == null )
160      return 0;
161
162    try
163      {
164      return runningJob.mapProgress();
165      }
166    catch( IOException exception )
167      {
168      throw new FlowException( "unable to get progress" );
169      }
170    }
171
172  /**
173   * Returns the underlying Reduce tasks progress percentage.
174   * <p>
175   * This method is experimental.
176   *
177   * @return float
178   */
179  public float getReduceProgress()
180    {
181    Job runningJob = getJob( getJobStatusClient() );
182
183    if( runningJob == null )
184      return 0;
185
186    try
187      {
188      return runningJob.reduceProgress();
189      }
190    catch( IOException exception )
191      {
192      throw new FlowException( "unable to get progress" );
193      }
194    }
195
196  @Override
197  public String getProcessStatusURL()
198    {
199    return getStatusURL();
200    }
201
202  /**
203   * @deprecated see {@link #getProcessStatusURL()}
204   */
205  @Deprecated
206  public String getStatusURL()
207    {
208    Job runningJob = getJob( getJobStatusClient() );
209
210    if( runningJob == null )
211      return null;
212
213    return runningJob.getTrackingURL();
214    }
215
216  /** Method captureDetail captures statistics task details and completion events. */
217  @Override
218  public synchronized void captureDetail( Type depth )
219    {
220    if( !getType().isChild( depth ) || !isDetailStale() )
221      return;
222
223    Job runningJob = getJob( getJobStatusClient() );
224
225    if( runningJob == null )
226      return;
227
228    try
229      {
230      mapperNodeStats.captureDetail( depth );
231
232      if( reducerNodeStats != null )
233        reducerNodeStats.captureDetail( depth );
234
235      int count = 0;
236
237      while( depth == Type.ATTEMPT )
238        {
239        TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents( count );
240
241        if( events.length == 0 )
242          break;
243
244        addAttemptsToTaskStats( events );
245        count += events.length;
246        }
247
248      markDetailCaptured();
249      }
250    catch( IOException exception )
251      {
252      LOG.warn( "unable to get task stats", exception );
253      }
254    }
255
256  private void addAttemptsToTaskStats( TaskCompletionEvent[] events )
257    {
258    for( TaskCompletionEvent event : events )
259      {
260      if( event == null )
261        {
262        LOG.warn( "found empty completion event" );
263        continue;
264        }
265
266      if( event.isMapTask() )
267        mapperNodeStats.addAttempt( event );
268      else
269        reducerNodeStats.addAttempt( event );
270      }
271    }
272  }