/*
 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.stats.hadoop;

import java.io.IOException;

import cascading.flow.FlowException;
import cascading.flow.FlowNode;
import cascading.flow.FlowStep;
import cascading.flow.planner.BaseFlowStep;
import cascading.management.state.ClientState;
import cascading.stats.BaseCachedStepStats;
import cascading.util.Util;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCompletionEvent;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Class HadoopStepStats provides Hadoop specific statistics and methods to underlying Hadoop facilities.
 * <p>
 * Each instance tracks one mandatory mapper node and at most one reducer node, both discovered from the
 * flow step's node graph at construction time.
 */
public abstract class HadoopStepStats extends BaseCachedStepStats<Configuration, RunningJob, Counters>
  {
  private static final Logger LOG = LoggerFactory.getLogger( HadoopStepStats.class );

  /** Stats for the node with no incoming edges in the step's node graph; never null after construction. */
  private HadoopNodeStats mapperNodeStats;
  /** Stats for the remaining node, if any; null when the step has no reducer node. */
  private HadoopNodeStats reducerNodeStats;

  /**
   * Method getJob returns the {@link Job} held by the given {@link RunningJob} in its {@code job} instance field.
   *
   * @param runningJob the job status client, may be null if the job hasn't been submitted
   * @return the underlying Job, or null if runningJob is null or the field could not be read
   */
  protected static Job getJob( RunningJob runningJob )
    {
    if( runningJob == null ) // if null, job hasn't been submitted
      return null;

    // NOTE(review): the helper presumably reads the private "job" field reflectively -- confirm against Util
    Job job = Util.returnInstanceFieldIfExistsSafe( runningJob, "job" );

    if( job == null )
      LOG.warn( "unable to get underlying org.apache.hadoop.mapreduce.Job from org.apache.hadoop.mapred.RunningJob, task level task counters will be unavailable" );

    return job;
    }

  /**
   * Constructor HadoopStepStats creates a new instance, registering node stats for the mapper node and,
   * when present, the reducer node of the given step.
   *
   * @param flowStep    the step these stats describe, must be a {@link BaseFlowStep}
   * @param clientState the client state handed to the parent class and to each node stats instance
   * @throws IllegalStateException if no mapper node is found, or more than one mapper or reducer node is found
   */
  protected HadoopStepStats( FlowStep<JobConf> flowStep, ClientState clientState )
    {
    super( flowStep, clientState );

    BaseFlowStep<JobConf> step = (BaseFlowStep<JobConf>) flowStep;

    // don't rely on the iterator topological sort to identify mapper or reducer
    for( FlowNode current : step.getFlowNodeGraph().vertexSet() )
      {
      // a node with no incoming edges is the mapper, any other node is the reducer
      if( step.getFlowNodeGraph().inDegreeOf( current ) == 0 )
        {
        if( mapperNodeStats != null )
          throw new IllegalStateException( "mapper node already found" );

        mapperNodeStats = new HadoopNodeStats( this, getConfig(), HadoopSliceStats.Kind.MAPPER, current, clientState );
        addNodeStats( mapperNodeStats );
        }
      else
        {
        if( reducerNodeStats != null )
          throw new IllegalStateException( "reducer node already found" );

        reducerNodeStats = new HadoopNodeStats( this, getConfig(), HadoopSliceStats.Kind.REDUCER, current, clientState );
        addNodeStats( reducerNodeStats );
        }
      }

    if( mapperNodeStats == null )
      throw new IllegalStateException( "mapper node not found" );

    // the cache resolves its job status client through this instance
    counterCache = new HadoopStepCounterCache( this, getConfig() )
      {
      @Override
      protected RunningJob getJobStatusClient()
        {
        return HadoopStepStats.this.getJobStatusClient();
        }
      };
    }

  /** Returns the flow step's configuration cast to a Hadoop {@link Configuration}. */
  private Configuration getConfig()
    {
    return (Configuration) this.getFlowStep().getConfig();
    }

  /**
   * Method getNumMapTasks returns the number of child stats currently held by the mapper node stats.
   *
   * @return the numMapTasks (type int) of this HadoopStepStats object.
   */
  public int getNumMapTasks()
    {
    return mapperNodeStats.getChildren().size();
    }

  /**
   * Method getNumReduceTasks returns the number of child stats currently held by the reducer node stats,
   * or zero if this step has no reducer node.
   *
   * @return the numReducerTasks (type int) of this HadoopStepStats object.
   */
  public int getNumReduceTasks()
    {
    if( reducerNodeStats == null )
      return 0;

    return reducerNodeStats.getChildren().size();
    }

  /**
   * Method getProcessStepID returns the job id reported by the job status client.
   *
   * @return the job id as a String, or null if no job status client is available
   */
  @Override
  public String getProcessStepID()
    {
    // read the client once so the null check and the dereference see the same instance
    RunningJob jobStatusClient = getJobStatusClient();

    if( jobStatusClient == null )
      return null;

    return jobStatusClient.getJobID().toString();
    }

  /**
   * Method getJobClient returns the Hadoop {@link JobClient} managing this Hadoop job.
   *
   * @return the jobClient (type JobClient) of this HadoopStepStats object.
   */
  public abstract JobClient getJobClient();

  /**
   * Returns the underlying Map tasks progress percentage.
   * <p>
   * This method is experimental.
   *
   * @return float, zero if the underlying job is not available
   * @throws FlowException if the progress cannot be retrieved, wrapping the originating IOException
   */
  public float getMapProgress()
    {
    Job runningJob = getJob( getJobStatusClient() );

    if( runningJob == null )
      return 0;

    try
      {
      return runningJob.mapProgress();
      }
    catch( IOException exception )
      {
      // keep the cause so the underlying failure is not lost
      throw new FlowException( "unable to get progress", exception );
      }
    }

  /**
   * Returns the underlying Reduce tasks progress percentage.
   * <p>
   * This method is experimental.
   *
   * @return float, zero if the underlying job is not available
   * @throws FlowException if the progress cannot be retrieved, wrapping the originating IOException
   */
  public float getReduceProgress()
    {
    Job runningJob = getJob( getJobStatusClient() );

    if( runningJob == null )
      return 0;

    try
      {
      return runningJob.reduceProgress();
      }
    catch( IOException exception )
      {
      // keep the cause so the underlying failure is not lost
      throw new FlowException( "unable to get progress", exception );
      }
    }

  /**
   * Method getProcessStatusURL returns the tracking URL of the underlying job.
   *
   * @return the tracking URL, or null if the underlying job is not available
   */
  @Override
  public String getProcessStatusURL()
    {
    return getStatusURL();
    }

  /**
   * Method getStatusURL returns the tracking URL of the underlying job.
   *
   * @return the tracking URL, or null if the underlying job is not available
   * @deprecated see {@link #getProcessStatusURL()}
   */
  @Deprecated
  public String getStatusURL()
    {
    Job runningJob = getJob( getJobStatusClient() );

    if( runningJob == null )
      return null;

    return runningJob.getTrackingURL();
    }

  /**
   * Method captureDetail captures statistics task details and completion events.
   * <p>
   * Does nothing unless the requested depth passes the {@code isChild} check against this stats type,
   * the detail is reported stale, and the underlying job is available. When depth is
   * {@code Type.ATTEMPT}, all task completion events are paged through and attached to the node stats.
   * If an IOException occurs the failure is logged and the detail is not marked as captured.
   *
   * @param depth the level of detail to capture
   */
  @Override
  public synchronized void captureDetail( Type depth )
    {
    if( !getType().isChild( depth ) || !isDetailStale() )
      return;

    Job runningJob = getJob( getJobStatusClient() );

    if( runningJob == null )
      return;

    try
      {
      mapperNodeStats.captureDetail( depth );

      if( reducerNodeStats != null )
        reducerNodeStats.captureDetail( depth );

      if( depth == Type.ATTEMPT )
        captureAttempts( runningJob );

      markDetailCaptured();
      }
    catch( IOException exception )
      {
      LOG.warn( "unable to get task stats", exception );
      }
    }

  /** Pages through every task completion event of the given job, starting at offset zero, until none remain. */
  private void captureAttempts( Job runningJob ) throws IOException
    {
    int count = 0;

    while( true )
      {
      TaskCompletionEvent[] events = runningJob.getTaskCompletionEvents( count );

      if( events.length == 0 )
        return;

      addAttemptsToTaskStats( events );
      count += events.length;
      }
    }

  /** Routes each non-null completion event to the mapper or reducer node stats, based on the event's task kind. */
  private void addAttemptsToTaskStats( TaskCompletionEvent[] events )
    {
    for( TaskCompletionEvent event : events )
      {
      if( event == null )
        {
        LOG.warn( "found empty completion event" );
        continue;
        }

      if( event.isMapTask() )
        mapperNodeStats.addAttempt( event );
      else if( reducerNodeStats != null )
        reducerNodeStats.addAttempt( event );
      else // the step has no reducer node, so there is nowhere to record a non-map event
        LOG.debug( "ignoring non-map completion event, step has no reducer node: {}", event );
      }
    }
  }