001/*
002 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.operation.regex;
023
024import java.beans.ConstructorProperties;
025import java.util.regex.Matcher;
026
027import cascading.flow.FlowProcess;
028import cascading.operation.Function;
029import cascading.operation.FunctionCall;
030import cascading.operation.OperationCall;
031import cascading.tuple.Fields;
032import cascading.tuple.Tuple;
033import cascading.tuple.TupleEntry;
034import cascading.util.Pair;
035
036/**
037 * Class RegexGenerator will emit a new Tuple for every matched regex group.
038 * <p>
039 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before
040 * the regex is applied.
041 * <p>
042 * Any Object value will be coerced to a String type if type information is provided. See the
043 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String
044 * values.
045 */
046public class RegexGenerator extends RegexOperation<Pair<Matcher, TupleEntry>> implements Function<Pair<Matcher, TupleEntry>>
047  {
048  /**
049   * Constructor RegexGenerator creates a new RegexGenerator instance.
050   *
051   * @param patternString of type String
052   */
053  @ConstructorProperties({"patternString"})
054  public RegexGenerator( String patternString )
055    {
056    super( 1, Fields.size( 1 ), patternString );
057    }
058
059  /**
060   * Constructor RegexGenerator creates a new RegexGenerator instance.
061   *
062   * @param fieldDeclaration of type Fields
063   * @param patternString    of type String
064   */
065  @ConstructorProperties({"fieldDeclaration", "patternString"})
066  public RegexGenerator( Fields fieldDeclaration, String patternString )
067    {
068    super( 1, fieldDeclaration, patternString );
069
070    if( fieldDeclaration.size() != 1 )
071      throw new IllegalArgumentException( "fieldDeclaration may only declare one field, was " + fieldDeclaration.print() );
072    }
073
074  @Override
075  public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall )
076    {
077    TupleEntry tupleEntry = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) );
078
079    operationCall.setContext( new Pair<>( getPattern().matcher( "" ), tupleEntry ) );
080    }
081
082  @Override
083  public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall )
084    {
085    String value = functionCall.getArguments().getString( 0 );
086
087    if( value == null )
088      value = "";
089
090    Matcher matcher = functionCall.getContext().getLhs().reset( value );
091
092    while( matcher.find() )
093      {
094      functionCall.getContext().getRhs().setString( 0, matcher.group() );
095      functionCall.getOutputCollector().add( functionCall.getContext().getRhs() );
096      }
097    }
098  }