001/* 002 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.operation.regex; 023 024import java.beans.ConstructorProperties; 025import java.util.regex.Matcher; 026 027import cascading.flow.FlowProcess; 028import cascading.operation.Function; 029import cascading.operation.FunctionCall; 030import cascading.operation.OperationCall; 031import cascading.tuple.Fields; 032import cascading.tuple.Tuple; 033import cascading.tuple.TupleEntry; 034import cascading.util.Pair; 035 036/** 037 * Class RegexGenerator will emit a new Tuple for every matched regex group. 038 * <p> 039 * Note a {@code null} valued argument passed to the parser will be converted to an empty string ({@code ""}) before 040 * the regex is applied. 041 * <p> 042 * Any Object value will be coerced to a String type if type information is provided. See the 043 * {@link cascading.tuple.type.CoercibleType} interface to control how custom Object types are converted to String 044 * values. 045 */ 046public class RegexGenerator extends RegexOperation<Pair<Matcher, TupleEntry>> implements Function<Pair<Matcher, TupleEntry>> 047 { 048 /** 049 * Constructor RegexGenerator creates a new RegexGenerator instance. 050 * 051 * @param patternString of type String 052 */ 053 @ConstructorProperties({"patternString"}) 054 public RegexGenerator( String patternString ) 055 { 056 super( 1, Fields.size( 1 ), patternString ); 057 } 058 059 /** 060 * Constructor RegexGenerator creates a new RegexGenerator instance. 061 * 062 * @param fieldDeclaration of type Fields 063 * @param patternString of type String 064 */ 065 @ConstructorProperties({"fieldDeclaration", "patternString"}) 066 public RegexGenerator( Fields fieldDeclaration, String patternString ) 067 { 068 super( 1, fieldDeclaration, patternString ); 069 070 if( fieldDeclaration.size() != 1 ) 071 throw new IllegalArgumentException( "fieldDeclaration may only declare one field, was " + fieldDeclaration.print() ); 072 } 073 074 @Override 075 public void prepare( FlowProcess flowProcess, OperationCall<Pair<Matcher, TupleEntry>> operationCall ) 076 { 077 TupleEntry tupleEntry = new TupleEntry( operationCall.getDeclaredFields(), Tuple.size( 1 ) ); 078 079 operationCall.setContext( new Pair<>( getPattern().matcher( "" ), tupleEntry ) ); 080 } 081 082 @Override 083 public void operate( FlowProcess flowProcess, FunctionCall<Pair<Matcher, TupleEntry>> functionCall ) 084 { 085 String value = functionCall.getArguments().getString( 0 ); 086 087 if( value == null ) 088 value = ""; 089 090 Matcher matcher = functionCall.getContext().getLhs().reset( value ); 091 092 while( matcher.find() ) 093 { 094 functionCall.getContext().getRhs().setString( 0, matcher.group() ); 095 functionCall.getOutputCollector().add( functionCall.getContext().getRhs() ); 096 } 097 } 098 }