/*
 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
 *
 * Project and contact information: http://www.cascading.org/
 *
 * This file is part of the Cascading project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.nested.core;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import cascading.flow.FlowProcess;
import cascading.operation.Filter;
import cascading.operation.FilterCall;
import cascading.operation.OperationCall;
import heretical.pointer.path.Pointer;

/**
 * Class NestedRegexFilter is the base class for {@link Filter} implementations that decide whether to keep a
 * {@link cascading.tuple.Tuple} by inspecting a single value inside a nested object tree.
 * <p>
 * The value is located with a pointer expression, coerced to a {@link String}, and tested against every configured
 * {@link Pattern} using {@link Matcher#find()}, so a pattern only needs to occur somewhere within the value.
 * The tuple is retained as soon as one pattern is found, and removed when none are.
 * <p>
 * A {@code null} value is replaced by the empty string before any pattern is applied.
 */
public class NestedRegexFilter<Node, Results> extends NestedBaseOperation<Node, Results, Matcher[]> implements Filter<Matcher[]>
  {
  /** Stand-in used when the located value coerces to {@code null}. */
  private static final String EMPTY = "";

  /** Compiled pointer used to locate the value to test within the argument node. */
  final Pointer<Node> pointer;
  /** Private copy of the patterns given at construction time. */
  final List<Pattern> patterns;

  /**
   * Creates a new filter.
   *
   * @param nestedCoercibleType the nested type handed to the super class
   * @param pointer             the pointer expression, compiled with {@code getNestedPointerCompiler()}
   * @param patterns            the patterns to test the located value against; the list is copied
   */
  public NestedRegexFilter( NestedCoercibleType<Node, Results> nestedCoercibleType, String pointer, List<Pattern> patterns )
    {
    super( nestedCoercibleType );

    this.pointer = getNestedPointerCompiler().compile( pointer );
    this.patterns = new ArrayList<>( patterns );
    }

  /**
   * Builds one {@link Matcher} per pattern, each initially bound to the empty string, and stores the array as the
   * operation context so {@link #isRemove(FlowProcess, FilterCall)} can reset and reuse them.
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall<Matcher[]> operationCall )
    {
    Matcher[] reusable = new Matcher[ patterns.size() ];
    int slot = 0;

    for( Pattern pattern : patterns )
      reusable[ slot++ ] = pattern.matcher( EMPTY );

    operationCall.setContext( reusable );
    }

  /**
   * Tests the value found by the pointer in the first argument against every matcher held in the context.
   *
   * @return {@code false} (retain the tuple) as soon as one pattern is found in the value, {@code true} (remove the
   * tuple) when no pattern is found, which includes the case of an empty context
   */
  @Override
  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher[]> filterCall )
    {
    Node root = (Node) filterCall.getArguments().getObject( 0, getCoercibleType() );
    Node located = pointer.at( root );

    String coerced = getCoercibleType().coerce( located, String.class );
    String text = coerced == null ? EMPTY : coerced;

    for( Matcher candidate : filterCall.getContext() )
      {
      // reset returns the same matcher, now bound to the current value
      if( candidate.reset( text ).find() )
        return false;
      }

    return true;
    }
  }