001/*
002 * Copyright (c) 2016-2017 Chris K Wensel. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.nested.core;
022
023import java.util.ArrayList;
024import java.util.List;
025import java.util.regex.Matcher;
026import java.util.regex.Pattern;
027
028import cascading.flow.FlowProcess;
029import cascading.operation.Filter;
030import cascading.operation.FilterCall;
031import cascading.operation.OperationCall;
032import heretical.pointer.path.Pointer;
033
034/**
035 * Class NestedRegexFilter is the base class for {@link Filter} implementations that want to filter a tuple stream
036 * based on the values in a nested object tree.
037 * <p>
038 * {@link cascading.tuple.Tuple} instances are retained if any of the {@link Pattern} instances match.
039 * <p>
040 * Any {@code null} values will be converted to an empty string before being passed to a pattern for matching.
041 */
042public class NestedRegexFilter<Node, Results> extends NestedBaseOperation<Node, Results, Matcher[]> implements Filter<Matcher[]>
043  {
044  private static final String EMPTY = "";
045
046  final Pointer<Node> pointer;
047  final List<Pattern> patterns;
048
049  public NestedRegexFilter( NestedCoercibleType<Node, Results> nestedCoercibleType, String pointer, List<Pattern> patterns )
050    {
051    super( nestedCoercibleType );
052    this.pointer = getNestedPointerCompiler().compile( pointer );
053    this.patterns = new ArrayList<>( patterns );
054    }
055
056  @Override
057  public void prepare( FlowProcess flowProcess, OperationCall<Matcher[]> operationCall )
058    {
059    Matcher[] matchers = new Matcher[ patterns.size() ];
060
061    for( int i = 0; i < patterns.size(); i++ )
062      matchers[ i ] = patterns.get( i ).matcher( "" );
063
064    operationCall.setContext( matchers );
065    }
066
067  @Override
068  public boolean isRemove( FlowProcess flowProcess, FilterCall<Matcher[]> filterCall )
069    {
070    Node node = (Node) filterCall.getArguments().getObject( 0, getCoercibleType() );
071    Node result = pointer.at( node );
072
073    String value = getCoercibleType().coerce( result, String.class );
074
075    if( value == null )
076      value = EMPTY;
077
078    for( Matcher matcher : filterCall.getContext() )
079      {
080      matcher.reset( value );
081
082      if( matcher.find() )
083        return false;
084      }
085
086    return true;
087    }
088  }