001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tap.partition;
023
024import java.util.regex.Pattern;
025
026import cascading.tuple.Fields;
027import cascading.tuple.TupleEntry;
028import cascading.util.Util;
029
030/**
031 * DelimitedPartition is an implementation of the {@link Partition} interface that allows for simple
032 * text delimited paths as partitions.
033 * <p>
034 * For example, given the delimiter {@code -} (dash), a partition path will have dashes.
035 * <p>
036 * Note the delimiter must not be naturally present in any of the values making up the partition.
037 * <p>
038 * The postfix value will be appended to any partition when created, and removed when the partition is parsed. Use
039 * this value to add static filenames to the output path. It is safe to include the delimiter in the postfix value
040 * (e.g '/somepath/filename.csv' where the delimiter is the default '/').
041 */
042public class DelimitedPartition implements Partition
043  {
044  public static final String PATH_DELIM = "/";
045
046  final Fields partitionFields;
047  final String delimiter;
048  final String postfix;
049
050  int numSplits;
051
052  transient Pattern pattern;
053
054  public DelimitedPartition( Fields partitionFields )
055    {
056    this( partitionFields, null, null );
057    }
058
059  public DelimitedPartition( Fields partitionFields, String delimiter )
060    {
061    this( partitionFields, delimiter, null );
062    }
063
064  public DelimitedPartition( Fields partitionFields, String delimiter, String postfix )
065    {
066    if( partitionFields == null )
067      throw new IllegalArgumentException( "partitionFields must not be null" );
068
069    if( !partitionFields.isDefined() )
070      throw new IllegalArgumentException( "partitionFields must be defined, got: " + partitionFields.printVerbose() );
071
072    this.partitionFields = partitionFields;
073    this.delimiter = delimiter == null ? PATH_DELIM : delimiter;
074
075    postfix = Util.isEmpty( postfix ) ? null : postfix.startsWith( this.delimiter ) ? postfix.substring( this.delimiter.length() ) : postfix;
076
077    this.numSplits = partitionFields.size() + ( postfix != null ? postfix.split( this.delimiter ).length : 0 );
078    this.postfix = postfix == null ? null : delimiter + postfix; // prefix the postfix w/ the delimiter
079    }
080
081  @Override
082  public int getPathDepth()
083    {
084    return numSplits;
085    }
086
087  @Override
088  public Fields getPartitionFields()
089    {
090    return partitionFields;
091    }
092
093  protected Pattern getPattern()
094    {
095    if( pattern == null )
096      pattern = Pattern.compile( delimiter );
097
098    return pattern;
099    }
100
101  public String getDelimiter()
102    {
103    return delimiter;
104    }
105
106  public String getPostfix()
107    {
108    return postfix;
109    }
110
111  @Override
112  public void toTuple( String partition, TupleEntry tupleEntry )
113    {
114    if( partition.startsWith( delimiter ) )
115      partition = partition.substring( 1 );
116
117    String[] split = getPattern().split( partition, numSplits );
118
119    tupleEntry.setCanonicalValues( split, 0, partitionFields.size() );
120    }
121
122  @Override
123  public String toPartition( TupleEntry tupleEntry )
124    {
125    String partition = Util.join( tupleEntry.asIterableOf( String.class ), delimiter, true );
126
127    if( postfix != null )
128      partition = partition + postfix; // delimiter prefixed in ctor
129
130    return partition;
131    }
132  }