001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.tap.partition; 023 024import java.util.regex.Pattern; 025 026import cascading.tuple.Fields; 027import cascading.tuple.TupleEntry; 028import cascading.util.Util; 029 030/** 031 * DelimitedPartition is an implementation of the {@link Partition} interface that allows for simple 032 * text delimited paths as partitions. 033 * <p> 034 * For example, given the delimiter {@code -} (dash), a partition path will have dashes. 035 * <p> 036 * Note the delimiter must not be naturally present in any of the values making up the partition. 037 * <p> 038 * The postfix value will be appended to any partition when created, and removed when the partition is parsed. Use 039 * this value to add static filenames to the output path. It is safe to include the delimiter in the postfix value 040 * (e.g '/somepath/filename.csv' where the delimiter is the default '/'). 041 */ 042public class DelimitedPartition implements Partition 043 { 044 public static final String PATH_DELIM = "/"; 045 046 final Fields partitionFields; 047 final String delimiter; 048 final String postfix; 049 050 int numSplits; 051 052 transient Pattern pattern; 053 054 public DelimitedPartition( Fields partitionFields ) 055 { 056 this( partitionFields, null, null ); 057 } 058 059 public DelimitedPartition( Fields partitionFields, String delimiter ) 060 { 061 this( partitionFields, delimiter, null ); 062 } 063 064 public DelimitedPartition( Fields partitionFields, String delimiter, String postfix ) 065 { 066 if( partitionFields == null ) 067 throw new IllegalArgumentException( "partitionFields must not be null" ); 068 069 if( !partitionFields.isDefined() ) 070 throw new IllegalArgumentException( "partitionFields must be defined, got: " + partitionFields.printVerbose() ); 071 072 this.partitionFields = partitionFields; 073 this.delimiter = delimiter == null ? PATH_DELIM : delimiter; 074 075 postfix = Util.isEmpty( postfix ) ? null : postfix.startsWith( this.delimiter ) ? postfix.substring( this.delimiter.length() ) : postfix; 076 077 this.numSplits = partitionFields.size() + ( postfix != null ? postfix.split( this.delimiter ).length : 0 ); 078 this.postfix = postfix == null ? null : delimiter + postfix; // prefix the postfix w/ the delimiter 079 } 080 081 @Override 082 public int getPathDepth() 083 { 084 return numSplits; 085 } 086 087 @Override 088 public Fields getPartitionFields() 089 { 090 return partitionFields; 091 } 092 093 protected Pattern getPattern() 094 { 095 if( pattern == null ) 096 pattern = Pattern.compile( delimiter ); 097 098 return pattern; 099 } 100 101 public String getDelimiter() 102 { 103 return delimiter; 104 } 105 106 public String getPostfix() 107 { 108 return postfix; 109 } 110 111 @Override 112 public void toTuple( String partition, TupleEntry tupleEntry ) 113 { 114 if( partition.startsWith( delimiter ) ) 115 partition = partition.substring( 1 ); 116 117 String[] split = getPattern().split( partition, numSplits ); 118 119 tupleEntry.setCanonicalValues( split, 0, partitionFields.size() ); 120 } 121 122 @Override 123 public String toPartition( TupleEntry tupleEntry ) 124 { 125 String partition = Util.join( tupleEntry.asIterableOf( String.class ), delimiter, true ); 126 127 if( postfix != null ) 128 partition = partition + postfix; // delimiter prefixed in ctor 129 130 return partition; 131 } 132 }