001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved. 004 * 005 * Project and contact information: http://www.cascading.org/ 006 * 007 * This file is part of the Cascading project. 008 * 009 * Licensed under the Apache License, Version 2.0 (the "License"); 010 * you may not use this file except in compliance with the License. 011 * You may obtain a copy of the License at 012 * 013 * http://www.apache.org/licenses/LICENSE-2.0 014 * 015 * Unless required by applicable law or agreed to in writing, software 016 * distributed under the License is distributed on an "AS IS" BASIS, 017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 018 * See the License for the specific language governing permissions and 019 * limitations under the License. 020 */ 021 022package cascading.tuple.hadoop.util; 023 024import java.io.Serializable; 025import java.util.Arrays; 026import java.util.Comparator; 027 028import cascading.tuple.Hasher; 029import cascading.tuple.StreamComparator; 030import cascading.tuple.hadoop.io.BufferedInputStream; 031import org.apache.hadoop.io.WritableComparator; 032 033/** 034 * Class BytesComparator is used to compare arrays of bytes. 035 * <p> 036 * Note that BytesComparator implements {@link Hasher}, but for the Hasher interface to be applied during grouping, 037 * sorting or joining, it must be set on a {@link cascading.tuple.Fields} instance via 038 * {@link cascading.tuple.Fields#setComparator(Comparable, java.util.Comparator)}. 039 */ 040public class BytesComparator implements StreamComparator<BufferedInputStream>, Hasher<byte[]>, Comparator<byte[]>, Serializable 041 { 042 @Override 043 public int compare( byte[] lhs, byte[] rhs ) 044 { 045 if( lhs == rhs ) 046 return 0; 047 048 return WritableComparator.compareBytes( lhs, 0, lhs.length, rhs, 0, rhs.length ); 049 } 050 051 @Override 052 public int compare( BufferedInputStream lhsStream, BufferedInputStream rhsStream ) 053 { 054 byte[] lhs = lhsStream.getBuffer(); 055 int lhsPos = lhsStream.getPosition(); 056 int lhsLen = readLen( lhs, lhsPos ); 057 058 lhsStream.skip( lhsLen + 4 ); 059 060 byte[] rhs = rhsStream.getBuffer(); 061 int rhsPos = rhsStream.getPosition(); 062 int rhsLen = readLen( rhs, rhsPos ); 063 064 rhsStream.skip( rhsLen + 4 ); 065 066 return WritableComparator.compareBytes( lhs, lhsPos + 4, lhsLen, rhs, rhsPos + 4, rhsLen ); 067 } 068 069 private int readLen( byte[] buffer, int off ) 070 { 071 return ( ( buffer[ off ] & 0xff ) << 24 ) + 072 ( ( buffer[ off + 1 ] & 0xff ) << 16 ) + 073 ( ( buffer[ off + 2 ] & 0xff ) << 8 ) + 074 ( buffer[ off + 3 ] & 0xff ); 075 } 076 077 @Override 078 public int hashCode( byte[] value ) 079 { 080 return Arrays.hashCode( value ); 081 } 082 }