001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 * Copyright (c) 2007-2017 Xplenty, Inc. All Rights Reserved.
004 *
005 * Project and contact information: http://www.cascading.org/
006 *
007 * This file is part of the Cascading project.
008 *
009 * Licensed under the Apache License, Version 2.0 (the "License");
010 * you may not use this file except in compliance with the License.
011 * You may obtain a copy of the License at
012 *
013 *     http://www.apache.org/licenses/LICENSE-2.0
014 *
015 * Unless required by applicable law or agreed to in writing, software
016 * distributed under the License is distributed on an "AS IS" BASIS,
017 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
018 * See the License for the specific language governing permissions and
019 * limitations under the License.
020 */
021
022package cascading.tuple.hadoop.util;
023
024import java.io.Serializable;
025import java.util.Arrays;
026import java.util.Comparator;
027
028import cascading.tuple.Hasher;
029import cascading.tuple.StreamComparator;
030import cascading.tuple.hadoop.io.BufferedInputStream;
031import org.apache.hadoop.io.WritableComparator;
032
033/**
034 * Class BytesComparator is used to compare arrays of bytes.
035 * <p>
036 * Note that BytesComparator implements {@link Hasher}, but for the Hasher interface to be applied during grouping,
037 * sorting or joining, it must be set on a {@link cascading.tuple.Fields} instance via
038 * {@link cascading.tuple.Fields#setComparator(Comparable, java.util.Comparator)}.
039 */
040public class BytesComparator implements StreamComparator<BufferedInputStream>, Hasher<byte[]>, Comparator<byte[]>, Serializable
041  {
042  @Override
043  public int compare( byte[] lhs, byte[] rhs )
044    {
045    if( lhs == rhs )
046      return 0;
047
048    return WritableComparator.compareBytes( lhs, 0, lhs.length, rhs, 0, rhs.length );
049    }
050
051  @Override
052  public int compare( BufferedInputStream lhsStream, BufferedInputStream rhsStream )
053    {
054    byte[] lhs = lhsStream.getBuffer();
055    int lhsPos = lhsStream.getPosition();
056    int lhsLen = readLen( lhs, lhsPos );
057
058    lhsStream.skip( lhsLen + 4 );
059
060    byte[] rhs = rhsStream.getBuffer();
061    int rhsPos = rhsStream.getPosition();
062    int rhsLen = readLen( rhs, rhsPos );
063
064    rhsStream.skip( rhsLen + 4 );
065
066    return WritableComparator.compareBytes( lhs, lhsPos + 4, lhsLen, rhs, rhsPos + 4, rhsLen );
067    }
068
069  private int readLen( byte[] buffer, int off )
070    {
071    return ( ( buffer[ off ] & 0xff ) << 24 ) +
072      ( ( buffer[ off + 1 ] & 0xff ) << 16 ) +
073      ( ( buffer[ off + 2 ] & 0xff ) << 8 ) +
074      ( buffer[ off + 3 ] & 0xff );
075    }
076
077  @Override
078  public int hashCode( byte[] value )
079    {
080    return Arrays.hashCode( value );
081    }
082  }