001/*
002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved.
003 *
004 * Project and contact information: http://www.cascading.org/
005 *
006 * This file is part of the Cascading project.
007 *
008 * Licensed under the Apache License, Version 2.0 (the "License");
009 * you may not use this file except in compliance with the License.
010 * You may obtain a copy of the License at
011 *
012 *     http://www.apache.org/licenses/LICENSE-2.0
013 *
014 * Unless required by applicable law or agreed to in writing, software
015 * distributed under the License is distributed on an "AS IS" BASIS,
016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
017 * See the License for the specific language governing permissions and
018 * limitations under the License.
019 */
020
021package cascading.nested.json.local;
022
023import java.io.IOException;
024import java.io.InputStream;
025import java.io.LineNumberReader;
026import java.io.OutputStream;
027import java.io.PrintWriter;
028import java.util.Properties;
029
030import cascading.flow.FlowProcess;
031import cascading.nested.json.JSONCoercibleType;
032import cascading.scheme.SinkCall;
033import cascading.scheme.SourceCall;
034import cascading.scheme.local.Compressors;
035import cascading.scheme.local.TextLine;
036import cascading.tuple.Fields;
037import cascading.tuple.Tuple;
038import cascading.tuple.TupleEntry;
039import com.fasterxml.jackson.databind.DeserializationFeature;
040import com.fasterxml.jackson.databind.JsonNode;
041import com.fasterxml.jackson.databind.ObjectMapper;
042
043/**
044 * A JSONTextLine is a type of {@link cascading.scheme.Scheme} for JSON text files. Files are broken into
045 * lines, where each line is a JSON object. Either line-feed or carriage-return are used to signal end of line.
046 * <p>
047 * By default, this scheme returns a {@link Tuple} with one field, "json" with the type {@link JSONCoercibleType}.
048 * <p>
049 * Any {@link Fields} object passed to the constructor will have the JSONCoercibleType.TYPE type applied.
050 * <p>
051 * In order to read or write a compressed files, pass a {@link cascading.scheme.local.CompressorScheme.Compressor}
052 * instance to the appropriate constructors. See {@link Compressors} for provided compression algorithms.
053 *
054 * @see Compressors
055 */
056public class JSONTextLine extends TextLine
057  {
058  public static final Fields DEFAULT_FIELDS = new Fields( "json" ).applyTypes( JSONCoercibleType.TYPE );
059
060  private ObjectMapper mapper = new ObjectMapper();
061
062  {
063  // prevents json object from being created with duplicate names at the same level
064  mapper.setConfig( mapper.getDeserializationConfig()
065    .with( DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY ) );
066  }
067
068  /**
069   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
070   * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json".
071   */
072  public JSONTextLine()
073    {
074    this( DEFAULT_FIELDS );
075    }
076
077  /**
078   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
079   * {@link cascading.flow.local.LocalFlowConnector}.
080   *
081   * @param fields of Fields
082   */
083  public JSONTextLine( Fields fields )
084    {
085    this( fields, DEFAULT_CHARSET );
086    }
087
088  /**
089   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
090   * {@link cascading.flow.local.LocalFlowConnector}.
091   *
092   * @param fields      of Fields
093   * @param charsetName of String
094   */
095  public JSONTextLine( Fields fields, String charsetName )
096    {
097    this( fields, null, charsetName );
098    }
099
100  /**
101   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
102   * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json".
103   *
104   * @param compressor of type Compressor, see {@link Compressors}
105   */
106  public JSONTextLine( Compressor compressor )
107    {
108    this( DEFAULT_FIELDS, compressor );
109    }
110
111  /**
112   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
113   * {@link cascading.flow.local.LocalFlowConnector}.
114   *
115   * @param fields     of Fields
116   * @param compressor of type Compressor, see {@link Compressors}
117   */
118  public JSONTextLine( Fields fields, Compressor compressor )
119    {
120    this( fields, compressor, DEFAULT_CHARSET );
121    }
122
123  /**
124   * Constructor JSONTextLine creates a new JSONTextLine instance for use with the
125   * {@link cascading.flow.local.LocalFlowConnector}.
126   *
127   * @param fields      of Fields
128   * @param compressor  of type Compressor, see {@link Compressors}
129   * @param charsetName of String
130   */
131  public JSONTextLine( Fields fields, Compressor compressor, String charsetName )
132    {
133    if( fields == null )
134      throw new IllegalArgumentException( "fields may not be null" );
135
136    if( !fields.isDefined() )
137      throw new IllegalArgumentException( "fields argument must declare a single field" );
138
139    if( fields.size() != 1 )
140      throw new IllegalArgumentException( "may only declare a single source/sink field in the fields argument" );
141
142    fields = fields.hasTypes() ? fields : fields.applyTypes( JSONCoercibleType.TYPE );
143
144    setSinkFields( fields );
145    setSourceFields( fields );
146
147    setCompressor( compressor );
148
149    // throws an exception if not found
150    setCharsetName( charsetName );
151    }
152
153  @Override
154  public boolean source( FlowProcess<? extends Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException
155    {
156    String line = sourceCall.getContext().readLine();
157
158    if( line == null )
159      return false;
160
161    TupleEntry incomingEntry = sourceCall.getIncomingEntry();
162
163    JsonNode jsonNode = null;
164
165    if( !line.isEmpty() )
166      jsonNode = mapper.readTree( line.getBytes() );
167
168    incomingEntry.setObject( 0, jsonNode );
169
170    return true;
171    }
172
173  @Override
174  public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException
175    {
176    JsonNode jsonNode = (JsonNode) sinkCall.getOutgoingEntry().getTuple().getObject( 0 );
177
178    if( jsonNode == null )
179      {
180      sinkCall.getContext().println();
181      }
182    else
183      {
184      String string = mapper.writeValueAsString( jsonNode );
185
186      sinkCall.getContext().println( string );
187      }
188    }
189  }