001/* 002 * Copyright (c) 2016-2017 Chris K Wensel <chris@wensel.net>. All Rights Reserved. 003 * 004 * Project and contact information: http://www.cascading.org/ 005 * 006 * This file is part of the Cascading project. 007 * 008 * Licensed under the Apache License, Version 2.0 (the "License"); 009 * you may not use this file except in compliance with the License. 010 * You may obtain a copy of the License at 011 * 012 * http://www.apache.org/licenses/LICENSE-2.0 013 * 014 * Unless required by applicable law or agreed to in writing, software 015 * distributed under the License is distributed on an "AS IS" BASIS, 016 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 017 * See the License for the specific language governing permissions and 018 * limitations under the License. 019 */ 020 021package cascading.nested.json.local; 022 023import java.io.IOException; 024import java.io.InputStream; 025import java.io.LineNumberReader; 026import java.io.OutputStream; 027import java.io.PrintWriter; 028import java.util.Properties; 029 030import cascading.flow.FlowProcess; 031import cascading.nested.json.JSONCoercibleType; 032import cascading.scheme.SinkCall; 033import cascading.scheme.SourceCall; 034import cascading.scheme.local.Compressors; 035import cascading.scheme.local.TextLine; 036import cascading.tuple.Fields; 037import cascading.tuple.Tuple; 038import cascading.tuple.TupleEntry; 039import com.fasterxml.jackson.databind.DeserializationFeature; 040import com.fasterxml.jackson.databind.JsonNode; 041import com.fasterxml.jackson.databind.ObjectMapper; 042 043/** 044 * A JSONTextLine is a type of {@link cascading.scheme.Scheme} for JSON text files. Files are broken into 045 * lines, where each line is a JSON object. Either line-feed or carriage-return are used to signal end of line. 046 * <p> 047 * By default, this scheme returns a {@link Tuple} with one field, "json" with the type {@link JSONCoercibleType}. 048 * <p> 049 * Any {@link Fields} object passed to the constructor will have the JSONCoercibleType.TYPE type applied. 050 * <p> 051 * In order to read or write a compressed files, pass a {@link cascading.scheme.local.CompressorScheme.Compressor} 052 * instance to the appropriate constructors. See {@link Compressors} for provided compression algorithms. 053 * 054 * @see Compressors 055 */ 056public class JSONTextLine extends TextLine 057 { 058 public static final Fields DEFAULT_FIELDS = new Fields( "json" ).applyTypes( JSONCoercibleType.TYPE ); 059 060 private ObjectMapper mapper = new ObjectMapper(); 061 062 { 063 // prevents json object from being created with duplicate names at the same level 064 mapper.setConfig( mapper.getDeserializationConfig() 065 .with( DeserializationFeature.FAIL_ON_READING_DUP_TREE_KEY ) ); 066 } 067 068 /** 069 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 070 * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json". 071 */ 072 public JSONTextLine() 073 { 074 this( DEFAULT_FIELDS ); 075 } 076 077 /** 078 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 079 * {@link cascading.flow.local.LocalFlowConnector}. 080 * 081 * @param fields of Fields 082 */ 083 public JSONTextLine( Fields fields ) 084 { 085 this( fields, DEFAULT_CHARSET ); 086 } 087 088 /** 089 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 090 * {@link cascading.flow.local.LocalFlowConnector}. 091 * 092 * @param fields of Fields 093 * @param charsetName of String 094 */ 095 public JSONTextLine( Fields fields, String charsetName ) 096 { 097 this( fields, null, charsetName ); 098 } 099 100 /** 101 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 102 * {@link cascading.flow.local.LocalFlowConnector} returning results with the default field named "json". 103 * 104 * @param compressor of type Compressor, see {@link Compressors} 105 */ 106 public JSONTextLine( Compressor compressor ) 107 { 108 this( DEFAULT_FIELDS, compressor ); 109 } 110 111 /** 112 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 113 * {@link cascading.flow.local.LocalFlowConnector}. 114 * 115 * @param fields of Fields 116 * @param compressor of type Compressor, see {@link Compressors} 117 */ 118 public JSONTextLine( Fields fields, Compressor compressor ) 119 { 120 this( fields, compressor, DEFAULT_CHARSET ); 121 } 122 123 /** 124 * Constructor JSONTextLine creates a new JSONTextLine instance for use with the 125 * {@link cascading.flow.local.LocalFlowConnector}. 126 * 127 * @param fields of Fields 128 * @param compressor of type Compressor, see {@link Compressors} 129 * @param charsetName of String 130 */ 131 public JSONTextLine( Fields fields, Compressor compressor, String charsetName ) 132 { 133 if( fields == null ) 134 throw new IllegalArgumentException( "fields may not be null" ); 135 136 if( !fields.isDefined() ) 137 throw new IllegalArgumentException( "fields argument must declare a single field" ); 138 139 if( fields.size() != 1 ) 140 throw new IllegalArgumentException( "may only declare a single source/sink field in the fields argument" ); 141 142 fields = fields.hasTypes() ? fields : fields.applyTypes( JSONCoercibleType.TYPE ); 143 144 setSinkFields( fields ); 145 setSourceFields( fields ); 146 147 setCompressor( compressor ); 148 149 // throws an exception if not found 150 setCharsetName( charsetName ); 151 } 152 153 @Override 154 public boolean source( FlowProcess<? extends Properties> flowProcess, SourceCall<LineNumberReader, InputStream> sourceCall ) throws IOException 155 { 156 String line = sourceCall.getContext().readLine(); 157 158 if( line == null ) 159 return false; 160 161 TupleEntry incomingEntry = sourceCall.getIncomingEntry(); 162 163 JsonNode jsonNode = null; 164 165 if( !line.isEmpty() ) 166 jsonNode = mapper.readTree( line.getBytes() ); 167 168 incomingEntry.setObject( 0, jsonNode ); 169 170 return true; 171 } 172 173 @Override 174 public void sink( FlowProcess<? extends Properties> flowProcess, SinkCall<PrintWriter, OutputStream> sinkCall ) throws IOException 175 { 176 JsonNode jsonNode = (JsonNode) sinkCall.getOutgoingEntry().getTuple().getObject( 0 ); 177 178 if( jsonNode == null ) 179 { 180 sinkCall.getContext().println(); 181 } 182 else 183 { 184 String string = mapper.writeValueAsString( jsonNode ); 185 186 sinkCall.getContext().println( string ); 187 } 188 } 189 }