Skip to content

Commit 5b4a454

Browse files
authored
Replacing the use of Java object serialization with Jackson (#2080)
1 parent 8007115 commit 5b4a454

File tree

15 files changed

+147
-113
lines changed

15 files changed

+147
-113
lines changed

mr/src/main/java/org/elasticsearch/hadoop/rest/PartitionDefinition.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ public static class PartitionDefinitionBuilder {
4747

4848
private PartitionDefinitionBuilder(Settings settings, Mapping resolvedMapping) {
4949
this.serializedSettings = settings == null ? null : settings.save();
50-
this.serializedMapping = resolvedMapping == null ? null : IOUtils.serializeToBase64(resolvedMapping);
50+
this.serializedMapping = resolvedMapping == null ? null : IOUtils.serializeToJsonString(resolvedMapping);
5151
}
5252

5353
public PartitionDefinition build(String index, int shardId) {

mr/src/main/java/org/elasticsearch/hadoop/rest/RestService.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ public static PartitionReader createReader(Settings settings, PartitionDefinitio
419419
RestRepository repository = new RestRepository(settings);
420420
Mapping fieldMapping = null;
421421
if (StringUtils.hasText(partition.getSerializedMapping())) {
422-
fieldMapping = IOUtils.deserializeFromBase64(partition.getSerializedMapping());
422+
fieldMapping = IOUtils.deserializeFromJsonString(partition.getSerializedMapping(), Mapping.class);
423423
}
424424
else {
425425
log.warn(String.format("No mapping found for [%s] - either no index exists or the partition configuration has been corrupted", partition));

mr/src/main/java/org/elasticsearch/hadoop/serialization/dto/mapping/Field.java

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@
2121
import java.io.Serializable;
2222
import java.util.Arrays;
2323
import java.util.Collection;
24+
import java.util.Objects;
2425

2526
import org.elasticsearch.hadoop.serialization.FieldType;
27+
import org.elasticsearch.hadoop.thirdparty.codehaus.jackson.annotate.JsonCreator;
28+
import org.elasticsearch.hadoop.thirdparty.codehaus.jackson.annotate.JsonProperty;
2629

27-
@SuppressWarnings("serial")
2830
public class Field implements Serializable {
2931

3032
static final Field[] NO_FIELDS = new Field[0];
@@ -41,20 +43,24 @@ public Field(String name, FieldType type, Collection<Field> properties) {
4143
this(name, type, (properties != null ? properties.toArray(new Field[properties.size()]) : NO_FIELDS));
4244
}
4345

44-
Field(String name, FieldType type, Field[] properties) {
46+
@JsonCreator
47+
Field(@JsonProperty("name") String name, @JsonProperty("type") FieldType type, @JsonProperty("properties") Field[] properties) {
4548
this.name = name;
4649
this.type = type;
4750
this.properties = properties;
4851
}
4952

53+
@JsonProperty("properties")
5054
public Field[] properties() {
5155
return properties;
5256
}
5357

58+
@JsonProperty("type")
5459
public FieldType type() {
5560
return type;
5661
}
5762

63+
@JsonProperty("name")
5864
public String name() {
5965
return name;
6066
}
@@ -63,4 +69,15 @@ public String name() {
6369
public String toString() {
6470
return String.format("%s=%s", name, ((type == FieldType.OBJECT || type == FieldType.NESTED) ? Arrays.toString(properties) : type));
6571
}
72+
73+
@Override
74+
public boolean equals(Object o) {
75+
if (o instanceof Field == false) {
76+
return false;
77+
}
78+
Field other = (Field) o;
79+
return Objects.equals(this.name, other.name) &&
80+
Objects.equals(this.type, other.type) &&
81+
Objects.deepEquals(this.properties, other.properties);
82+
}
6683
}

mr/src/main/java/org/elasticsearch/hadoop/serialization/dto/mapping/Mapping.java

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919

2020
package org.elasticsearch.hadoop.serialization.dto.mapping;
2121

22+
import org.elasticsearch.hadoop.serialization.FieldType;
23+
import org.elasticsearch.hadoop.serialization.field.FieldFilter;
24+
import org.elasticsearch.hadoop.thirdparty.codehaus.jackson.annotate.JsonProperty;
25+
2226
import java.io.Serializable;
2327
import java.util.ArrayList;
2428
import java.util.Arrays;
@@ -27,9 +31,7 @@
2731
import java.util.LinkedHashMap;
2832
import java.util.List;
2933
import java.util.Map;
30-
31-
import org.elasticsearch.hadoop.serialization.FieldType;
32-
import org.elasticsearch.hadoop.serialization.field.FieldFilter;
34+
import java.util.Objects;
3335

3436
/**
3537
* A mapping has a name and a collection of fields.
@@ -53,7 +55,7 @@ public Mapping(String index, String name, Collection<Field> fields) {
5355
this(index, name, (fields != null ? fields.toArray(new Field[fields.size()]) : Field.NO_FIELDS));
5456
}
5557

56-
Mapping(String index, String type, Field[] fields) {
58+
Mapping(@JsonProperty("index") String index, @JsonProperty("type") String type, @JsonProperty("fields") Field[] fields) {
5759
this.index = index;
5860
this.type = type;
5961
this.fields = fields;
@@ -154,4 +156,15 @@ public String toString() {
154156
return String.format("%s=%s", index, Arrays.toString(fields));
155157
}
156158
}
159+
160+
@Override
161+
public boolean equals(Object o) {
162+
if (o instanceof Mapping == false) {
163+
return false;
164+
}
165+
Mapping other = (Mapping) o;
166+
return Objects.equals(this.index, other.index) &&
167+
Objects.equals(this.type, other.type) &&
168+
Objects.deepEquals(this.fields, other.fields);
169+
}
157170
}

mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java

Lines changed: 24 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -18,31 +18,25 @@
1818
*/
1919
package org.elasticsearch.hadoop.util;
2020

21+
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
22+
import org.elasticsearch.hadoop.serialization.EsHadoopSerializationException;
23+
import org.elasticsearch.hadoop.thirdparty.codehaus.jackson.map.ObjectMapper;
24+
import org.elasticsearch.hadoop.thirdparty.codehaus.jackson.map.SerializationConfig;
25+
2126
import java.io.ByteArrayInputStream;
2227
import java.io.Closeable;
2328
import java.io.File;
2429
import java.io.IOException;
2530
import java.io.InputStream;
26-
import java.io.ObjectInputStream;
27-
import java.io.ObjectOutputStream;
28-
import java.io.Serializable;
2931
import java.io.StringReader;
3032
import java.io.StringWriter;
3133
import java.lang.reflect.Field;
3234
import java.net.JarURLConnection;
3335
import java.net.URI;
3436
import java.net.URISyntaxException;
3537
import java.net.URL;
36-
import java.net.URLConnection;
37-
import java.net.URLStreamHandler;
3838
import java.util.Properties;
3939

40-
import javax.xml.bind.DatatypeConverter;
41-
42-
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
43-
import org.elasticsearch.hadoop.EsHadoopIllegalStateException;
44-
import org.elasticsearch.hadoop.serialization.EsHadoopSerializationException;
45-
4640
/**
4741
* Utility class used internally for the Pig support.
4842
*/
@@ -55,42 +49,38 @@ public abstract class IOUtils {
5549
ReflectionUtils.makeAccessible(BYTE_ARRAY_BUFFER);
5650
}
5751

58-
public static String serializeToBase64(Serializable object) {
52+
private static final ObjectMapper mapper = new ObjectMapper().configure(SerializationConfig.Feature.FAIL_ON_EMPTY_BEANS, false);
53+
54+
/**
55+
* Serializes the given object into a JSON string using Jackson (a null object yields an empty string). The object must support Jackson serialization.
56+
*/
57+
public static String serializeToJsonString(Object object) {
5958
if (object == null) {
6059
return StringUtils.EMPTY;
6160
}
62-
FastByteArrayOutputStream baos = new FastByteArrayOutputStream();
63-
ObjectOutputStream oos = null;
61+
final String json;
6462
try {
65-
oos = new ObjectOutputStream(baos);
66-
oos.writeObject(object);
63+
json = mapper.writeValueAsString(object);
6764
} catch (IOException ex) {
68-
throw new EsHadoopSerializationException("Cannot serialize object " + object, ex);
69-
} finally {
70-
close(oos);
65+
throw new EsHadoopSerializationException("Cannot serialize object: " + object, ex);
7166
}
72-
return DatatypeConverter.printBase64Binary(baos.bytes().bytes());
67+
return json;
7368
}
7469

75-
@SuppressWarnings("unchecked")
76-
public static <T extends Serializable> T deserializeFromBase64(String data) {
70+
/**
71+
* Deserializes a JSON string that was created by serializeToJsonString into an instance of the given class; returns null when the string is null or empty.
72+
*/
73+
public static <T> T deserializeFromJsonString(String data, Class<T> clazz) {
7774
if (!StringUtils.hasLength(data)) {
7875
return null;
7976
}
80-
81-
byte[] rawData = DatatypeConverter.parseBase64Binary(data);
82-
ObjectInputStream ois = null;
77+
final T object;
8378
try {
84-
ois = new ObjectInputStream(new FastByteArrayInputStream(rawData));
85-
Object o = ois.readObject();
86-
return (T) o;
87-
} catch (ClassNotFoundException ex) {
88-
throw new EsHadoopIllegalStateException("cannot deserialize object", ex);
89-
} catch (IOException ex) {
90-
throw new EsHadoopSerializationException("cannot deserialize object", ex);
91-
} finally {
92-
close(ois);
79+
object = mapper.readValue(data, clazz);
80+
} catch (IOException e) {
81+
throw new EsHadoopSerializationException("Cannot deserialize string: [" + data + "]", e);
9382
}
83+
return object;
9484
}
9585

9686
public static String propsToString(Properties props) {

mr/src/main/java/org/elasticsearch/hadoop/util/SettingsUtils.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@
2929

3030
import java.net.InetAddress;
3131
import java.net.UnknownHostException;
32-
import java.util.ArrayList;
3332
import java.util.Collection;
3433
import java.util.LinkedHashMap;
3534
import java.util.LinkedHashSet;
@@ -177,11 +176,11 @@ public static void setFilters(Settings settings, String... filters) {
177176
return;
178177
}
179178

180-
settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS, IOUtils.serializeToBase64(filters));
179+
settings.setProperty(InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS, IOUtils.serializeToJsonString(filters));
181180
}
182181

183182
public static String[] getFilters(Settings settings) {
184-
return IOUtils.deserializeFromBase64(settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS));
183+
return IOUtils.deserializeFromJsonString(settings.getProperty(InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS), String[].class);
185184
}
186185

187186
public static String determineSourceFields(Settings settings) {

mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,19 @@
3232
import java.net.URL;
3333
import java.net.URLConnection;
3434
import java.net.URLStreamHandler;
35+
import java.util.ArrayList;
36+
import java.util.List;
3537
import java.util.jar.JarFile;
3638

3739
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
40+
import org.elasticsearch.hadoop.serialization.EsHadoopSerializationException;
41+
import org.elasticsearch.hadoop.serialization.FieldType;
42+
import org.elasticsearch.hadoop.serialization.dto.mapping.Field;
43+
import org.elasticsearch.hadoop.serialization.dto.mapping.Mapping;
3844
import org.junit.Test;
3945

46+
import static junit.framework.TestCase.assertNull;
47+
import static org.junit.Assert.assertArrayEquals;
4048
import static org.junit.Assert.assertEquals;
4149
import static org.junit.Assert.assertNotNull;
4250
import static org.junit.Assert.fail;
@@ -96,6 +104,23 @@ public void testToCanonicalFileSpringBoot() throws Exception {
96104
assertEquals("jar:" + jarWithinJarPath, canonicalFilePath);
97105
}
98106

107+
@Test
108+
public void testDeserializeFromJsonString() {
109+
assertNull(IOUtils.deserializeFromJsonString("", String.class));
110+
try {
111+
IOUtils.deserializeFromJsonString("junk", String.class);
112+
fail("Should have thrown an EsHadoopIllegalArgumentException");
113+
} catch (EsHadoopSerializationException expected) {}
114+
List<Field> fieldsList = new ArrayList<>();
115+
fieldsList.add(new Field("%s", FieldType.TEXT));
116+
Mapping mapping = new Mapping("*", "*", fieldsList);
117+
Mapping roundTripMapping = IOUtils.deserializeFromJsonString(IOUtils.serializeToJsonString(mapping), Mapping.class);
118+
assertEquals(mapping, roundTripMapping);
119+
String[] filters = new String[]{"{\"exists\":{\"field\":\"id\"}}", "{\"match\":{\"id\":1}}"};
120+
String[] roundTripFilters = IOUtils.deserializeFromJsonString(IOUtils.serializeToJsonString(filters), String[].class);
121+
assertArrayEquals(filters, roundTripFilters);
122+
}
123+
99124
/**
100125
* This class simulates what Spring Boot's URLStreamHandler does.
101126
*/

pig/src/main/java/org/elasticsearch/hadoop/pig/EsStorage.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ public void checkSchema(ResourceSchema s) throws IOException {
142142
// save schema to back-end for JSON translation
143143
if (!StringUtils.hasText(props.getProperty(ResourceSchema.class.getName()))) {
144144
// save the schema as a String (uses JSON serialization via IOUtils since toString() screws up the signature - see the testcase)
145-
props.setProperty(ResourceSchema.class.getName(), IOUtils.serializeToBase64(s));
145+
props.setProperty(ResourceSchema.class.getName(), IOUtils.serializeToJsonString(s));
146146
}
147147
}
148148

@@ -204,7 +204,7 @@ public void prepareToWrite(RecordWriter writer) throws IOException {
204204
this.schema = new ResourceSchema();
205205
}
206206
else {
207-
this.schema = IOUtils.deserializeFromBase64(s);
207+
this.schema = IOUtils.deserializeFromJsonString(s, ResourceSchema.class);
208208
}
209209
this.pigTuple = new PigTuple(schema);
210210
}

pig/src/test/java/org/elasticsearch/hadoop/pig/PigSchemaSaveTest.java

Lines changed: 0 additions & 56 deletions
This file was deleted.

spark/sql-13/src/main/scala/org/elasticsearch/spark/sql/DefaultSource.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,7 @@ private[sql] case class ElasticsearchRelation(parameters: Map[String, String], @
209209
if (Utils.LOGGER.isTraceEnabled()) {
210210
Utils.LOGGER.trace(s"Transformed filters into DSL ${filterString.mkString("[", ",", "]")}")
211211
}
212-
paramWithScan += (InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS -> IOUtils.serializeToBase64(filterString))
212+
paramWithScan += (InternalConfigurationOptions.INTERNAL_ES_QUERY_FILTERS -> IOUtils.serializeToJsonString(filterString))
213213
}
214214
else {
215215
if (Utils.LOGGER.isTraceEnabled()) {

0 commit comments

Comments
 (0)