From f7f61e097c7ecf7befaea192ffaf36fab1c754c7 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Thu, 3 Feb 2022 13:28:08 -0600 Subject: [PATCH 1/2] Updating IOUtils.toCanonicalFilePath to work in a spring boot application --- .../elasticsearch/hadoop/util/IOUtils.java | 46 +++++++++++-- .../hadoop/util/IOUtilsTest.java | 64 +++++++++++++++++++ 2 files changed, 103 insertions(+), 7 deletions(-) diff --git a/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java b/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java index 8e29468f7..7de2355d4 100644 --- a/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java +++ b/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java @@ -33,6 +33,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.net.URL; +import java.net.URLConnection; +import java.net.URLStreamHandler; import java.util.Properties; import javax.xml.bind.DatatypeConverter; @@ -237,14 +239,44 @@ public static String toCanonicalFilePath(URL fileURL) throws URISyntaxException, JarURLConnection jarURLConnection = (JarURLConnection) fileURL.openConnection(); fileURL = jarURLConnection.getJarFileURL(); } + /* + * Ordinarily at this point we would have a URL with a "file" protocol. But Spring boot puts the es-hadoop jar inside of the + * spring boot jar like: + * jar:file:/some/path/outer.jar!/BOOT-INF/lib/elasticsearch-hadoop-7.17.0.jar!/org/elasticsearch/hadoop/util/Version.class + * And spring boot has its own custom URLStreamHandler which returns a URL with a "jar" protocol from the previous call to + * getJarFileURL() (the default JDK URLStreamHandler does not do this). So this next check is Spring Boot specific. 
+ */ + final boolean isSpringBootJarInsideJar; + final String innerJarFilePath; + if ("jar".equals(fileURL.getProtocol())) { + JarURLConnection jarURLConnection = (JarURLConnection) fileURL.openConnection(); + innerJarFilePath = jarURLConnection.getEntryName(); + fileURL = jarURLConnection.getJarFileURL(); + isSpringBootJarInsideJar = true; + } else { + isSpringBootJarInsideJar = false; + innerJarFilePath = null; + } - URI fileURI = fileURL.toURI(); - File file = new File(fileURI); - - // Use filesystem to resolve any sym links or dots in the path to - // a singular unique file path - File canonicalFile = file.getCanonicalFile(); + String canonicalString; + if ("file".equals(fileURL.getProtocol())) { + URI fileURI = fileURL.toURI(); + File file = new File(fileURI); - return canonicalFile.toURI().toString(); + // Use filesystem to resolve any sym links or dots in the path to + // a singular unique file path + File canonicalFile = file.getCanonicalFile(); + canonicalString = canonicalFile.toURI().toString(); + } else { + /* + * In the event that some custom classloader is doing strange things and we don't have a file URL here, better to output + * whatever URL it gives us rather than fail + */ + canonicalString = fileURL.toString(); + } + if (isSpringBootJarInsideJar) { + canonicalString = canonicalString + "!/" + innerJarFilePath; + } + return canonicalString; } } \ No newline at end of file diff --git a/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java b/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java index 1a22b3b48..c46520c6f 100644 --- a/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java +++ b/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java @@ -23,9 +23,16 @@ import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; +import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.OutputStreamWriter; +import java.net.JarURLConnection; 
+import java.net.MalformedURLException; +import java.net.URL; +import java.net.URLConnection; +import java.net.URLStreamHandler; +import java.util.jar.JarFile; import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException; import org.junit.Test; @@ -64,4 +71,61 @@ public void openNonExistingFile() throws Exception { fail("Shouldn't pass"); } + @Test + public void testToCanonicalFile() throws Exception { + String file = "file:/some/path/org/elasticsearch/hadoop/util/Version.class"; + URL url = new URL(file); + String canonicalFilePath = IOUtils.toCanonicalFilePath(url); + assertEquals(file, canonicalFilePath); + + url = new URL("jar:file:/some/path/elasticsearch-hadoop-7.17.0.jar!/org/elasticsearch/hadoop/util/Version.class"); + canonicalFilePath = IOUtils.toCanonicalFilePath(url); + assertEquals("file:/some/path/elasticsearch-hadoop-7.17.0.jar", canonicalFilePath); + + url = new URL("file:/some/path/../path/org/elasticsearch/hadoop/util/Version.class"); + canonicalFilePath = IOUtils.toCanonicalFilePath(url); + assertEquals("file:/some/path/org/elasticsearch/hadoop/util/Version.class", canonicalFilePath); + } + + @Test + public void testToCanonicalFileSpringBoot() throws Exception { + String jarWithinJarPath = "file:/some/path/outer.jar!/BOOT-INF/lib/elasticsearch-hadoop-7.17.0.jar"; + String file = jarWithinJarPath + "!/org/elasticsearch/hadoop/util/Version.class"; + URL url = new URL("jar", "", -1, file, new SpringBootURLStreamHandler(jarWithinJarPath) ); + String canonicalFilePath = IOUtils.toCanonicalFilePath(url); + assertEquals(jarWithinJarPath, canonicalFilePath); + } + + /** + * This class simulates what Spring Boot's URLStreamHandler does. 
+ */ + private static class SpringBootURLStreamHandler extends URLStreamHandler { + private final String jarWithinJarPath; + public SpringBootURLStreamHandler(String jarWithinJarPath) { + this.jarWithinJarPath = jarWithinJarPath; + } + + @Override + protected URLConnection openConnection(URL url) throws IOException { + return new JarURLConnection(url) { + @Override + public JarFile getJarFile() throws IOException { + return null; + } + + @Override + public void connect() throws IOException { + } + + @Override + public URL getJarFileURL() { + try { + return new URL("jar:" + jarWithinJarPath); + } catch (MalformedURLException e) { + throw new RuntimeException(e); + } + } + }; + } + } } \ No newline at end of file From f5b63f85d41a2f08b13322f3f2c9f5e25d38ffb2 Mon Sep 17 00:00:00 2001 From: Keith Massey Date: Thu, 3 Feb 2022 13:41:32 -0600 Subject: [PATCH 2/2] Cleaning up --- .../org/elasticsearch/hadoop/util/IOUtils.java | 15 ++++++--------- .../elasticsearch/hadoop/util/IOUtilsTest.java | 2 +- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java b/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java index 7de2355d4..a33fd97f9 100644 --- a/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java +++ b/mr/src/main/java/org/elasticsearch/hadoop/util/IOUtils.java @@ -246,16 +246,13 @@ public static String toCanonicalFilePath(URL fileURL) throws URISyntaxException, * And spring boot has its own custom URLStreamHandler which returns a URL with a "jar" protocol from the previous call to * getJarFileURL() (the default JDK URLStreamHandler does not do this). So this next check is Spring Boot specific. 
*/ - final boolean isSpringBootJarInsideJar; - final String innerJarFilePath; + final String springBootInnerJarFilePath; if ("jar".equals(fileURL.getProtocol())) { JarURLConnection jarURLConnection = (JarURLConnection) fileURL.openConnection(); - innerJarFilePath = jarURLConnection.getEntryName(); + springBootInnerJarFilePath = jarURLConnection.getEntryName(); fileURL = jarURLConnection.getJarFileURL(); - isSpringBootJarInsideJar = true; } else { - isSpringBootJarInsideJar = false; - innerJarFilePath = null; + springBootInnerJarFilePath = null; } String canonicalString; @@ -267,6 +264,9 @@ public static String toCanonicalFilePath(URL fileURL) throws URISyntaxException, // a singular unique file path File canonicalFile = file.getCanonicalFile(); canonicalString = canonicalFile.toURI().toString(); + if (springBootInnerJarFilePath != null) { + canonicalString = "jar:" + canonicalString + "!/" + springBootInnerJarFilePath; + } } else { /* * In the event that some custom classloader is doing strange things and we don't have a file URL here, better to output @@ -274,9 +274,6 @@ public static String toCanonicalFilePath(URL fileURL) throws URISyntaxException, */ canonicalString = fileURL.toString(); } - if (isSpringBootJarInsideJar) { - canonicalString = canonicalString + "!/" + innerJarFilePath; - } return canonicalString; } } \ No newline at end of file diff --git a/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java b/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java index c46520c6f..0c2793be0 100644 --- a/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java +++ b/mr/src/test/java/org/elasticsearch/hadoop/util/IOUtilsTest.java @@ -93,7 +93,7 @@ public void testToCanonicalFileSpringBoot() throws Exception { String file = jarWithinJarPath + "!/org/elasticsearch/hadoop/util/Version.class"; URL url = new URL("jar", "", -1, file, new SpringBootURLStreamHandler(jarWithinJarPath) ); String canonicalFilePath = 
IOUtils.toCanonicalFilePath(url); - assertEquals(jarWithinJarPath, canonicalFilePath); + assertEquals("jar:" + jarWithinJarPath, canonicalFilePath); } /**