diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index b016a29a86be1..7ec8f94a62925 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -94,7 +94,7 @@ jobs: tpcds=false docker=false fi - build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,graphx,catalyst,hive-thriftserver,streaming,sql-kafka-0-10,streaming-kafka-0-10,mllib-local,mllib,yarn,mesos,kubernetes,hadoop-cloud,spark-ganglia-lgpl,sql,hive"` + build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,graphx,catalyst,hive-thriftserver,streaming,sql-kafka-0-10,streaming-kafka-0-10,mllib-local,mllib,yarn,mesos,kubernetes,hadoop-cloud,spark-ganglia-lgpl,sql,hive,connect,protobuf,api"` precondition=" { \"build\": \"$build\", diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION index ea3d7d2a63caf..8657755b8d0ea 100644 --- a/R/pkg/DESCRIPTION +++ b/R/pkg/DESCRIPTION @@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 3.5.3 +Version: 3.5.4 Title: R Front End for 'Apache Spark' Description: Provides an R Front end for 'Apache Spark' . Authors@R: diff --git a/assembly/pom.xml b/assembly/pom.xml index 9c47b16865791..47b38621d6400 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/build/mvn b/build/mvn index 3179099304c7a..2c778fd6c71a7 100755 --- a/build/mvn +++ b/build/mvn @@ -56,7 +56,7 @@ install_app() { local binary="${_DIR}/$6" local remote_tarball="${mirror_host}/${url_path}${url_query}" local local_checksum="${local_tarball}.${checksum_suffix}" - local remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}" + local remote_checksum="${mirror_host}/${url_path}.${checksum_suffix}${url_query}" local curl_opts="--silent --show-error -L" local wget_opts="--no-verbose" diff --git a/common/kvstore/pom.xml b/common/kvstore/pom.xml index 46f4be085728c..3757f69e9bd17 100644 --- a/common/kvstore/pom.xml +++ b/common/kvstore/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9704e4aeee933..83243d183b7b9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 25c1b785961c2..e74fb05beb0ae 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 3c95492690393..13c4b5cca1e32 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 2bf35c713923d..709bbed0c553c 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 509e067dd246a..59e9973c42d05 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/unsafe/pom.xml 
b/common/unsafe/pom.xml index 8b0130e551292..e222499eec228 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/utils/pom.xml b/common/utils/pom.xml index a402affc88229..7b2a1ad57b0ff 100644 --- a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java b/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java similarity index 100% rename from common/utils/src/main/scala/org/apache/spark/unsafe/array/ByteArrayUtils.java rename to common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java diff --git a/connector/avro/pom.xml b/connector/avro/pom.xml index 5380c3705f4f0..1a6fe528b9168 100644 --- a/connector/avro/pom.xml +++ b/connector/avro/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala index 2554106d78e9d..67e4583fe4822 100644 --- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala +++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala @@ -29,6 +29,7 @@ import org.apache.avro.mapred.{AvroOutputFormat, FsInput} import org.apache.avro.mapreduce.AvroJob import org.apache.hadoop.conf.Configuration import org.apache.hadoop.fs.FileStatus +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.mapreduce.Job import org.apache.spark.SparkException @@ -140,6 +141,8 @@ private[sql] object AvroUtils extends Logging { try { Some(DataFileReader.openReader(in, new GenericDatumReader[GenericRecord]())) } catch { + case e: BlockMissingException => + throw new SparkException(s"Could not read file: $path", e) case e: IOException => if (ignoreCorruptFiles) { logWarning(s"Skipped the footer in the corrupted file: $path", e) diff --git a/connector/avro/src/main/java/org/apache/spark/sql/avro/CustomDecimal.scala b/connector/avro/src/main/scala/org/apache/spark/sql/avro/CustomDecimal.scala similarity index 100% rename from connector/avro/src/main/java/org/apache/spark/sql/avro/CustomDecimal.scala rename to connector/avro/src/main/scala/org/apache/spark/sql/avro/CustomDecimal.scala diff --git a/connector/connect/client/jvm/pom.xml b/connector/connect/client/jvm/pom.xml index 56b66be3f7744..695146d7a1113 100644 --- a/connector/connect/client/jvm/pom.xml +++ b/connector/connect/client/jvm/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../../pom.xml diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala index 61d08912aec23..3ae9b9fc73b48 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/test/IntegrationTestUtils.scala @@ -74,7 +74,7 @@ object IntegrationTestUtils { // Redirect server log into console "--conf", - s"spark.driver.extraJavaOptions=-Dlog4j.configuration=$log4j2") + s"spark.driver.extraJavaOptions=-Dlog4j.configurationFile=$log4j2") } else Seq.empty } diff --git a/connector/connect/common/pom.xml 
b/connector/connect/common/pom.xml index 2d209746dc7fc..6c50469717f95 100644 --- a/connector/connect/common/pom.xml +++ b/connector/connect/common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/pom.xml b/connector/connect/server/pom.xml index 983d4f087a686..aeadbacb7c692 100644 --- a/connector/connect/server/pom.xml +++ b/connector/connect/server/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../../pom.xml diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala index 0e4f344da901c..93a1757cd687a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/ExecuteHolder.scala @@ -21,6 +21,7 @@ import java.util.UUID import scala.collection.JavaConverters._ import scala.collection.mutable +import scala.util.control.NonFatal import org.apache.spark.{SparkEnv, SparkSQLException} import org.apache.spark.connect.proto @@ -237,7 +238,14 @@ private[connect] class ExecuteHolder( // it does. responseObserver.removeAll() // post closed to UI - eventsManager.postClosed() + try { + eventsManager.postClosed() + } catch { + // Catching the exception to prevent the wrong error code from being returned to the + // user: SPARK-49688. The issue was fixed by completely refactoring the code in Spark 4.0. + case e: Throwable if NonFatal.apply(e) => + logError(s"Error posting closed event to UI: ${e.getMessage()}") + } } // interrupt any attached grpcResponseSenders grpcResponseSenders.foreach(_.interrupt()) diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala index 393b832de878e..b8363c15e6ba2 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectReattachExecuteHandler.scala @@ -29,6 +29,11 @@ class SparkConnectReattachExecuteHandler( extends Logging { def handle(v: proto.ReattachExecuteRequest): Unit = { + // An exception will be raised if the session is not available. 
+ val sessionHolder = + SparkConnectService.getIsolatedSession(v.getUserContext.getUserId, v.getSessionId) + assert(sessionHolder != null) + val executeHolder = SparkConnectService.executionManager .getExecuteHolder(ExecuteKey(v.getUserContext.getUserId, v.getSessionId, v.getOperationId)) .getOrElse { diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala index e8af2acfd2e27..edf8147eff85a 100644 --- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala +++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/service/SparkConnectService.scala @@ -360,6 +360,13 @@ object SparkConnectService extends Logging { userSessionMapping.invalidateAll() } + /** + * Used for testing + */ + private[connect] def invalidateSession(userId: String, sessionId: String): Unit = { + userSessionMapping.invalidate((userId, sessionId)) + } + /** * Used for testing. */ diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala index eddd1c6be72b1..234ee526d438a 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/SparkConnectServerTest.scala @@ -52,10 +52,6 @@ trait SparkConnectServerTest extends SharedSparkSession { withSparkEnvConfs((Connect.CONNECT_GRPC_BINDING_PORT.key, serverPort.toString)) { SparkConnectService.start(spark.sparkContext) } - // register udf directly on the server, we're not testing client UDFs here... 
- val serverSession = - SparkConnectService.getOrCreateIsolatedSession(defaultUserId, defaultSessionId).session - serverSession.udf.register("sleep", ((ms: Int) => { Thread.sleep(ms); ms })) } override def afterAll(): Unit = { diff --git a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala index 06cd1a5666b66..00de9fb6fd260 100644 --- a/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala +++ b/connector/connect/server/src/test/scala/org/apache/spark/sql/connect/execution/ReattachableExecuteSuite.scala @@ -56,6 +56,23 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { } } + test("reattach after connection expired") { + withClient { client => + withRawBlockingStub { stub => + // emulate session expiration + SparkConnectService.invalidateSession(defaultUserId, defaultSessionId) + + // session closed, bound to fail immediately + val operationId = UUID.randomUUID().toString + val iter = stub.reattachExecute(buildReattachExecuteRequest(operationId, None)) + val e = intercept[StatusRuntimeException] { + iter.next() + } + assert(e.getMessage.contains("INVALID_HANDLE.SESSION_NOT_FOUND")) + } + } + } + test("raw interrupted RPC results in INVALID_CURSOR.DISCONNECTED error") { withRawBlockingStub { stub => val iter = stub.executePlan(buildExecutePlanRequest(buildPlan(MEDIUM_RESULTS_QUERY))) @@ -347,6 +364,10 @@ class ReattachableExecuteSuite extends SparkConnectServerTest { } test("long sleeping query") { + // register udf directly on the server, we're not testing client UDFs here... + val serverSession = + SparkConnectService.getOrCreateIsolatedSession(defaultUserId, defaultSessionId).session + serverSession.udf.register("sleep", ((ms: Int) => { Thread.sleep(ms); ms })) // query will be sleeping and not returning results, while having multiple reattach withSparkEnvConfs( (Connect.CONNECT_EXECUTE_REATTACHABLE_SENDER_MAX_STREAM_DURATION.key, "1s")) { diff --git a/connector/docker-integration-tests/pom.xml b/connector/docker-integration-tests/pom.xml index 6f48d2775b4cd..435c0fbd797aa 100644 --- a/connector/docker-integration-tests/pom.xml +++ b/connector/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala index de8fcf1a4a787..78fdbe7158bb7 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala @@ -22,7 +22,11 @@ import java.sql.Connection import org.scalatest.time.SpanSugar._ import org.apache.spark.{SparkConf, SparkSQLFeatureNotSupportedException} +import org.apache.spark.rdd.RDD import org.apache.spark.sql.AnalysisException +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.execution.{RowDataSourceScanExec, SparkPlan} +import org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.MsSQLServerDatabaseOnDocker import 
org.apache.spark.sql.types._ @@ -39,6 +43,17 @@ import org.apache.spark.tags.DockerTest @DockerTest class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCTest { + def getExternalEngineQuery(executedPlan: SparkPlan): String = { + getExternalEngineRdd(executedPlan).asInstanceOf[JDBCRDD].getExternalEngineQuery + } + + def getExternalEngineRdd(executedPlan: SparkPlan): RDD[InternalRow] = { + val queryNode = executedPlan.collect { case r: RowDataSourceScanExec => + r + }.head + queryNode.rdd + } + override def excluded: Seq[String] = Seq( "simple scan with OFFSET", "simple scan with LIMIT and OFFSET", @@ -137,4 +152,68 @@ class MsSqlServerIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JD "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))") assert(df3.collect().length == 3) } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN name = 'Legolas' THEN name = 'Elf' ELSE NOT (name = 'Wizard') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE IIF(("name" <> 'Wizard'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in CASE WHEN with always true test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN (name = 'Elf') ELSE (1=1) END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF(("name" = 'Elf'), 1, 0) ELSE 1 END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN (name = 'Elrond') ELSE (name = 'Gandalf') END + | ELSE (name = 'Sauron') END + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE (CASE WHEN ("name" = 'Legolas') THEN IIF((CASE WHEN ("name" = 'Elf') THEN IIF(("name" = 'Elrond'), 1, 0) ELSE IIF(("name" = 'Gandalf'), 1, 0) END = 1), 1, 0) ELSE IIF(("name" = 'Sauron'), 1, 0) END = 1) """ + ) + // scalastyle:on + df.collect() + } + + test("SPARK-50087: SqlServer handle non-booleans in nested CASE WHEN test") { + val df = sql( + s"""|SELECT * FROM $catalogName.employee + |WHERE CASE WHEN (name = 'Legolas') THEN + | CASE WHEN (name = 'Elf') THEN 'Elf' ELSE 'Wizard' END + | ELSE 'Sauron' END = name + |""".stripMargin + ) + + // scalastyle:off + assert(getExternalEngineQuery(df.queryExecution.executedPlan) == + """SELECT "dept","name","salary","bonus" FROM "employee" WHERE ("name" IS NOT NULL) AND ((CASE WHEN "name" = 'Legolas' THEN CASE WHEN "name" = 'Elf' THEN 'Elf' ELSE 'Wizard' END ELSE 'Sauron' END) = "name") """ + ) + // scalastyle:on + df.collect() + } } diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala index 7fef3ccd6b3f6..b0edac3fcdd1f 100644 --- 
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/PostgresIntegrationSuite.scala @@ -22,6 +22,7 @@ import java.sql.Connection import org.apache.spark.SparkConf import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.analysis.TableAlreadyExistsException +import org.apache.spark.sql.execution.FilterExec import org.apache.spark.sql.execution.datasources.v2.jdbc.JDBCTableCatalog import org.apache.spark.sql.jdbc.DatabaseOnDocker import org.apache.spark.sql.types._ @@ -123,4 +124,13 @@ class PostgresIntegrationSuite extends DockerJDBCIntegrationV2Suite with V2JDBCT ) } } + + test("SPARK-49695: Postgres fix xor push-down") { + val df = spark.sql(s"select dept, name from $catalogName.employee where dept ^ 6 = 0") + val rows = df.collect() + assert(!df.queryExecution.sparkPlan.exists(_.isInstanceOf[FilterExec])) + assert(rows.length == 1) + assert(rows(0).getInt(0) === 6) + assert(rows(0).getString(1) === "jen") + } } diff --git a/connector/kafka-0-10-assembly/pom.xml b/connector/kafka-0-10-assembly/pom.xml index f85820ba1454e..8b6d7d47b0392 100644 --- a/connector/kafka-0-10-assembly/pom.xml +++ b/connector/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-sql/pom.xml b/connector/kafka-0-10-sql/pom.xml index 4a0d1af7968e7..dce1990f1c9d0 100644 --- a/connector/kafka-0-10-sql/pom.xml +++ b/connector/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10-token-provider/pom.xml b/connector/kafka-0-10-token-provider/pom.xml index b7aff2d217bc3..5973b9595db8a 100644 --- a/connector/kafka-0-10-token-provider/pom.xml +++ b/connector/kafka-0-10-token-provider/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kafka-0-10/pom.xml b/connector/kafka-0-10/pom.xml index a1ebf137e324d..54ba2b22093d0 100644 --- a/connector/kafka-0-10/pom.xml +++ b/connector/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl-assembly/pom.xml b/connector/kinesis-asl-assembly/pom.xml index 5aa937d481582..5e0c0fcafc12b 100644 --- a/connector/kinesis-asl-assembly/pom.xml +++ b/connector/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/pom.xml b/connector/kinesis-asl/pom.xml index ab48e0eeae2c6..65b0fa33db29f 100644 --- a/connector/kinesis-asl/pom.xml +++ b/connector/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala index 7d12af3256f1f..d388b480e065d 100644 --- a/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala +++ b/connector/kinesis-asl/src/main/scala/org/apache/spark/examples/streaming/KinesisWordCountASL.scala @@ -275,7 +275,7 @@ private[streaming] object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. 
logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/connector/protobuf/pom.xml b/connector/protobuf/pom.xml index 9fdaaa925a759..95be9ab74f105 100644 --- a/connector/protobuf/pom.xml +++ b/connector/protobuf/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/connector/spark-ganglia-lgpl/pom.xml b/connector/spark-ganglia-lgpl/pom.xml index e52b828bef975..62f1c4ab2b124 100644 --- a/connector/spark-ganglia-lgpl/pom.xml +++ b/connector/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 17004f875869c..e59066e19850d 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/core/src/main/resources/org/apache/spark/ui/static/webui.css b/core/src/main/resources/org/apache/spark/ui/static/webui.css index f952f86503e30..58c5add2d2400 100755 --- a/core/src/main/resources/org/apache/spark/ui/static/webui.css +++ b/core/src/main/resources/org/apache/spark/ui/static/webui.css @@ -355,6 +355,10 @@ a.expandbutton { width: 170px; } +.shuffle-write-time-checkbox-div { + width: 155px; +} + .result-serialization-time-checkbox-div { width: 185px; } diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 0f0d8b6c07c0a..43c95bed5c685 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -104,8 +104,13 @@ private[spark] class SparkSubmit extends Logging { */ private def kill(args: SparkSubmitArguments): Unit = { if (RestSubmissionClient.supportsRestClient(args.master)) { - new RestSubmissionClient(args.master) + val response = new RestSubmissionClient(args.master) .killSubmission(args.submissionToKill) + if (response.success) { + logInfo(s"${args.submissionToKill} is killed successfully.") + } else { + logError(response.message) + } } else { val sparkConf = args.toSparkConf() sparkConf.set("spark.master", args.master) diff --git a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala index c060ef9da8c10..12413698d2832 100644 --- a/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala +++ b/core/src/main/scala/org/apache/spark/deploy/rest/StandaloneRestServer.scala @@ -21,7 +21,7 @@ import java.io.File import javax.servlet.http.HttpServletResponse import org.apache.spark.{SPARK_VERSION => sparkVersion, SparkConf} -import org.apache.spark.deploy.{Command, DeployMessages, DriverDescription} +import org.apache.spark.deploy.{Command, DeployMessages, DriverDescription, SparkSubmit} import org.apache.spark.deploy.ClientArguments._ import org.apache.spark.internal.config import org.apache.spark.launcher.SparkLauncher @@ -168,9 +168,16 @@ private[rest] class StandaloneSubmitRequestServlet( val extraJavaOpts = driverExtraJavaOptions.map(Utils.splitCommandString).getOrElse(Seq.empty) val sparkJavaOpts = Utils.sparkJavaOpts(conf) val javaOpts = sparkJavaOpts ++ defaultJavaOpts ++ extraJavaOpts + val sparkSubmitOpts = if (mainClass.equals(classOf[SparkSubmit].getName)) { + 
sparkProperties.get("spark.app.name") + .map { v => Seq("-c", s"spark.app.name=$v") } + .getOrElse(Seq.empty[String]) + } else { + Seq.empty[String] + } val command = new Command( "org.apache.spark.deploy.worker.DriverWrapper", - Seq("{{WORKER_URL}}", "{{USER_JAR}}", mainClass) ++ appArgs, // args to the DriverWrapper + Seq("{{WORKER_URL}}", "{{USER_JAR}}", mainClass) ++ sparkSubmitOpts ++ appArgs, environmentVariables, extraClassPath, extraLibraryPath, javaOpts) val actualDriverMemory = driverMemory.map(Utils.memoryStringToMb).getOrElse(DEFAULT_MEMORY) val actualDriverCores = driverCores.map(_.toInt).getOrElse(DEFAULT_CORES) diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index 537522326fc78..fe90895cacb53 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -474,6 +474,27 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { } driverConf.set(EXECUTOR_ID, arguments.executorId) + // Set executor memory related config here according to resource profile + if (cfg.resourceProfile.id != ResourceProfile.DEFAULT_RESOURCE_PROFILE_ID) { + cfg.resourceProfile + .executorResources + .foreach { + case (ResourceProfile.OFFHEAP_MEM, request) => + driverConf.set(MEMORY_OFFHEAP_SIZE.key, request.amount.toString + "m") + logInfo(s"Set executor off-heap memory to $request") + case (ResourceProfile.MEMORY, request) => + driverConf.set(EXECUTOR_MEMORY.key, request.amount.toString + "m") + logInfo(s"Set executor memory to $request") + case (ResourceProfile.OVERHEAD_MEM, request) => + // Maybe don't need to set this since it's nearly used by tasks. + driverConf.set(EXECUTOR_MEMORY_OVERHEAD.key, request.amount.toString + "m") + logInfo(s"Set executor memory_overhead to $request") + case (ResourceProfile.CORES, request) => + driverConf.set(EXECUTOR_CORES.key, request.amount.toString) + logInfo(s"Set executor cores to $request") + case _ => + } + } val env = SparkEnv.createExecutorEnv(driverConf, arguments.executorId, arguments.bindAddress, arguments.hostname, arguments.cores, cfg.ioEncryptionKey, isLocal = false) // Set the application attemptId in the BlockStoreClient if available. 
diff --git a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala index cad107256c58c..8aa7d54fd61b9 100644 --- a/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/HadoopRDD.scala @@ -25,11 +25,13 @@ import scala.collection.immutable.Map import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred._ import org.apache.hadoop.mapred.lib.CombineFileSplit import org.apache.hadoop.mapreduce.TaskType import org.apache.hadoop.mapreduce.lib.input.FileInputFormat +import org.apache.hadoop.security.AccessControlException import org.apache.hadoop.util.ReflectionUtils import org.apache.spark._ @@ -293,6 +295,7 @@ class HadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true @@ -318,6 +321,7 @@ class HadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning(s"Skipped the rest content in the corrupted file: ${split.inputSplit}", e) finished = true diff --git a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala index 119fdae531f22..7fc93806998bf 100644 --- a/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/NewHadoopRDD.scala @@ -25,12 +25,14 @@ import scala.collection.JavaConverters.asScalaBufferConverter import scala.reflect.ClassTag import org.apache.hadoop.conf.{Configurable, Configuration} +import org.apache.hadoop.hdfs.BlockMissingException import org.apache.hadoop.io.Writable import org.apache.hadoop.io.compress.CompressionCodecFactory import org.apache.hadoop.mapred.JobConf import org.apache.hadoop.mapreduce._ import org.apache.hadoop.mapreduce.lib.input.{CombineFileSplit, FileInputFormat, FileSplit, InvalidInputException} import org.apache.hadoop.mapreduce.task.{JobContextImpl, TaskAttemptContextImpl} +import org.apache.hadoop.security.AccessControlException import org.apache.spark._ import org.apache.spark.annotation.DeveloperApi @@ -227,6 +229,7 @@ class NewHadoopRDD[K, V]( null // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: ${split.serializableHadoopSplit}", @@ -255,6 +258,7 @@ class NewHadoopRDD[K, V]( finished = true // Throw FileNotFoundException even if `ignoreCorruptFiles` is true case e: FileNotFoundException if !ignoreMissingFiles => throw e + case e @ (_ : AccessControlException | _ : BlockMissingException) => throw e case e: IOException if ignoreCorruptFiles => logWarning( s"Skipped the rest content in the corrupted file: 
${split.serializableHadoopSplit}", diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala index f695b10202758..b63e5999127d4 100644 --- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala +++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala @@ -1812,8 +1812,9 @@ abstract class RDD[T: ClassTag]( * Please read the linked SPIP and design docs to understand the limitations and future plans. * @return an [[RDDBarrier]] instance that provides actions within a barrier stage * @see [[org.apache.spark.BarrierTaskContext]] - * @see SPIP: Barrier Execution Mode - * @see Design Doc + * @see + * SPIP: Barrier Execution Mode + * @see Design Doc */ @Experimental @Since("2.4.0") diff --git a/dev/.rat-excludes b/dev/.rat-excludes index d8fcfdf39ab81..311b30973577d 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -138,9 +138,6 @@ ansible-for-test-node/* node_modules spark-events-broken/* SqlBaseLexer.tokens -# Spark Connect related files with custom licence -any.proto -empty.proto .*\.explain .*\.proto.bin LimitedInputStream.java diff --git a/dev/create-release/release-build.sh b/dev/create-release/release-build.sh index 99841916cf293..7728e84bf9066 100755 --- a/dev/create-release/release-build.sh +++ b/dev/create-release/release-build.sh @@ -505,7 +505,7 @@ if [[ "$1" == "publish-release" ]]; then file_short=$(echo $file | sed -e "s/\.\///") dest_url="$nexus_upload/org/apache/spark/$file_short" echo " Uploading $file_short" - curl -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url + curl --retry 3 -u $ASF_USERNAME:$ASF_PASSWORD --upload-file $file_short $dest_url done echo "Closing nexus staging repository" diff --git a/dev/deps/spark-deps-hadoop-3-hive-2.3 b/dev/deps/spark-deps-hadoop-3-hive-2.3 index a9d63c1ad0f99..06e6aa199021f 100644 --- a/dev/deps/spark-deps-hadoop-3-hive-2.3 +++ b/dev/deps/spark-deps-hadoop-3-hive-2.3 @@ -130,8 +130,8 @@ jersey-container-servlet/2.40//jersey-container-servlet-2.40.jar jersey-hk2/2.40//jersey-hk2-2.40.jar jersey-server/2.40//jersey-server-2.40.jar jettison/1.1//jettison-1.1.jar -jetty-util-ajax/9.4.54.v20240208//jetty-util-ajax-9.4.54.v20240208.jar -jetty-util/9.4.54.v20240208//jetty-util-9.4.54.v20240208.jar +jetty-util-ajax/9.4.56.v20240826//jetty-util-ajax-9.4.56.v20240826.jar +jetty-util/9.4.56.v20240826//jetty-util-9.4.56.v20240826.jar jline/2.14.6//jline-2.14.6.jar joda-time/2.12.5//joda-time-2.12.5.jar jodd-core/3.5.2//jodd-core-3.5.2.jar @@ -212,9 +212,9 @@ opencsv/2.3//opencsv-2.3.jar opentracing-api/0.33.0//opentracing-api-0.33.0.jar opentracing-noop/0.33.0//opentracing-noop-0.33.0.jar opentracing-util/0.33.0//opentracing-util-0.33.0.jar -orc-core/1.9.4/shaded-protobuf/orc-core-1.9.4-shaded-protobuf.jar -orc-mapreduce/1.9.4/shaded-protobuf/orc-mapreduce-1.9.4-shaded-protobuf.jar -orc-shims/1.9.4//orc-shims-1.9.4.jar +orc-core/1.9.5/shaded-protobuf/orc-core-1.9.5-shaded-protobuf.jar +orc-mapreduce/1.9.5/shaded-protobuf/orc-mapreduce-1.9.5-shaded-protobuf.jar +orc-shims/1.9.5//orc-shims-1.9.5.jar oro/2.0.8//oro-2.0.8.jar osgi-resource-locator/1.0.3//osgi-resource-locator-1.0.3.jar paranamer/2.8//paranamer-2.8.jar diff --git a/dev/requirements.txt b/dev/requirements.txt index 0749af75aa4be..e3c3cae59d05d 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -67,5 +67,5 @@ torchvision torcheval # DeepspeedTorchDistributor dependencies -deepspeed +deepspeed; sys_platform != 'darwin' diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py 
index d29fc8726018d..5df59476007a0 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -113,6 +113,14 @@ def __hash__(self): ], ) +utils = Module( + name="utils", + dependencies=[tags], + source_file_regexes=[ + "common/utils/", + ], +) + kvstore = Module( name="kvstore", dependencies=[tags], @@ -126,7 +134,7 @@ def __hash__(self): network_common = Module( name="network-common", - dependencies=[tags], + dependencies=[tags, utils], source_file_regexes=[ "common/network-common/", ], @@ -148,7 +156,7 @@ def __hash__(self): unsafe = Module( name="unsafe", - dependencies=[tags], + dependencies=[tags, utils], source_file_regexes=[ "common/unsafe", ], @@ -179,7 +187,7 @@ def __hash__(self): core = Module( name="core", - dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher], + dependencies=[kvstore, network_common, network_shuffle, unsafe, launcher, utils], source_file_regexes=[ "core/", ], @@ -188,9 +196,17 @@ def __hash__(self): ], ) +api = Module( + name="api", + dependencies=[utils, unsafe], + source_file_regexes=[ + "sql/api/", + ], +) + catalyst = Module( name="catalyst", - dependencies=[tags, sketch, core], + dependencies=[tags, sketch, core, api], source_file_regexes=[ "sql/catalyst/", ], diff --git a/docs/_config.yml b/docs/_config.yml index 2dfc5322b2ffa..a207cc2d911b5 100644 --- a/docs/_config.yml +++ b/docs/_config.yml @@ -19,8 +19,8 @@ include: # These allow the documentation to be updated with newer releases # of Spark, Scala, and Mesos. -SPARK_VERSION: 3.5.3 -SPARK_VERSION_SHORT: 3.5.3 +SPARK_VERSION: 3.5.4 +SPARK_VERSION_SHORT: 3.5.4 SCALA_BINARY_VERSION: "2.12" SCALA_VERSION: "2.12.18" MESOS_VERSION: 1.0.0 @@ -40,7 +40,7 @@ DOCSEARCH_SCRIPT: | inputSelector: '#docsearch-input', enhancedSearchInput: true, algoliaOptions: { - 'facetFilters': ["version:3.5.3"] + 'facetFilters': ["version:3.5.4"] }, debug: false // Set debug to true if you want to inspect the dropdown }); diff --git a/docs/_data/menu-sql.yaml b/docs/_data/menu-sql.yaml index ff93f09a83ce1..99d7aded3f25a 100644 --- a/docs/_data/menu-sql.yaml +++ b/docs/_data/menu-sql.yaml @@ -87,6 +87,8 @@ url: sql-ref-functions.html - text: Identifiers url: sql-ref-identifier.html + - text: IDENTIFIER clause + url: sql-ref-identifier-clause.html - text: Literals url: sql-ref-literals.html - text: Null Semantics diff --git a/docs/configuration.md b/docs/configuration.md index abd934572bd00..302348aa0f0eb 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -1959,7 +1959,7 @@ Apart from these, the following properties are also available, and may be useful spark.storage.replication.proactive - false + true Enables proactive block replication for RDD blocks. Cached RDD block replicas lost due to executor failures are replenished if there are any existing available replicas. This tries diff --git a/docs/core-migration-guide.md b/docs/core-migration-guide.md index 36465cc3f4e86..9381b28c8b078 100644 --- a/docs/core-migration-guide.md +++ b/docs/core-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Core 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading files hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Core 3.4 to 3.5 - Since Spark 3.5, `spark.yarn.executor.failuresValidityInterval` is deprecated. 
Use `spark.executor.failuresValidityInterval` instead. diff --git a/docs/running-on-yarn.md b/docs/running-on-yarn.md index ce7121b806cb0..d0e725f6a98f4 100644 --- a/docs/running-on-yarn.md +++ b/docs/running-on-yarn.md @@ -121,15 +121,15 @@ all environment variables used for launching each container. This process is use classpath problems in particular. (Note that enabling this requires admin privileges on cluster settings and a restart of all node managers. Thus, this is not applicable to hosted clusters). -To use a custom log4j configuration for the application master or executors, here are the options: +To use a custom log4j2 configuration for the application master or executors, here are the options: -- upload a custom `log4j.properties` using `spark-submit`, by adding it to the `--files` list of files +- upload a custom `log4j2.properties` using `spark-submit`, by adding it to the `--files` list of files to be uploaded with the application. -- add `-Dlog4j.configuration=` to `spark.driver.extraJavaOptions` +- add `-Dlog4j.configurationFile=` to `spark.driver.extraJavaOptions` (for the driver) or `spark.executor.extraJavaOptions` (for executors). Note that if using a file, the `file:` protocol should be explicitly provided, and the file needs to exist locally on all the nodes. -- update the `$SPARK_CONF_DIR/log4j.properties` file and it will be automatically uploaded along +- update the `$SPARK_CONF_DIR/log4j2.properties` file and it will be automatically uploaded along with the other configurations. Note that other 2 options has higher priority than this option if multiple options are specified. diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md index 964f7de637e8b..be4e92ec4df43 100644 --- a/docs/sql-migration-guide.md +++ b/docs/sql-migration-guide.md @@ -22,6 +22,10 @@ license: | * Table of contents {:toc} +## Upgrading from Spark SQL 3.5.3 to 3.5.4 + +- Since Spark 3.5.4, when reading SQL tables hits `org.apache.hadoop.security.AccessControlException` and `org.apache.hadoop.hdfs.BlockMissingException`, the exception will be thrown and fail the task, even if `spark.sql.files.ignoreCorruptFiles` is set to `true`. + ## Upgrading from Spark SQL 3.5.1 to 3.5.2 - Since 3.5.2, MySQL JDBC datasource will read TINYINT UNSIGNED as ShortType, while in 3.5.1, it was wrongly read as ByteType. @@ -595,142 +599,111 @@ license: | - Since Spark 2.3, the Join/Filter's deterministic predicates that are after the first non-deterministic predicates are also pushed down/through the child operators, if possible. In prior Spark versions, these filters are not eligible for predicate pushdown. - Partition column inference previously found incorrect common type for different inferred types, for example, previously it ended up with double type as the common type for double type and date type. Now it finds the correct common type for such conflicts. The conflict resolution follows the table below:

| InputA \ InputB | NullType | IntegerType | LongType | DecimalType(38,0)* | DoubleType | DateType | TimestampType | StringType |
|---|---|---|---|---|---|---|---|---|
| NullType | NullType | IntegerType | LongType | DecimalType(38,0) | DoubleType | DateType | TimestampType | StringType |
| IntegerType | IntegerType | IntegerType | LongType | DecimalType(38,0) | DoubleType | StringType | StringType | StringType |
| LongType | LongType | LongType | LongType | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DecimalType(38,0)* | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | DecimalType(38,0) | StringType | StringType | StringType | StringType |
| DoubleType | DoubleType | DoubleType | StringType | StringType | DoubleType | StringType | StringType | StringType |
| DateType | DateType | StringType | StringType | StringType | StringType | DateType | TimestampType | StringType |
| TimestampType | TimestampType | StringType | StringType | StringType | StringType | TimestampType | TimestampType | StringType |
| StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType | StringType |
Note that, for DecimalType(38,0)*, the table above intentionally does not cover all other combinations of scales and precisions because currently we only infer decimal type like `BigInteger`/`BigInt`. For example, 1.1 is inferred as double type. diff --git a/examples/pom.xml b/examples/pom.xml index 74f7a8562cbb0..5efc255218570 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala index 20c5eb1700155..9289b005e3ba4 100644 --- a/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala +++ b/examples/src/main/scala/org/apache/spark/examples/streaming/StreamingExamples.scala @@ -31,7 +31,7 @@ object StreamingExamples extends Logging { // We first log something to initialize Spark's default logging, then we override the // logging level. logInfo("Setting log level to [WARN] for streaming example." + - " To override add a custom log4j.properties to the classpath.") + " To override add a custom log4j2.properties to the classpath.") Configurator.setRootLevel(Level.WARN) } } diff --git a/graphx/pom.xml b/graphx/pom.xml index 8a562f6ab0a14..74ad5a732f054 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/hadoop-cloud/pom.xml b/hadoop-cloud/pom.xml index 72fb8e9ae9bef..cb89c27d5f0ba 100644 --- a/hadoop-cloud/pom.xml +++ b/hadoop-cloud/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d548e91536d93..e36d57fe6a573 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 8733e01ac89b9..26e9a1502abab 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 395f00c4c42f4..079ce72d5d959 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 ../pom.xml diff --git a/pom.xml b/pom.xml index ec77fcc9455d3..8dc47f391f967 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.12 - 3.5.3 + 3.5.4 pom Spark Project Parent POM https://spark.apache.org/ @@ -141,9 +141,9 @@ 10.14.2.0 1.13.1 - 1.9.4 + 1.9.5 shaded-protobuf - 9.4.54.v20240208 + 9.4.56.v20240826 4.0.3 0.10.0