Eventlog Example

When a SQL execution starts, Spark posts a SparkListenerSQLExecutionStart event, which is persisted to the eventlog as a single line of JSON:
{
  "Event": "org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart",
  "executionId": 0,
  "rootExecutionId": 0,
  "description": "select round(a, 2), a from double_table",
  "details": "org.apache.spark.sql.hive.thriftserver.SparkSQLCLIDriver.main(SparkSQLCLIDriver.scala)\nsun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)\nsun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)\nsun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)\njava.lang.reflect.Method.invoke(Method.java:498)\norg.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)\norg.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:1029)\norg.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:194)\norg.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:217)\norg.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:91)\norg.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1120)\norg.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1129)\norg.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)",
  "physicalPlanDescription": "== Physical Plan ==\n* Project (3)\n+- * ColumnarToRow (2)\n +- Scan parquet spark_catalog.default.double_table (1)\n \n\n(1) Scan parquet spark_catalog.default.double_table\nOutput [1]: [a#0]\nBatched: true\nLocation: InMemoryFileIndex [file:/home/hadoop/files/double_table]\nReadSchema: struct<a:double>\n\n(2) ColumnarToRow [codegen id : 1]\nInput [1]: [a#0]\n\n(3) Project [codegen id : 1]\nOutput [2]: [round(a#0, 2) AS round(a, 2)#1, a#0]\nInput [1]: [a#0]\n\n",
  "sparkPlanInfo": {
    "nodeName": "WholeStageCodegen (1)",
    "simpleString": "WholeStageCodegen (1)",
    "children": [
      {
        "nodeName": "Project",
        "simpleString": "Project [round(a#0, 2) AS round(a, 2)#1, a#0]",
        "children": [
          {
            "nodeName": "ColumnarToRow",
            "simpleString": "ColumnarToRow",
            "children": [
              {
                "nodeName": "InputAdapter",
                "simpleString": "InputAdapter",
                "children": [
                  {
                    "nodeName": "Scan parquet spark_catalog.default.double_table",
                    "simpleString": "FileScan parquet spark_catalog.default.double_table [a#0] Batched: true, DataFilters: [], Format: Parquet, Location: InMemoryFileIndex(1 paths)[file:/home/hadoop/files/double_table], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<a:double>",
                    "children": [],
                    "metadata": {
                      "Location": "InMemoryFileIndex(1 paths)[file:/home/hadoop/files/double_table]",
                      "ReadSchema": "struct<a:double>",
                      "Format": "Parquet",
                      "Batched": "true",
                      "PartitionFilters": "[]",
                      "PushedFilters": "[]",
                      "DataFilters": "[]"
                    },
                    "metrics": [
                      {
                        "name": "number of files read",
                        "accumulatorId": 5,
                        "metricType": "sum"
                      },
                      {
                        "name": "scan time",
                        "accumulatorId": 4,
                        "metricType": "timing"
                      },
                      {
                        "name": "metadata time",
                        "accumulatorId": 6,
                        "metricType": "timing"
                      },
                      {
                        "name": "size of files read",
                        "accumulatorId": 7,
                        "metricType": "size"
                      },
                      {
                        "name": "number of output rows",
                        "accumulatorId": 3,
                        "metricType": "sum"
                      }
                    ]
                  }
                ],
                "metadata": {},
                "metrics": []
              }
            ],
            "metadata": {},
            "metrics": [
              {
                "name": "number of output rows",
                "accumulatorId": 1,
                "metricType": "sum"
              },
              {
                "name": "number of input batches",
                "accumulatorId": 2,
                "metricType": "sum"
              }
            ]
          }
        ],
        "metadata": {},
        "metrics": []
      }
    ],
    "metadata": {},
    "metrics": [
      {
        "name": "duration",
        "accumulatorId": 0,
        "metricType": "timing"
      }
    ]
  },
  "time": 1741661558528,
  "modifiedConfigs": {},
  "jobTags": []
}
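An eventlog file stores one JSON event per line, so events like the one above can be pulled out with a simple line scan. Below is a minimal sketch, not part of Spark's API: the log path is a placeholder, and the parsing uses json4s, which Spark bundles.

import scala.io.Source

import org.json4s._
import org.json4s.jackson.JsonMethods.parse

object PrintSqlExecutionStarts {
  def main(args: Array[String]): Unit = {
    implicit val formats: Formats = DefaultFormats
    // Placeholder path: point this at an eventlog written by your application.
    val source = Source.fromFile("/tmp/spark-events/local-1741661550000")
    try {
      for {
        line <- source.getLines()
        event = parse(line)
        if (event \ "Event").extractOpt[String]
          .contains("org.apache.spark.sql.execution.ui.SparkListenerSQLExecutionStart")
      } {
        // The escaped \n sequences in the JSON print as a multi-line plan.
        println((event \ "physicalPlanDescription").extract[String])
      }
    } finally source.close()
  }
}

Printing physicalPlanDescription is enough to recover the human-readable plan, because the field stores the plan text with literal \n escapes: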
== Physical Plan ==
* Project (3)
+- * ColumnarToRow (2)
   +- Scan parquet spark_catalog.default.double_table (1)
corresponds to this part of the escaped physicalPlanDescription string:
== Physical Plan ==\n* Project (3)\n+- * ColumnarToRow (2)\n +- Scan parquet spark_catalog.default.double_table (1)\n \n\n
(1) Scan parquet spark_catalog.default.double_table
Output [1]: [a#0]
Batched: true
Location: InMemoryFileIndex [file:/home/hadoop/files/double_table]
ReadSchema: struct<a:double>

(2) ColumnarToRow [codegen id : 1]
Input [1]: [a#0]

(3) Project [codegen id : 1]
Output [2]: [round(a#0, 2) AS round(a, 2)#1, a#0]
Input [1]: [a#0]
Together, the plan tree and the node details above correspond to the complete field:
"physicalPlanDescription": "== Physical Plan ==\n* Project (3)\n+- * ColumnarToRow (2)\n +- Scan parquet spark_catalog.default.double_table (1)\n \n\n(1) Scan parquet spark_catalog.default.double_table\nOutput [1]: [a#0]\nBatched: true\nLocation: InMemoryFileIndex [file:/home/hadoop/files/double_table]\nReadSchema: struct<a:double>\n\n(2) ColumnarToRow [codegen id : 1]\nInput [1]: [a#0]\n\n(3) Project [codegen id : 1]\nOutput [2]: [round(a#0, 2) AS round(a, 2)#1, a#0]\nInput [1]: [a#0]\n\n",