如何修复 Py4JJavaError:调用 collectToPython 时发生错误

Jos*_*eva 5 py4j kubernetes pyspark apache-zeppelin

我正在尝试在通过 Kubernetes 部署的 Zeppelin 笔记本上使用 PySpark 解释器。我已为 Spark 配置了执行器(5 个核心,1G 存储)。但是,当我尝试运行 pandas/seaborn 并操作 pandas DataFrame 时,出现以下错误:

Traceback (most recent call last):
  File "/tmp/zeppelin_pyspark-6458200865742049511.py", line 367, in <module>
    raise Exception(traceback.format_exc())
Exception: Traceback (most recent call last):
  File "/tmp/zeppelin_pyspark-6458200865742049511.py", line 355, in <module>
    exec(code, _zcUserQueryNameSpace)
  File "<stdin>", line 2, in <module>
  File "/opt/spark/python/pyspark/sql/dataframe.py", line 1703, in toPandas
    return pd.DataFrame.from_records(self.collect(), columns=self.columns)
  File "/opt/spark/python/pyspark/sql/dataframe.py", line 438, in collect
    port = self._jdf.collectToPython()
  File "/opt/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "/opt/spark/python/pyspark/sql/utils.py", line 63, in deco
    return f(*a, **kw)
  File "/opt/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
    format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o11395.collectToPython.:
org.apache.spark.SparkException: Job aborted due to stage failure: ResultStage 1395 (toPandas at <stdin>:2) has failed the maximum allowable number of times: 4.
Most recent failure reason: org.apache.spark.shuffle.FetchFailedException: Failure while fetching StreamChunkId{streamId=1165701532984, chunkIndex=0}: 
java.lang.RuntimeException: 

 Failed to open file: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index


at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getSortBasedShuffleBlockData(ExternalShuffleBlockResolver.java:249)    at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getBlockData(ExternalShuffleBlockResolver.java:174)    at org.apache.spark.network.shuffle.ExternalShuffleBlockHandler$1.next(ExternalShuffleBlockHandler.java:105)    at org.apache.spark.network.shuffle.ExternalShuffleBlockHandler$1.next(ExternalShuffleBlockHandler.java:95)     at org.apache.spark.network.server.OneForOneStreamManager.getChunk(OneForOneStreamManager.java:89)  at org.apache.spark.network.server.TransportRequestHandler.processFetchRequest(TransportRequestHandler.java:125)    at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:103)     at org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:118)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at 
io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)   at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)     at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)  at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)  at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)     at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)     at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)     at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)    at java.lang.Thread.run(Thread.java:748) Caused by: java.util.concurrent.ExecutionException: java.io.FileNotFoundException: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index (No such file or directory)     at org.spark_project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)    at 
org.spark_project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)     at org.spark_project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)  at org.spark_project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)   at org.spark_project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)     at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)  at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)   at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)   at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)   at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)     at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)     at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getSortBasedShuffleBlockData(ExternalShuffleBlockResolver.java:240)    ... 34 more Caused by: java.io.FileNotFoundException: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index (No such file or directory)   at java.io.FileInputStream.open0(Native Method)     at java.io.FileInputStream.open(FileInputStream.java:195)   at java.io.FileInputStream.<init>(FileInputStream.java:138)     at org.apache.spark.network.shuffle.ShuffleIndexInformation.<init>(ShuffleIndexInformation.java:41)     at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver$1.load(ExternalShuffleBlockResolver.java:111)  at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver$1.load(ExternalShuffleBlockResolver.java:109)  at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)  at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)  ... 
40 more     at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:442)    at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:418)     at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:59)  at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)   at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   at org.apache.spark.util.CompletionIterator.hasNext(CompletionIterator.scala:32)    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.agg_doAggregateWithKeys$(Unknown Source)  at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)   at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)  at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:395)     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)   at org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41)  at org.apache.spark.RangePartitioner$$anonfun$9.apply(Partitioner.scala:263)    at org.apache.spark.RangePartitioner$$anonfun$9.apply(Partitioner.scala:261)    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$26.apply(RDD.scala:844)    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsWithIndex$1$$anonfun$apply$26.apply(RDD.scala:844)    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)     at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)  at 
org.apache.spark.rdd.RDD.iterator(RDD.scala:287)     at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)   at org.apache.spark.scheduler.Task.run(Task.scala:108)  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:335)    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)  at java.lang.Thread.run(Thread.java:748) Caused by: org.apache.spark.network.client.ChunkFetchFailureException: Failure while fetching StreamChunkId{streamId=1165701532984, chunkIndex=0}: java.lang.RuntimeException: Failed to open file: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index    at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getSortBasedShuffleBlockData(ExternalShuffleBlockResolver.java:249)    at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getBlockData(ExternalShuffleBlockResolver.java:174)    at org.apache.spark.network.shuffle.ExternalShuffleBlockHandler$1.next(ExternalShuffleBlockHandler.java:105)    at org.apache.spark.network.shuffle.ExternalShuffleBlockHandler$1.next(ExternalShuffleBlockHandler.java:95)     at org.apache.spark.network.server.OneForOneStreamManager.getChunk(OneForOneStreamManager.java:89)  at org.apache.spark.network.server.TransportRequestHandler.processFetchRequest(TransportRequestHandler.java:125)    at org.apache.spark.network.server.TransportRequestHandler.handle(TransportRequestHandler.java:103)     at org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:118)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at 
io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)   at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)     at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)  at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)  at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)    at 
io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)     at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)     at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)     at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)    at java.lang.Thread.run(Thread.java:748) Caused by: java.util.concurrent.ExecutionException: java.io.FileNotFoundException: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index (No such file or directory)     at org.spark_project.guava.util.concurrent.AbstractFuture$Sync.getValue(AbstractFuture.java:306)    at org.spark_project.guava.util.concurrent.AbstractFuture$Sync.get(AbstractFuture.java:293)     at org.spark_project.guava.util.concurrent.AbstractFuture.get(AbstractFuture.java:116)  at org.spark_project.guava.util.concurrent.Uninterruptibles.getUninterruptibly(Uninterruptibles.java:135)   at org.spark_project.guava.cache.LocalCache$Segment.getAndRecordStats(LocalCache.java:2410)     at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2380)  at org.spark_project.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)   at org.spark_project.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)   at org.spark_project.guava.cache.LocalCache.get(LocalCache.java:4000)   at org.spark_project.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)     at org.spark_project.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)     at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.getSortBasedShuffleBlockData(ExternalShuffleBlockResolver.java:240)    ... 
34 more Caused by: java.io.FileNotFoundException: /tmp/spark-local/blockmgr-aa951820-47d3-404f-a97e-12d25f460aec/13/shuffle_311_0_0.index (No such file or directory)   at java.io.FileInputStream.open0(Native Method)     at java.io.FileInputStream.open(FileInputStream.java:195)   at java.io.FileInputStream.<init>(FileInputStream.java:138)     at org.apache.spark.network.shuffle.ShuffleIndexInformation.<init>(ShuffleIndexInformation.java:41)     at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver$1.load(ExternalShuffleBlockResolver.java:111)  at org.apache.spark.network.shuffle.ExternalShuffleBlockResolver$1.load(ExternalShuffleBlockResolver.java:109)  at org.spark_project.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)  at org.spark_project.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)  ... 40 more     at org.apache.spark.network.client.TransportResponseHandler.handle(TransportResponseHandler.java:182)   at org.apache.spark.network.server.TransportChannelHandler.channelRead(TransportChannelHandler.java:120)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:287)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.handler.codec.MessageToMessageDecoder.channelRead(MessageToMessageDecoder.java:102)     at 
io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at org.apache.spark.network.util.TransportFrameDecoder.channelRead(TransportFrameDecoder.java:85)   at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:336)   at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1294)    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:357)     at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:343)     at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:911)     at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:131)  at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:643)  at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:566)    at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:480)     at io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:442)     at io.netty.util.concurrent.SingleThreadEventExecutor$2.run(SingleThreadEventExecutor.java:131)     at io.netty.util.concurrent.DefaultThreadFactory$DefaultRunnableDecorator.run(DefaultThreadFactory.java:144)    ... 1 more 
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1499)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1487)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1486)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1486)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskCompletion(DAGScheduler.scala:1310)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1711)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1669)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1658)
    at o