'Scala error - Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
I have a requirement where I am reading data from a CSV file and writing it to a Delta table using Scala on Windows OS. My Scala code is given below:
import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.Logger
import org.apache.spark.sql.SparkSession

object DelataLake_Pipeline {
  def main(args: Array[String]): Unit = {
    val masterConfig = ConfigFactory.load()
    val sourceConfig = masterConfig.getConfig("source-db")
    val logger = Logger("Pipeline")

    println("Printing Step 1")
    val spark = SparkSession
      .builder
      .appName("GPipe")
      .master("local")
      .getOrCreate()

    /**************suppress Logs******************/
    org.slf4j.LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME)
      .asInstanceOf[ch.qos.logback.classic.Logger]
      .setLevel(ch.qos.logback.classic.Level.WARN)

    /* Read data from source file */
    val sourceTableDF = spark.read
      .options(Map("inferSchema" -> "false", "delimiter" -> ",", "header" -> "true"))
      .csv(sourceConfig.getString("sourcefile"))
    println("Printing source CSV data")
    sourceTableDF.show()

    /* ***************Create Delta Table*************************/
    println("Start creating delta-table ")
    try {
      sourceTableDF.write.format("delta").mode("overwrite").save("datable")
    } catch {
      case _: Throwable => // Catching all exceptions and not doing anything with them
    }
    println("End creating delta-table")

    /* ***************Read Delta table into a dataframe ********** */
    val df_dd = spark.read.format("delta").load("datable")
    df_dd.show()
    println("printing delta table data")

    spark.stop()
  }
}
When I execute the above code, I get the following exception:
Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:793)
at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1215)
at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1420)
at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:601)
at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
at org.apache.spark.sql.delta.storage.HadoopFileSystemLogStore.listFrom(HadoopFileSystemLogStore.scala:83)
at org.apache.spark.sql.delta.storage.DelegatingLogStore.listFrom(DelegatingLogStore.scala:119)
at org.apache.spark.sql.delta.SnapshotManagement.listFrom(SnapshotManagement.scala:62)
at org.apache.spark.sql.delta.SnapshotManagement.listFrom$(SnapshotManagement.scala:61)
at org.apache.spark.sql.delta.DeltaLog.listFrom(DeltaLog.scala:62)
at org.apache.spark.sql.delta.SnapshotManagement.getLogSegmentForVersion(SnapshotManagement.scala:95)
at org.apache.spark.sql.delta.SnapshotManagement.getLogSegmentForVersion$(SnapshotManagement.scala:89)
at org.apache.spark.sql.delta.DeltaLog.getLogSegmentForVersion(DeltaLog.scala:62)
at org.apache.spark.sql.delta.SnapshotManagement.$anonfun$updateInternal$1(SnapshotManagement.scala:284)
at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
at org.apache.spark.sql.delta.DeltaLog.recordOperation(DeltaLog.scala:62)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
at org.apache.spark.sql.delta.DeltaLog.recordDeltaOperation(DeltaLog.scala:62)
at org.apache.spark.sql.delta.SnapshotManagement.updateInternal(SnapshotManagement.scala:282)
at org.apache.spark.sql.delta.SnapshotManagement.updateInternal$(SnapshotManagement.scala:281)
at org.apache.spark.sql.delta.DeltaLog.updateInternal(DeltaLog.scala:62)
at org.apache.spark.sql.delta.SnapshotManagement.$anonfun$update$1(SnapshotManagement.scala:243)
at org.apache.spark.sql.delta.DeltaLog.lockInterruptibly(DeltaLog.scala:163)
at org.apache.spark.sql.delta.SnapshotManagement.update(SnapshotManagement.scala:243)
at org.apache.spark.sql.delta.SnapshotManagement.update$(SnapshotManagement.scala:239)
at org.apache.spark.sql.delta.DeltaLog.update(DeltaLog.scala:62)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommit(OptimisticTransaction.scala:749)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommit$(OptimisticTransaction.scala:715)
at org.apache.spark.sql.delta.OptimisticTransaction.doCommit(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$2(OptimisticTransaction.scala:684)
at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$1(OptimisticTransaction.scala:680)
at org.apache.spark.sql.delta.DeltaLog.lockInterruptibly(DeltaLog.scala:163)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.lockCommitIfEnabled(OptimisticTransaction.scala:659)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively(OptimisticTransaction.scala:674)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively$(OptimisticTransaction.scala:671)
at org.apache.spark.sql.delta.OptimisticTransaction.doCommitRetryIteratively(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.liftedTree1$1(OptimisticTransaction.scala:522)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$commit$1(OptimisticTransaction.scala:462)
at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.scala:17)
at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.commit(OptimisticTransaction.scala:459)
at org.apache.spark.sql.delta.OptimisticTransactionImpl.commit$(OptimisticTransaction.scala:457)
at org.apache.spark.sql.delta.OptimisticTransaction.commit(OptimisticTransaction.scala:86)
at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:83)
at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1$adapted(WriteIntoDelta.scala:78)
at org.apache.spark.sql.delta.DeltaLog.withNewTransaction(DeltaLog.scala:198)
at org.apache.spark.sql.delta.commands.WriteIntoDelta.run(WriteIntoDelta.scala:78)
at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:154)
at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:303)
at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
at DelataLake_Pipeline$.writeData(DelataLake_Pipeline.scala:217)
at DelataLake_Pipeline$.main(DelataLake_Pipeline.scala:189)
at DelataLake_Pipeline.main(DelataLake_Pipeline.scala)
I have properly configured the environment variables, and my HADOOP_HOME folder has the hadoop.dll and winutils.exe files under the bin folder.
Looking forward to your input on how to resolve this issue; because of this exception I cannot read data from the Delta table and apply transformations.
Solution 1:[1]
I had the same problem running ScalaTest with Spark 3.2.1. I solved it by updating winutils to Hadoop 3.3.1 from this repo:
https://github.com/kontext-tech/winutils
Cheers
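If you would rather not change the global environment while testing, a minimal sketch is shown below. It is not part of the original answer: the path C:\hadoop-3.3.1 is an assumed location for the unpacked winutils bundle, and hadoop.dll must still be loadable (e.g. from %HADOOP_HOME%\bin on the PATH or from windows/system32).

// Sketch only: assumes the Hadoop 3.3.1 winutils bundle is unpacked at C:\hadoop-3.3.1
// so that C:\hadoop-3.3.1\bin contains winutils.exe and hadoop.dll.
import org.apache.spark.sql.SparkSession

object DeltaPipelineLocalHadoop {
  def main(args: Array[String]): Unit = {
    // Tell Hadoop where to find winutils.exe; must be set before Spark starts.
    System.setProperty("hadoop.home.dir", "C:\\hadoop-3.3.1")

    val spark = SparkSession.builder
      .appName("GPipe")
      .master("local")
      .getOrCreate()

    // ...read the CSV and write the Delta table exactly as in the question...

    spark.stop()
  }
}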
Solution 2:[2]
This is what worked for me: download the latest winutils from https://github.com/kontext-tech/winutils. Or check the RELEASE text file of your Spark installation; it shows the version of Hadoop it is using.
Steps
- Download the repo
- Create a folder named hadoop anywhere (e.g. desktop/hadoop)
- Paste the bin folder into that folder (you will then have hadoop/bin)
- Copy hadoop.dll to windows/system32
- Set the system environment variables:
  set HADOOP_HOME=c:/desktop/hadoop
  set PATH=%PATH%;%HADOOP_HOME%/bin;
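After setting the variables, a quick way to confirm the layout before launching Spark is a small pre-flight check from plain Scala. This is only an illustrative sketch, not part of the original answer:

// Illustrative pre-flight check: verifies HADOOP_HOME points at a folder
// whose bin directory contains winutils.exe and hadoop.dll.
import java.nio.file.{Files, Paths}

object HadoopHomeCheck {
  def main(args: Array[String]): Unit = {
    val hadoopHome = sys.env.getOrElse("HADOOP_HOME",
      sys.props.getOrElse("hadoop.home.dir", ""))
    require(hadoopHome.nonEmpty, "HADOOP_HOME (or -Dhadoop.home.dir) is not set")

    val winutils  = Paths.get(hadoopHome, "bin", "winutils.exe")
    val hadoopDll = Paths.get(hadoopHome, "bin", "hadoop.dll")
    println(s"winutils.exe found: ${Files.exists(winutils)}")
    println(s"hadoop.dll found:   ${Files.exists(hadoopDll)}")
  }
}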
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source
---|---
Solution 1 | Rafa Martinez de Castilla Diez
Solution 2 | Ardan