Scala error - Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z

I have a requirement where I am reading data from a CSV file and writing it to a Delta table using Scala on Windows OS. My Scala code is given below:

import com.typesafe.config.ConfigFactory
import com.typesafe.scalalogging.Logger
import org.apache.spark.sql.SparkSession

object DelataLake_Pipeline {
  def main(args: Array[String]): Unit = {
    val masterConfig = ConfigFactory.load()
    val sourceConfig = masterConfig.getConfig("source-db")
    val logger = Logger("Pipeline")
    println("Printing Step 1")

    val spark = SparkSession
      .builder
      .appName("GPipe")
      .master("local")
      .getOrCreate()

    /**************suppress Logs******************/
    org.slf4j.LoggerFactory.getLogger(org.slf4j.Logger.ROOT_LOGGER_NAME)
      .asInstanceOf[ch.qos.logback.classic.Logger]
      .setLevel(ch.qos.logback.classic.Level.WARN)

    /*Read data from source file*/
    val sourceTableDF = spark.read
      .options(Map("inferSchema" -> "false", "delimiter" -> ",", "header" -> "true"))
      .csv(sourceConfig.getString("sourcefile"))

    println("Printing source CSV data")
    sourceTableDF.show()

    /* ***************Create Delta Table*************************/
    println("Start creating delta-table ")

    try {
      sourceTableDF.write.format("delta").mode("overwrite").save("datable")
    } catch {
      case _: Throwable => // Catching all exceptions and not doing anything with them
    }
    println("End creating delta-table")

    /* ***************Read Delta table into a dataframe ********** */
    println("Printing delta table data")
    val df_dd = spark.read.format("delta").load("datable")
    df_dd.show()

    spark.stop()
  }
}

When I execute the above code, I get the following exception:

Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I)Z
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Native Method)
    at org.apache.hadoop.io.nativeio.NativeIO$Windows.access(NativeIO.java:793)
    at org.apache.hadoop.fs.FileUtil.canRead(FileUtil.java:1215)
    at org.apache.hadoop.fs.FileUtil.list(FileUtil.java:1420)
    at org.apache.hadoop.fs.RawLocalFileSystem.listStatus(RawLocalFileSystem.java:601)
    at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:1972)
    at org.apache.hadoop.fs.FileSystem.listStatus(FileSystem.java:2014)
    at org.apache.hadoop.fs.ChecksumFileSystem.listStatus(ChecksumFileSystem.java:761)
    at org.apache.spark.sql.delta.storage.HadoopFileSystemLogStore.listFrom(HadoopFileSystemLogStore.scala:83)
    at org.apache.spark.sql.delta.storage.DelegatingLogStore.listFrom(DelegatingLogStore.scala:119)
    at org.apache.spark.sql.delta.SnapshotManagement.listFrom(SnapshotManagement.scala:62)
    at org.apache.spark.sql.delta.SnapshotManagement.listFrom$(SnapshotManagement.scala:61)
    at org.apache.spark.sql.delta.DeltaLog.listFrom(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.SnapshotManagement.getLogSegmentForVersion(SnapshotManagement.scala:95)
    at org.apache.spark.sql.delta.SnapshotManagement.getLogSegmentForVersion$(SnapshotManagement.scala:89)
    at org.apache.spark.sql.delta.DeltaLog.getLogSegmentForVersion(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.SnapshotManagement.$anonfun$updateInternal$1(SnapshotManagement.scala:284)
    at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
    at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
    at org.apache.spark.sql.delta.DeltaLog.recordOperation(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
    at org.apache.spark.sql.delta.DeltaLog.recordDeltaOperation(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.SnapshotManagement.updateInternal(SnapshotManagement.scala:282)
    at org.apache.spark.sql.delta.SnapshotManagement.updateInternal$(SnapshotManagement.scala:281)
    at org.apache.spark.sql.delta.DeltaLog.updateInternal(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.SnapshotManagement.$anonfun$update$1(SnapshotManagement.scala:243)
    at org.apache.spark.sql.delta.DeltaLog.lockInterruptibly(DeltaLog.scala:163)
    at org.apache.spark.sql.delta.SnapshotManagement.update(SnapshotManagement.scala:243)
    at org.apache.spark.sql.delta.SnapshotManagement.update$(SnapshotManagement.scala:239)
    at org.apache.spark.sql.delta.DeltaLog.update(DeltaLog.scala:62)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommit(OptimisticTransaction.scala:749)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommit$(OptimisticTransaction.scala:715)
    at org.apache.spark.sql.delta.OptimisticTransaction.doCommit(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$2(OptimisticTransaction.scala:684)
    at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
    at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
    at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
    at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$doCommitRetryIteratively$1(OptimisticTransaction.scala:680)
    at org.apache.spark.sql.delta.DeltaLog.lockInterruptibly(DeltaLog.scala:163)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.lockCommitIfEnabled(OptimisticTransaction.scala:659)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively(OptimisticTransaction.scala:674)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.doCommitRetryIteratively$(OptimisticTransaction.scala:671)
    at org.apache.spark.sql.delta.OptimisticTransaction.doCommitRetryIteratively(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.liftedTree1$1(OptimisticTransaction.scala:522)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.$anonfun$commit$1(OptimisticTransaction.scala:462)
    at scala.runtime.java8.JFunction0$mcJ$sp.apply(JFunction0$mcJ$sp.scala:17)
    at com.databricks.spark.util.DatabricksLogging.recordOperation(DatabricksLogging.scala:77)
    at com.databricks.spark.util.DatabricksLogging.recordOperation$(DatabricksLogging.scala:67)
    at org.apache.spark.sql.delta.OptimisticTransaction.recordOperation(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation(DeltaLogging.scala:112)
    at org.apache.spark.sql.delta.metering.DeltaLogging.recordDeltaOperation$(DeltaLogging.scala:97)
    at org.apache.spark.sql.delta.OptimisticTransaction.recordDeltaOperation(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.commit(OptimisticTransaction.scala:459)
    at org.apache.spark.sql.delta.OptimisticTransactionImpl.commit$(OptimisticTransaction.scala:457)
    at org.apache.spark.sql.delta.OptimisticTransaction.commit(OptimisticTransaction.scala:86)
    at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1(WriteIntoDelta.scala:83)
    at org.apache.spark.sql.delta.commands.WriteIntoDelta.$anonfun$run$1$adapted(WriteIntoDelta.scala:78)
    at org.apache.spark.sql.delta.DeltaLog.withNewTransaction(DeltaLog.scala:198)
    at org.apache.spark.sql.delta.commands.WriteIntoDelta.run(WriteIntoDelta.scala:78)
    at org.apache.spark.sql.delta.sources.DeltaDataSource.createRelation(DeltaDataSource.scala:154)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:45)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:75)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:73)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.executeCollect(commands.scala:84)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:110)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
    at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
    at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:110)
    at org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:106)
    at org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:481)
    at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:82)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:481)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:30)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267)
    at org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
    at org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:30)
    at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:457)
    at org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:106)
    at org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:93)
    at org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:91)
    at org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:128)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:848)
    at org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:382)
    at org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:303)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239)
    at DelataLake_Pipeline$.writeData(DelataLake_Pipeline.scala:217)
    at DelataLake_Pipeline$.main(DelataLake_Pipeline.scala:189)
    at DelataLake_Pipeline.main(DelataLake_Pipeline.scala)

I have properly configured the environment variables, and my HADOOP_HOME folder has the hadoop.dll and winutils.exe files under its bin folder.
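
For reference, a small diagnostic sketch (assuming the usual HADOOP_HOME\bin layout) that confirms whether the JVM actually sees these files at runtime; Hadoop resolves its home from the hadoop.home.dir system property first and falls back to the HADOOP_HOME environment variable:

import java.io.File

// Resolve the Hadoop home the way Hadoop's Shell utility does: system property first, then env var.
val hadoopHome = Option(System.getProperty("hadoop.home.dir"))
  .orElse(Option(System.getenv("HADOOP_HOME")))
  .getOrElse(sys.error("Neither hadoop.home.dir nor HADOOP_HOME is set"))

val bin = new File(hadoopHome, "bin")
println(s"Hadoop home resolves to: $hadoopHome")
println(s"winutils.exe present: ${new File(bin, "winutils.exe").exists()}")
println(s"hadoop.dll present: ${new File(bin, "hadoop.dll").exists()}")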

Looking forward to your input on how to resolve this issue; because of this exception I cannot read data from the Delta table and apply transformations.



Solution 1:[1]

I had the same problem running ScalaTest with Spark 3.2.1. I solved it by updating winutils to the Hadoop 3.3.1 build from this repo:

https://github.com/kontext-tech/winutils

Cheers
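
If updating the system-wide HADOOP_HOME is not convenient, here is a minimal sketch (with an assumed path) of pointing just this application at the updated binaries. Hadoop checks the hadoop.home.dir system property before the HADOOP_HOME environment variable, so this has to run before the SparkSession is created:

// Assumed unpack location of the Hadoop 3.3.1 winutils binaries - adjust to your own path.
// Must be set before SparkSession.builder...getOrCreate() so Hadoop picks it up.
System.setProperty("hadoop.home.dir", "C:\\hadoop-3.3.1")

Note that hadoop.dll itself still has to be loadable by the JVM (for example from the PATH or System32, as described in Solution 2 below).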

Solution 2:[2]

This is what worked for me: download the latest winutils from https://github.com/kontext-tech/winutils, or check the RELEASE file in your Spark installation; it shows the version of Hadoop it was built with.

Steps

  1. Download the repo
  2. Create a folder named hadoop anywhere (e.g. desktop/hadoop)
  3. Paste the bin folder into that folder (you will then have hadoop/bin)
  4. Copy hadoop.dll to windows/system32 (an in-code alternative is sketched after these steps)
  5. Set the system environment variables:
     set HADOOP_HOME=c:/desktop/hadoop
     set PATH=%PATH%;%HADOOP_HOME%/bin;
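
As an alternative to copying hadoop.dll into System32 (step 4 above), some setups get away with loading the DLL explicitly at startup; a minimal sketch with an assumed path, which has to run before any Spark or Hadoop class touches NativeIO:

// Hypothetical path - point it at the hadoop.dll inside the bin folder from step 3.
// System.load takes an absolute path; once the library is loaded, native methods such as
// NativeIO$Windows.access0 can resolve against it without touching windows/system32.
System.load("C:\\desktop\\hadoop\\bin\\hadoop.dll")

The steps above (matching hadoop.dll/winutils.exe to the Hadoop version, plus HADOOP_HOME and PATH) remain the primary fix; the explicit load is only a per-application workaround.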

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution 1: Rafa Martinez de Castilla Diez
Solution 2: Ardan