List all objects in S3 with a given prefix in Scala

I am trying to list all objects in an AWS S3 bucket, given a bucket name and a filter prefix, using the following code.

import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request

// Bucket whose objects are listed; read by list_objects when it builds the request.
val bucket_name = "Mybucket"
// Key prefix passed to list_objects in the call at the end of this snippet.
val fiter_prefix = "Test/a/"

/** Requests the objects of `bucket_name` whose keys start with `str`, page by page, until a
  * response reports that it is no longer truncated.
  *
  * NOTE(review): nothing is accumulated inside the loop. `result` is overwritten on every pass and
  * is only read after the loop ends, so only the object summaries of the final page are used.
  *
  * NOTE(review): the last expression ends in `.size`, which does not match the declared
  * `mutable.Buffer[String]` return type. `s3_client`, `mutable` and `ListObjectsV2Result` are also
  * not defined or imported anywhere in this snippet.
  *
  * @param str key prefix used to filter the listing
  */
def list_objects(str: String): mutable.Buffer[String] = {
        val request : ListObjectsV2Request = new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)
        var result: ListObjectsV2Result = new ListObjectsV2Result()
        do {
         // Each pass replaces `result` with the newest page; the previous page's summaries are not saved.
         result = s3_client.listObjectsV2(request)
         val token = result.getNextContinuationToken
         System.out.println("Next Continuation Token: " + token)
         // Feed the token back into the same request so the next call asks for the following page.
         request.setContinuationToken(token)
        }while(result.isTruncated)
        // Only the final page is still held in `result` at this point.
        result.getObjectSummaries.asScala.map(_.getKey).size
}

// Run the listing for the configured prefix; the returned value is not bound to a name here.
list_objects(fiter_prefix)

I have applied the continuation-token method, but I am only getting the last page of objects. For example, if the prefix has 2210 objects, I get back only 210 objects.

Regards Mahi



Solution 1:[1]

This is the code which worked for me.

import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request

// Bucket whose objects are listed; read by list_objects when it builds the request.
val bucket_name = "Mybucket"
// Key prefix passed to list_objects in the call at the end of this snippet.
val fiter_prefix = "Test/a/"

/** Lists the key of every object in `bucket_name` whose key starts with `str`.
  *
  * `listObjectsV2` hands back the matches one page at a time, so the same request is re-issued
  * with the continuation token of the previous response until a response reports that it is no
  * longer truncated. Keys are returned in the order the pages were received.
  *
  * For each page the next continuation token and the page's key count are printed; the total
  * key count is printed once at the end.
  *
  * @param str key prefix used to filter the listing
  * @return the keys of all matching objects, across every page
  */
def list_objects(str: String): List[String] = {
  val s3_client = new AmazonS3Client
  val request   = new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)

  // A builder appends each page in time proportional to that page. Concatenating immutable
  // lists (`accumulated ::: page`) would re-copy everything gathered so far on every page.
  val keys = List.newBuilder[String]

  // Letting the response type be inferred keeps this block compiling with only the imports
  // shown in the snippet (ListObjectsV2Result is not among them).
  var truncated = true
  while (truncated) {
    val page  = s3_client.listObjectsV2(request)
    val token = page.getNextContinuationToken
    System.out.println("Next Continuation Token: " + token)
    // The request object is reused; only its continuation token changes between calls.
    request.setContinuationToken(token)

    val pageKeys = page.getObjectSummaries.asScala.map(_.getKey)
    println(pageKeys.size)
    keys ++= pageKeys

    truncated = page.isTruncated
  }

  val allKeys = keys.result()
  // Explicit tuple: prints "(size,<n>)" without relying on argument auto-tupling.
  println(("size", allKeys.size))
  allKeys
}

// Run the listing for the configured prefix; the returned List[String] is not bound to a name here.
list_objects(fiter_prefix)

Solution 2:[2]

listObjectsV2 returns some or all (up to 1,000) of the objects in a bucket per call, as stated here. You need to use the continuation token to iterate over the rest of the objects in the bucket.

There is example code for Java here.

Solution 3:[3]

A solution using vanilla Scala that avoids vars by using tail recursion:

  import software.amazon.awssdk.regions.Region
  import software.amazon.awssdk.services.s3.S3Client
  import software.amazon.awssdk.services.s3.model.{ListObjectsV2Request, 
  ListObjectsV2Response}

  import scala.annotation.tailrec
  import scala.collection.JavaConverters.asScalaBufferConverter
  import scala.collection.mutable
  import scala.collection.mutable.ListBuffer

  // Placeholder inputs for the example request built further down.
  // sourceBucket is passed to the request builder's bucket(...).
  val sourceBucket    = "yourbucket"
  // sourceKey and subFolderPrefix are joined with "/" to form the request's key prefix.
  val sourceKey       = "yourKey"
  val subFolderPrefix = "yourprefix"


  /** Collects the key of every object matched by `initReq`, following pagination to the end.
    *
    * The first call sends `initReq` unchanged. Every later call sends a copy of `initReq` that
    * carries the continuation token of the previous response. Recursion stops when a response
    * has no next continuation token.
    *
    * Keys are appended page by page, so the result keeps the order in which the service
    * returned them.
    *
    * @param s3Client client used to issue the `listObjectsV2` calls
    * @param initReq  base request (bucket, prefix, ...) reused for every page
    * @return the keys of all listed objects, in the order they were received
    */
  def getAllPaths(s3Client: S3Client, initReq: ListObjectsV2Request): List[String] = {
    @tailrec
    def collectPages(tokenOpt: Option[String], collected: Vector[String]): Vector[String] = {
      // No token means this is the first page, so the caller's request goes out as-is.
      val pageReq  = tokenOpt.fold(initReq)(token => initReq.toBuilder.continuationToken(token).build())
      val response = s3Client.listObjectsV2(pageReq)
      // Append (not prepend) so earlier pages stay ahead of later ones.
      val keys     = collected ++ response.contents().asScala.map(_.key())

      // Option(...) turns a null token into None, which ends the recursion.
      Option(response.nextContinuationToken()) match {
        case None => keys
        case next => collectPages(next, keys)
      }
    }

    collectPages(None, Vector.empty).toList
  }
  // Client for the US_WEST_2 region.
  val s3Client = S3Client
    .builder()
    .region(Region.US_WEST_2)
    .build()

  // Request for sourceBucket, limited to keys that start with "<sourceKey>/<subFolderPrefix>".
  val request: ListObjectsV2Request = {
    val keyPrefix = sourceKey + "/" + subFolderPrefix
    ListObjectsV2Request.builder().bucket(sourceBucket).prefix(keyPrefix).build()
  }

  // Every key returned for the request above, gathered by getAllPaths.
  val listofAllKeys: List[String] = getAllPaths(s3Client, request)

Sources

This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.

Source: Stack Overflow

Solution Source
Solution 1 Mahi
Solution 2
Solution 3