List all objects in S3 with a given prefix in Scala
I am trying to list all objects in an AWS S3 bucket, given a bucket name and a filter prefix, using the following code.
import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request
// Bucket to list and the key prefix used to filter the listing.
val bucket_name = "Mybucket"
val fiter_prefix = "Test/a/"
// Pages through listObjectsV2 for `bucket_name` with prefix `str`, feeding each
// response's next continuation token into the following request.
// NOTE(review): `s3_client`, `mutable` and `ListObjectsV2Result` are not defined or
// imported anywhere in this snippet.
def list_objects(str: String): mutable.Buffer[String] = {
val request : ListObjectsV2Request = new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)
var result: ListObjectsV2Result = new ListObjectsV2Result()
do {
// `result` is overwritten on every pass; nothing from earlier pages is kept.
result = s3_client.listObjectsV2(request)
val token = result.getNextContinuationToken
System.out.println("Next Continuation Token: " + token)
request.setContinuationToken(token)
}while(result.isTruncated)
// Only the final page's summaries are read here, which is why just the last page is seen.
// NOTE(review): `.size` yields an Int, which does not match the declared
// mutable.Buffer[String] return type.
result.getObjectSummaries.asScala.map(_.getKey).size
}
list_objects(fiter_prefix)
I have applied the continuation-token method, but I am only getting the last page of objects. For example, if the prefix has 2210 objects, I get back only 210 objects.
Regards Mahi
Solution 1:[1]
This is the code which worked for me.
import scala.collection.JavaConverters._
import com.amazonaws.services.s3.AmazonS3Client
import com.amazonaws.services.s3.model.ListObjectsV2Request
// Bucket to list and the key prefix used to filter the listing.
val bucket_name = "Mybucket"
val fiter_prefix = "Test/a/"

/**
 * Returns the keys of every object in `bucket_name` whose key starts with `str`.
 *
 * A single `listObjectsV2` call returns only one page of results, so the keys of
 * each page are accumulated and the request is re-issued with the next
 * continuation token for as long as the response reports it is truncated.
 *
 * @param str key prefix to filter on
 * @return all matching keys, in the order the pages were returned
 */
def list_objects(str: String): List[String] = {
  val s3_client = new AmazonS3Client
  val request: ListObjectsV2Request =
    new ListObjectsV2Request().withBucketName(bucket_name).withPrefix(str)

  // Tail-recursive paging loop; a Vector keeps the per-page append cheap instead
  // of re-copying the whole accumulated list on every page.
  @scala.annotation.tailrec
  def collectPages(acc: Vector[String]): Vector[String] = {
    val page = s3_client.listObjectsV2(request)
    val token = page.getNextContinuationToken
    System.out.println("Next Continuation Token: " + token)
    val collected = acc ++ page.getObjectSummaries.asScala.map(_.getKey)
    if (page.isTruncated) {
      // The same request object is reused; only its continuation token advances.
      request.setContinuationToken(token)
      collectPages(collected)
    } else collected
  }

  val all_keys = collectPages(Vector.empty).toList
  println(s"size: ${all_keys.size}")
  all_keys
}
list_objects(fiter_prefix)
Solution 2:[2]
`listObjectsV2` returns some or all (up to 1,000) of the objects in a bucket, as stated here. You need to use the continuation token to iterate over the rest of the objects in the bucket.
There is example code for Java here.
Solution 3:[3]
A solution using vanilla Scala that avoids vars by using tail recursion:
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.s3.S3Client
import software.amazon.awssdk.services.s3.model.{ListObjectsV2Request,
ListObjectsV2Response}
import scala.annotation.tailrec
import scala.collection.JavaConverters.asScalaBufferConverter
import scala.collection.mutable
import scala.collection.mutable.ListBuffer
// Placeholder inputs: the bucket name and the two path segments that are later
// joined with "/" to form the listing prefix.
val sourceBucket: String = "yourbucket"
val sourceKey: String = "yourKey"
val subFolderPrefix: String = "yourprefix"
/**
 * Lists the keys of every object matched by `initReq`, following continuation
 * tokens until a response carries no next token.
 *
 * @param s3Client client used to issue the `listObjectsV2` calls
 * @param initReq  request for the first page; later pages reuse it with only the
 *                 continuation token replaced
 * @return all keys, in the order the pages were returned
 */
def getAllPaths(s3Client: S3Client, initReq: ListObjectsV2Request): List[String] = {
  @tailrec
  def loop(tokenOpt: Option[String], acc: Vector[String]): Vector[String] = {
    // First page: send the request untouched. Later pages: same request plus token.
    val pageReq =
      tokenOpt.fold(initReq)(token => initReq.toBuilder.continuationToken(token).build())
    val response = s3Client.listObjectsV2(pageReq)
    // Append (rather than prepend) so the result keeps the listing order.
    val collected = acc ++ response.contents().asScala.map(_.key())
    // Option(...) turns a null next token into None, which ends the paging.
    Option(response.nextContinuationToken()) match {
      case next @ Some(_) => loop(next, collected)
      case None           => collected
    }
  }

  loop(None, Vector.empty).toList
}
// Client for the region used in this example.
val s3Client = S3Client.builder().region(Region.US_WEST_2).build()
// Listing request scoped to "<sourceKey>/<subFolderPrefix>" inside sourceBucket.
val request: ListObjectsV2Request =
  ListObjectsV2Request.builder()
    .bucket(sourceBucket)
    .prefix(s"$sourceKey/$subFolderPrefix")
    .build()
// Close the client once the listing is done, even if it throws, so it is not leaked.
val listofAllKeys: List[String] =
  try getAllPaths(s3Client, request)
  finally s3Client.close()
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | Mahi |
Solution 2 | |
Solution 3 |