How to copy/move all objects in Amazon S3 from one prefix to another using the AWS SDK for Node.js
How do I copy all objects from one prefix to another? I have tried all possible ways to copy all objects in one shot from one prefix to another, but the only way that seems to work is by looping over a list of objects and copying them one by one. This is really inefficient. If I have hundreds of files in a folder, will I have to make hundreds of calls?
var params = {
  Bucket: bucket,
  CopySource: bucket + '/' + oldDirName + '/filename.txt',
  Key: newDirName + '/filename.txt',
};

s3.copyObject(params, function(err, data) {
  if (err) {
    callback.apply(this, [{
      type: "error",
      message: "Error while renaming Directory",
      data: err
    }]);
  } else {
    callback.apply(this, [{
      type: "success",
      message: "Directory renamed successfully",
      data: data
    }]);
  }
});
Solution 1:[1]
You will need to make one AWS.S3.listObjects() call to list your objects with a specific prefix. But you are correct: you will need to make one copy call for every object that you want to copy from one bucket/prefix to the same or another bucket/prefix.
You can also use a utility library like async to manage your requests.
var AWS = require('aws-sdk');
var async = require('async');

var bucketName = 'foo';
var oldPrefix = 'abc/';
var newPrefix = 'xyz/';
var s3 = new AWS.S3({params: {Bucket: bucketName}, region: 'us-west-2'});

var done = function(err, data) {
  if (err) console.log(err);
  else console.log(data);
};

s3.listObjects({Prefix: oldPrefix}, function(err, data) {
  if (data.Contents.length) {
    async.each(data.Contents, function(file, cb) {
      var params = {
        Bucket: bucketName,
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function(copyErr, copyData) {
        if (copyErr) {
          console.log(copyErr);
        } else {
          console.log('Copied: ', params.Key);
          cb();
        }
      });
    }, done);
  }
});
Hope this helps!
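If the prefix holds many objects, firing every copyObject request at once can overwhelm the connection; the async library also provides eachLimit, which caps how many requests run in parallel. The minimal sketch below (not part of the original answer) would replace the async.each call inside the listObjects callback, reusing the same variables; the limit of 10 is an arbitrary choice:

// Same copy loop as above, but with at most 10 copyObject requests in flight at once
async.eachLimit(data.Contents, 10, function(file, cb) {
  var params = {
    Bucket: bucketName,
    CopySource: bucketName + '/' + file.Key,
    Key: file.Key.replace(oldPrefix, newPrefix)
  };
  s3.copyObject(params, function(copyErr) {
    if (copyErr) console.log(copyErr);
    else console.log('Copied: ', params.Key);
    cb(copyErr); // always call cb, passing the error so `done` sees failures
  });
}, done);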
Solution 2:[2]
Here is a code snippet that does it in the async/await way:
const AWS = require('aws-sdk');
AWS.config.update({
  credentials: new AWS.Credentials(....), // credential parameters
});
AWS.config.setPromisesDependency(require('bluebird'));
const s3 = new AWS.S3();

... ...

const bucketName = 'bucketName'; // example bucket
const folderToMove = 'folderToMove/'; // old folder name
const destinationFolder = 'destinationFolder/'; // new destination folder

try {
  const listObjectsResponse = await s3.listObjects({
    Bucket: bucketName,
    Prefix: folderToMove,
    Delimiter: '/',
  }).promise();

  const folderContentInfo = listObjectsResponse.Contents;
  const folderPrefix = listObjectsResponse.Prefix;

  await Promise.all(
    folderContentInfo.map(async (fileInfo) => {
      await s3.copyObject({
        Bucket: bucketName,
        CopySource: `${bucketName}/${fileInfo.Key}`, // old file Key
        Key: `${destinationFolder}${fileInfo.Key.replace(folderPrefix, '')}`, // new file Key (destinationFolder already ends with '/')
      }).promise();
      await s3.deleteObject({
        Bucket: bucketName,
        Key: fileInfo.Key,
      }).promise();
    })
  );
} catch (err) {
  console.error(err); // error handling
}
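Note that the snippet uses await directly, so in a CommonJS script it has to run inside an async function; a minimal sketch of one way to wrap it:

// Wrap the try/catch block above in an async IIFE so `await` is legal:
(async () => {
  // ...listObjects, copyObject and deleteObject calls from the snippet above...
})();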
Solution 3:[3]
A small change to Aditya Manohar's code that improves the error handling in the s3.copyObject callback and actually finishes the "move" by removing the source files once the copy requests have completed:
const AWS = require('aws-sdk');
const async = require('async');

const bucketName = 'foo';
const oldPrefix = 'abc/';
const newPrefix = 'xyz/';

const s3 = new AWS.S3({
  params: {
    Bucket: bucketName
  },
  region: 'us-west-2'
});

// 1) List all the objects in the source "directory"
s3.listObjects({
  Prefix: oldPrefix
}, function (err, data) {
  if (data.Contents.length) {
    // Build up the parameters for the delete statement
    let paramsS3Delete = {
      Bucket: bucketName,
      Delete: {
        Objects: []
      }
    };

    // Collect every key returned by listObjects so that we can remove them all
    // in one deleteObjects call once the copies have finished
    data.Contents.forEach(function (content) {
      paramsS3Delete.Delete.Objects.push({
        Key: content.Key
      });
    });

    // 2) Copy all the source files to the destination
    async.each(data.Contents, function (file, cb) {
      var params = {
        CopySource: bucketName + '/' + file.Key,
        Key: file.Key.replace(oldPrefix, newPrefix)
      };
      s3.copyObject(params, function (copyErr, copyData) {
        if (copyErr) {
          console.log(copyErr);
        } else {
          console.log('Copied: ', params.Key);
        }
        cb();
      });
    }, function (asyncError, asyncData) {
      // All the requests for the file copy have finished
      if (asyncError) {
        return console.log(asyncError);
      } else {
        console.log(asyncData);
        // 3) Now remove the source files - that way we effectively moved all the content
        s3.deleteObjects(paramsS3Delete, (deleteError, deleteData) => {
          if (deleteError) return console.log(deleteError);
          return console.log(deleteData);
        });
      }
    });
  }
});
Note that I have moved the cb() callback outside the if/else block. That way, even when a copy fails, the async module still fires its final callback.
Solution 4:[4]
A further update to the original code that copies folders recursively. One limitation is that the code does not handle more than 1000 objects per prefix, and there is also a recursion depth limit if your folders are nested very deeply.
import AWS from 'aws-sdk';

AWS.config.update({ region: 'ap-southeast-1' });

/**
 * Copy an S3 "folder" (prefix) recursively
 * @param {string} bucket the bucket name
 * @param {string} source the source prefix (must end with '/')
 * @param {string} dest the destination prefix (must end with '/')
 * @returns {Promise} resolves once all objects have been copied
 */
export default async function s3CopyFolder(bucket, source, dest) {
  // sanity check: source and dest must end with '/'
  if (!source.endsWith('/') || !dest.endsWith('/')) {
    return Promise.reject(new Error('source or dest must end with fwd slash'));
  }

  const s3 = new AWS.S3();

  // plan: list through the source; sub-folders come back as CommonPrefixes and are handled recursively
  const listResponse = await s3.listObjectsV2({
    Bucket: bucket,
    Prefix: source,
    Delimiter: '/',
  }).promise();

  // copy objects
  await Promise.all(
    listResponse.Contents.map(async (file) => {
      await s3.copyObject({
        Bucket: bucket,
        CopySource: `${bucket}/${file.Key}`,
        Key: `${dest}${file.Key.replace(listResponse.Prefix, '')}`,
      }).promise();
    }),
  );

  // recursively copy sub-folders
  await Promise.all(
    listResponse.CommonPrefixes.map(async (folder) => {
      await s3CopyFolder(
        bucket,
        `${folder.Prefix}`,
        `${dest}${folder.Prefix.replace(listResponse.Prefix, '')}`,
      );
    }),
  );

  return Promise.resolve('ok');
}
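A quick usage sketch: the bucket name, prefixes, and module path below are made up for illustration.

// Hypothetical example: copies everything under photos/2021/ into archive/2021/
// in the same bucket, including nested sub-folders.
import s3CopyFolder from './s3CopyFolder';

s3CopyFolder('my-example-bucket', 'photos/2021/', 'archive/2021/')
  .then(() => console.log('copy finished'))
  .catch(console.error);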
Solution 5:[5]
None of the above handle large directories, as the list-objects-v2 command returns no more than 1000 results at a time, providing a continuation token to access additional "pages".
Here is a solution using the modern v3 SDK:
const {
  CopyObjectCommand,
  DeleteObjectCommand,
  ListObjectsV2Command,
} = require('@aws-sdk/client-s3');

const copyAll = async ({
  s3Client,
  sourceBucket,
  targetBucket = sourceBucket,
  sourcePrefix,
  targetPrefix,
  concurrency = 1,
  deleteSource = false,
}) => {
  let ContinuationToken;

  const copyFile = async (sourceKey) => {
    const targetKey = sourceKey.replace(sourcePrefix, targetPrefix);
    await s3Client.send(
      new CopyObjectCommand({
        Bucket: targetBucket,
        Key: targetKey,
        CopySource: `${sourceBucket}/${sourceKey}`,
      }),
    );
    if (deleteSource) {
      await s3Client.send(
        new DeleteObjectCommand({
          Bucket: sourceBucket,
          Key: sourceKey,
        }),
      );
    }
  };

  do {
    const { Contents = [], NextContinuationToken } = await s3Client.send(
      new ListObjectsV2Command({
        Bucket: sourceBucket,
        Prefix: sourcePrefix,
        ContinuationToken,
      }),
    );
    const sourceKeys = Contents.map(({ Key }) => Key);

    await Promise.all(
      new Array(concurrency).fill(null).map(async () => {
        while (sourceKeys.length) {
          await copyFile(sourceKeys.pop());
        }
      }),
    );

    ContinuationToken = NextContinuationToken;
  } while (ContinuationToken);
};
If the Promise.all part is unclear, it's just a poor man's "thread pool", allowing you to copy multiple files concurrently, which can dramatically speed things up. The copies don't use any of your bandwidth since the content is copied within AWS, so I had no issues with a value of 20 or more for concurrency. For clarity, it's just a parallelized version of:
const sourceKeys = Contents.map(({ Key }) => Key);
while (sourceKeys.length) {
  await copyFile(sourceKeys.pop());
}
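For completeness, a minimal usage sketch; the bucket name, prefixes, and region are assumptions for illustration:

const { S3Client } = require('@aws-sdk/client-s3');

const s3Client = new S3Client({ region: 'us-east-1' }); // assumed region

// Move everything under abc/ to xyz/ in the same bucket, 20 copies at a time.
copyAll({
  s3Client,
  sourceBucket: 'my-example-bucket', // assumed bucket name
  sourcePrefix: 'abc/',
  targetPrefix: 'xyz/',
  concurrency: 20,
  deleteSource: true, // delete each source object after it is copied
}).catch(console.error);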
Solution 6:[6]
Here's what I use for moving multiple objects.
const aws = require('aws-sdk')

const asyncForEach = async (array, callback) => {
  for (let i = 0; i < array.length; i++) {
    await callback(array[i], i, array)
  }
}

const awsMove = async ({ files }) => {
  try {
    const s3 = new aws.S3()
    const AWS_BUCKET = 'bucket'
    await asyncForEach(files, async file => {
      const copyParams = {
        Key: file.newPath,
        ACL: 'public-read',
        Bucket: AWS_BUCKET,
        CopySource: encodeURI(`/${AWS_BUCKET}/${file.oldPath}`)
      }
      await s3.copyObject(copyParams).promise()

      const deleteParams = {
        Key: file.oldPath,
        Bucket: AWS_BUCKET
      }
      await s3.deleteObject(deleteParams).promise()
    })
  } catch (err) {
    console.log(err)
  }
}

const files = [
  { oldPath: 'folder/file', newPath: 'folder-copy/file' },
  { oldPath: 'another-folder/file', newPath: 'another-folder-copy/file' }
]

await awsMove({ files })
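Because asyncForEach awaits each callback, the files are moved one after another, and the final await awsMove call has to run inside an async function (or an ES module with top-level await). If ordering doesn't matter, a hedged variant (not from the original answer, reusing the same aws require and bucket name) fires the moves concurrently:

// Hypothetical variant: run all moves in parallel instead of sequentially.
const awsMoveParallel = async ({ files }) => {
  const s3 = new aws.S3()
  const AWS_BUCKET = 'bucket'
  await Promise.all(files.map(async file => {
    await s3.copyObject({
      Key: file.newPath,
      Bucket: AWS_BUCKET,
      CopySource: encodeURI(`/${AWS_BUCKET}/${file.oldPath}`)
    }).promise()
    await s3.deleteObject({ Key: file.oldPath, Bucket: AWS_BUCKET }).promise()
  }))
}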
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
| Solution | Source |
|---|---|
| Solution 1 | 7hibault |
| Solution 2 | |
| Solution 3 | |
| Solution 4 | erwinkarim |
| Solution 5 | |
| Solution 6 | ozgrozer |