Mee*_*ohi 62
使用官方aws-sdk:
// Flat list of every object entry (the items of each page's `Contents`)
// collected by listAllKeys.
var allKeys = [];

/**
 * Lists every object in `s3bucket` with s3.listObjects, following pagination,
 * and appends each returned entry to `allKeys`.
 *
 * @param {string|undefined} marker - Key to start listing after; pass
 *   undefined for the first page.
 * @param {function(Error=)} cb - Called with no arguments once the last page
 *   has been collected, or with the error if a request fails.
 */
function listAllKeys(marker, cb)
{
  s3.listObjects({Bucket: s3bucket, Marker: marker}, function(err, data){
    if (err) {
      cb(err);
      return;
    }
    var contents = data.Contents || [];
    // Append the entries themselves, not the page array, so allKeys stays flat.
    allKeys.push(...contents);
    if (!data.IsTruncated) {
      cb();
      return;
    }
    // NextMarker is only returned when a Delimiter was specified; otherwise the
    // last key of the page is the marker for the next request.
    var lastKey = contents.length > 0 ? contents[contents.length - 1].Key : undefined;
    var nextMarker = data.NextMarker || lastKey;
    if (!nextMarker) {
      cb(new Error('Truncated listing returned neither NextMarker nor any keys'));
      return;
    }
    listAllKeys(nextMarker, cb);
  });
}
Run Code Online (Sandbox Code Playgroud)
2017年更新:基本思路相同,但现在推荐使用listObjectsV2( ... ),并通过ContinuationToken进行分页(参见s3.listObjectsV2):
// Flat list of every object entry (the items of each page's `Contents`)
// collected by listAllKeys.
var allKeys = [];

/**
 * Lists every object in `s3bucket` with s3.listObjectsV2, following
 * continuation tokens, and accumulates the returned entries in `allKeys`.
 *
 * @param {string|undefined} token - Continuation token of the page to fetch;
 *   pass a falsy value for the first page.
 * @param {function(Error=)} cb - Called with no arguments once the last page
 *   has been collected, or with the error if a request fails.
 */
function listAllKeys(token, cb)
{
  var opts = { Bucket: s3bucket };
  if(token) opts.ContinuationToken = token;
  s3.listObjectsV2(opts, function(err, data){
    if (err) {
      cb(err);
      return;
    }
    // A page may carry no Contents; concat(undefined) would append `undefined`.
    allKeys = allKeys.concat(data.Contents || []);
    if(data.IsTruncated)
      listAllKeys(data.NextContinuationToken, cb);
    else
      cb();
  });
}
Run Code Online (Sandbox Code Playgroud)
Ken*_*Lin 15
这是我编写的用于从截断列表中组装S3对象的Node代码.
// Request parameters handed to s3ListObjects (and from there to s3.listObjects).
// <yourbucket> and <yourprefix> are placeholders, not valid JavaScript:
// replace them with your own values before running.
var params = {
Bucket: <yourbucket>,
Prefix: <yourprefix>,
};
// Accumulator that s3ListObjects extends with every page and s3Print reads.
var s3DataContents = []; // Single array of all combined S3 data.Contents
/**
 * Prints the listing accumulated in `s3DataContents` to stdout.
 *
 * With `program.al` set, the whole array is dumped as JSON. Otherwise one key
 * is printed per line, prefixed with its 1-based position unless `program.b`
 * is set.
 */
function s3Print() {
  // --al: Print all objects
  if (program.al) {
    console.log(JSON.stringify(s3DataContents, null, " "));
    return;
  }
  // --b: Print key only, otherwise also print index
  for (const [position, entry] of s3DataContents.entries()) {
    const label = program.b ? '' : (position + 1) + ': ';
    console.log(label + entry.Key);
  }
}
/**
 * Fetches every page of an s3.listObjects listing and appends each page's
 * `Contents` to `s3DataContents`.
 *
 * @param {object} params - listObjects request parameters. The object is not
 *   modified; follow-up requests use a copy with `Marker` set.
 * @param {function()} cb - Called with no arguments after the final page has
 *   been collected. It is not called when a request fails; the error is logged
 *   instead.
 */
function s3ListObjects(params, cb) {
  s3.listObjects(params, function(err, data) {
    if (err) {
      console.log("listS3Objects Error:", err);
      return;
    }
    // A truncated page can contain only CommonPrefixes and no Contents.
    var contents = data.Contents || [];
    s3DataContents = s3DataContents.concat(contents);
    if (!data.IsTruncated) {
      cb();
      return;
    }
    // NextMarker is only present when a Delimiter was requested; when it is
    // missing, the last returned key is the marker for the next page.
    var lastKey = contents.length > 0 ? contents[contents.length - 1].Key : undefined;
    var nextMarker = data.NextMarker || lastKey;
    if (!nextMarker) {
      console.log("listS3Objects Error:", new Error("Truncated listing returned neither NextMarker nor any keys"));
      return;
    }
    // Copy rather than mutate so the caller's params object stays reusable.
    s3ListObjects(Object.assign({}, params, { Marker: nextMarker }), cb);
  });
}
// Start the listing; s3Print is the callback invoked once the last page has been collected.
s3ListObjects(params, s3Print);
Run Code Online (Sandbox Code Playgroud)
请注意listObjects文档中关于NextMarker的说明:它并不总是存在于返回的数据对象中,因此我在上面的代码中根本不使用它...
NextMarker - (字符串)当响应被截断时(响应中的IsTruncated元素值为true),您可以使用此字段中的键名作为后续请求中的标记来获取下一组对象.Amazon S3按字母顺序列出对象.注意:仅当您指定了分隔符(delimiter)请求参数时,才会返回此元素.如果响应不包含NextMarker并且它被截断,则可以使用响应中最后一个Key的值作为后续请求中的标记来获取下一组对象键.
整个程序现已推送到https://github.com/kenklin/s3list.
nki*_*tku 12
const { S3 } = require("aws-sdk");
const s3 = new S3();
Run Code Online (Sandbox Code Playgroud)
/**
 * Async generator that yields one s3.listObjectsV2 response per page.
 *
 * The caller's `opts` is shallow-copied; the copy's ContinuationToken is
 * overwritten with each response's NextContinuationToken, and iteration ends
 * after the first page that comes back without one.
 *
 * @param {object} opts - listObjectsV2 request parameters.
 * @yields {object} The response of each listObjectsV2 call.
 */
async function* listAllKeys(opts) {
  const request = { ...opts };
  while (true) {
    const page = await s3.listObjectsV2(request).promise();
    // Advance the token before yielding, so the request is already prepared
    // when the consumer asks for the next page.
    request.ContinuationToken = page.NextContinuationToken;
    yield page;
    if (!request.ContinuationToken) {
      return;
    }
  }
}
Run Code Online (Sandbox Code Playgroud)
// Request options passed to listAllKeys(opts) in the usage examples.
// Only Bucket is set; the commented-out entries show the other parameter
// names with placeholder values (uncomment and fill in as needed).
const opts = {
Bucket: "bucket-xyz" /* required */,
// ContinuationToken: 'STRING_VALUE',
// Delimiter: 'STRING_VALUE',
// EncodingType: url,
// FetchOwner: true || false,
// MaxKeys: 'NUMBER_VALUE',
// Prefix: 'STRING_VALUE',
// RequestPayer: requester,
// StartAfter: 'STRING_VALUE'
};
Run Code Online (Sandbox Code Playgroud)
/**
 * Logs the `Contents` of every page produced by listAllKeys(opts).
 *
 * @returns {Promise<void>} Resolves after the last page has been logged;
 *   rejects if fetching a page fails.
 */
async function main() {
  // using for of await loop
  for await (const data of listAllKeys(opts)) {
    console.log(data.Contents);
  }
}
// Handle the rejection explicitly instead of leaving the promise floating:
// report the error and mark the process as failed.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Run Code Online (Sandbox Code Playgroud)
就是这样
async function main() {
const keys = listAllKeys(opts);
console.log(await keys.next());
// {value: {…}, done: false}
console.log(await keys.next());
// {value: {…}, done: false}
console.log(await keys.next());
// {value: undefined, done: true}
}
main();
Run Code Online (Sandbox Code Playgroud)
/**
 * Builds a subscribe function that feeds every page from listAllKeys(opts) to
 * an observer.
 *
 * The returned function starts listing immediately. Each page goes to
 * `o$.next(page)`. When the listing ends, or after a stop request, it calls
 * `o$.complete()`. If the listing (or `o$.next`) throws, the error goes to
 * `o$.error(err)` and `complete` is not called.
 *
 * @param {object} opts - Request options forwarded to listAllKeys.
 * @returns {function(object): function} Takes an observer with `next`,
 *   `error` and `complete` methods and returns a stop function. After stop is
 *   called, no further pages are emitted or requested.
 */
const lister = (opts) => (o$) => {
  let needMore = true;
  // Named `pump` so it does not shadow Node's global `process`.
  const pump = async () => {
    try {
      for await (const data of listAllKeys(opts)) {
        // Stop may have been requested while this page was being fetched.
        if (!needMore) break;
        o$.next(data);
        // Stop may have been requested by the consumer while handling the page;
        // break now so the next page is never requested.
        if (!needMore) break;
      }
    } catch (err) {
      o$.error(err);
      return;
    }
    o$.complete();
  };
  pump();
  return () => (needMore = false);
};
Run Code Online (Sandbox Code Playgroud)
// Using Rxjs
const { Observable } = require("rxjs");
const { flatMap } = require("rxjs/operators");
/**
 * Wraps lister(opts) in an RxJS Observable, flattens every page into its
 * individual `Contents` entries and logs each entry.
 *
 * @returns {object} The subscription returned by `subscribe`.
 */
function listAll() {
  // `Observable.create` is deprecated; the constructor takes the same subscribe function.
  return new Observable(lister(opts))
    // A page may have no Contents; map it to an empty list so flatMap
    // does not receive undefined.
    .pipe(flatMap((v) => v.Contents ?? []))
    .subscribe({
      next: (entry) => console.log(entry),
      // Report errors delivered through the observable.
      error: (err) => console.error(err),
    });
}
// Run the RxJS example: builds the observable and subscribes to it.
listAll();
Run Code Online (Sandbox Code Playgroud)
// Example: drive lister() with a plain observer object whose callbacks
// re-emit every notification as an event on a Node.js EventEmitter.
const EventEmitter = require("events");
const _eve = new EventEmitter();
// Listener for "next" events; logs the value it receives.
// Declared async although it contains no await.
async function onData(data) {
// will be called for each set of data
console.log(data);
}
// Listener for "error" events; logs the error it receives.
async function onError(error) {
// will be called if any error
console.log(error);
}
// Listener for "complete" events; intentionally empty placeholder.
async function onComplete() {
// will be called when data completely received
}
// Register one listener per notification type.
_eve.on("next", onData);
_eve.on("error", onError);
_eve.on("complete", onComplete);
// Start listing right away. The observer forwards each callback to the
// emitter; the function returned by lister(opts)(observer) is kept as `stop`
// so the listing can be asked to stop later.
const stop = lister(opts)({
next: (v) => _eve.emit("next", v),
error: (e) => _eve.emit("error", e),
complete: (v) => _eve.emit("complete", v),
});
Run Code Online (Sandbox Code Playgroud)
import {
paginateListObjectsV2,
S3Client,
S3ClientConfig,
} from "@aws-sdk/client-s3";
/* // For Deno
import {
paginateListObjectsV2,
S3Client,
S3ClientConfig,
} from "https://deno.land/x/aws_sdk@v3.14.0.0/client-s3/mod.ts"; */
// Client configuration for the v3 S3Client.
// NOTE(review): the credential strings are literal placeholders — presumably
// to be replaced with real values or loaded from the environment; confirm
// before use and never commit real secrets.
const s3Config: S3ClientConfig = {
credentials: {
accessKeyId: "accessKeyId",
secretAccessKey: "secretAccessKey",
},
region: "us-east-1",
};
// Shared client used by getAllS3Files.
const client = new S3Client(s3Config);
// Request input passed to paginateListObjectsV2; only the bucket name is set.
const s3Opts = { Bucket: "bucket-xyz" };
/**
 * Collects the `Contents` entries of every page yielded by
 * paginateListObjectsV2({ client }, s3Opts) into a single array.
 *
 * @returns {Promise<Array>} All entries, in the order the pages returned
 *   them; pages without `Contents` contribute nothing.
 */
async function getAllS3Files() {
  const files = [];
  const pages = paginateListObjectsV2({ client }, s3Opts);
  for await (const page of pages) {
    const { Contents: objects } = page;
    if (objects === undefined || objects === null) {
      continue;
    }
    files.push(...objects);
  }
  return files;
}
Run Code Online (Sandbox Code Playgroud)
实际上,aws2js支持通过s3.get()方法调用在低级别列出存储桶中的对象.要做到这一点,必须传递Amazon S3 REST API页面上记录的prefix参数:
// List the objects under a key prefix through aws2js's low-level s3.get().
var s3 = require('aws2js').load('s3', awsAccessKeyId, awsSecretAccessKey);
s3.setBucket(bucketName);
// The prefix is a query-string value, so it needs encodeURIComponent:
// encodeURI leaves '&', '=', '+', '#' and '?' unescaped, which would corrupt
// the query for keys that contain them.
var folder = encodeURIComponent('some/path/to/S3/folder');
var url = '?prefix=' + folder;
// 'xml' is the response-handler argument; the callback logs both its error
// and data arguments.
s3.get(url, 'xml', function (error, data) {
  console.log(error);
  console.log(data);
});
Run Code Online (Sandbox Code Playgroud)
上面代码段中的data变量包含bucketName存储桶中对象的列表.
当我找不到一个好的现有解决方案时,我发布了knox-copy.它将REST API的所有分页细节封装在大家熟悉的Node流接口中:
// Stream the keys under a prefix with knox-copy, logging each key as it arrives.
var knoxCopy = require('knox-copy');
var client = knoxCopy.createClient({
  key: '<api-key-here>',
  secret: '<secret-here>',
  bucket: 'mrbucket'
});
client
  .streamKeys({
    // omit the prefix to list the whole bucket
    prefix: 'buckets/of/fun'
  })
  .on('data', (key) => {
    console.log(key);
  });
Run Code Online (Sandbox Code Playgroud)
如果您列出的文件少于1000个,则单个页面将起作用:
// Fetch a single page of keys under the prefix and log its Contents.
client.listPageOfKeys({
  prefix: 'smaller/bucket/o/fun'
}, function(err, page) {
  // Report the error and stop; dereferencing `page` after a failed request
  // would throw.
  if (err) {
    console.error(err);
    return;
  }
  console.log(page.Contents); // <- Here's your list of files
});
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
50779 次 |
| 最近记录: |