List nested folders in a Google Cloud Storage bucket in C#
What's the best way to list nested folders in a Google Cloud Storage bucket? I have a bucket that contains year folders, then month folders, then day folders. Each day folder holds millions of files, and a month folder may be missing some day folders. When I run the code below it iterates over the files as well as the folders, which takes hours. Here is the code I am using:
/// <summary>
/// Lists the immediate sub-folders of <paramref name="folder"/> in <paramref name="bucket"/>.
/// </summary>
/// <remarks>
/// The listing is delegated to Cloud Storage by passing <c>Delimiter</c> as the list delimiter,
/// so the service returns one "prefix" per sub-folder instead of every object stored beneath it.
/// Enumerating all objects and deriving the folder names client-side is what made the previous
/// implementation take hours on folders that hold millions of files.
/// </remarks>
/// <param name="client">Storage client used to issue the list request.</param>
/// <param name="bucket">Name of the bucket to list; must be non-empty.</param>
/// <param name="folder">
/// Folder to list, ending in <c>Delimiter</c>; <c>""</c> (or <c>null</c>) means the bucket root.
/// </param>
/// <returns>
/// The distinct sub-folder names, each as a full prefix (<paramref name="folder"/> + name + <c>Delimiter</c>).
/// The sequence is lazy: requests are sent as it is enumerated.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="client"/> is null.</exception>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="bucket"/> is null, empty or whitespace.</exception>
/// <exception cref="ArgumentException">
/// <paramref name="folder"/> does not end in <c>Delimiter</c>, or is the bare delimiter.
/// </exception>
public static IEnumerable<string> ListFolders(this StorageClient client, string bucket, string folder = "")
{
    if (client == null) { throw new ArgumentNullException(nameof(client)); }
    // The single-string constructor of ArgumentOutOfRangeException takes the parameter NAME,
    // so the message has to be passed as the second argument.
    if (string.IsNullOrWhiteSpace(bucket)) { throw new ArgumentOutOfRangeException(nameof(bucket), "bucket must be non-empty"); }

    var delimiter = Delimiter.ToString();
    if (!string.IsNullOrEmpty(folder) && !folder.EndsWith(delimiter, StringComparison.Ordinal)) { throw new ArgumentException("folder must end in " + Delimiter); }
    if (!string.IsNullOrEmpty(folder) && folder == delimiter) { throw new ArgumentException("root folder is \"\", not " + Delimiter); }

    var prefix = folder ?? "";
    var options = new ListObjectsOptions { Delimiter = delimiter };

    return client
        .ListObjects(bucket, prefix, options)
        // Prefixes are only exposed on the raw page responses, not on the flattened object sequence.
        .AsRawResponses()
        // A page without sub-folders carries a null Prefixes list.
        .SelectMany(page => page.Prefixes ?? Enumerable.Empty<string>())
        .Distinct();
}
/// <summary>
/// Prints every month folder found under the given year folder, and under each month
/// its day folders, one path per console line.
/// </summary>
/// <param name="yearFolder">Name of the year folder below <c>settings.BucketFolder</c>.</param>
private static void ListLiveFolders(string yearFolder)
{
    var client = StorageClient.Create(StorageHelper.Credentials);
    var months = StorageHelper.ListFolders(client, settings.Bucket, $"{settings.BucketFolder}/{yearFolder}/").ToList();

    try
    {
        foreach (var month in months)
        {
            Console.WriteLine(month);

            // Materialise the day folders of this month, then print them in order.
            var days = StorageHelper.ListFolders(client, settings.Bucket, month).ToList();
            days.ForEach(day => Console.WriteLine(day));
        }
    }
    catch (Exception error)
    {
        // Best effort: report the failure message and stop listing.
        Console.WriteLine(error.Message);
    }
}
Solution 1:[1]
To achieve your goal, you have to filter the files with your own application logic. However, if neither the solution from the similar question linked in the comments nor the official documentation helps, you can open a feature request with Google using the Issue Tracker.
Solution 2:[2]
Here is the workaround I use in my .NET 5 project
Get everything first
/// <summary>
/// Downloads the metadata of every object whose name starts with <paramref name="dirName"/>
/// and turns the flat listing into a folder tree.
/// </summary>
/// <remarks>
/// The list endpoint returns results one page at a time; all pages are followed via the
/// next-page token so that large folders are not silently truncated to the first page.
/// </remarks>
/// <param name="dirName">Object-name prefix to list.</param>
/// <returns>The folder tree built by <c>GCloudHelper.TreeGenerator</c>; empty when nothing matches.</returns>
public async Task<List<FolderTree>> GetObjectListAsync(string dirName)
{
    var __request = _storageService.Objects.List(_bucketName);
    __request.Prefix = dirName;
    __request.Projection = ProjectionEnum.Full;

    var __items = new List<Google.Apis.Storage.v1.Data.Object>();
    do
    {
        var __res = await __request.ExecuteAsync();

        // Items is null (not empty) when a page contains no objects.
        if (__res?.Items != null)
        {
            __items.AddRange(__res.Items);
        }

        __request.PageToken = __res?.NextPageToken;
    }
    while (!string.IsNullOrEmpty(__request.PageToken));

    return GCloudHelper.TreeGenerator(__items);//call this method from GCloudHelper.cs
}
Working with resource locally
GCloudHelper.cs
using Newtonsoft.Json;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using Object = Google.Apis.Storage.v1.Data.Object;
namespace MyGCloudHelper
{
public class GCloudHelper
{
/// <summary>
/// Builds a list of <see cref="FolderTree"/> nodes from a flat sequence of storage objects
/// by splitting each object name on '/'.
/// </summary>
/// <param name="objects">Objects to organise; enumerated once into a list.</param>
/// <param name="matchedPath">
/// Path prefix stripped from object names before splitting. The default value, a single
/// space, is used as a "no prefix" sentinel.
/// </param>
/// <returns>
/// The generated nodes after being passed through GetFiles and SizeSetter; an empty list
/// when <paramref name="objects"/> contains no elements.
/// </returns>
public static List<FolderTree> TreeGenerator(IEnumerable<Object> objects, string matchedPath = " ")
{
var __tree = new List<FolderTree>();
var __items = objects.ToList();
if (!__items.Any()) return new();
var __matchedPath = matchedPath;
// With the default sentinel the items are filtered lazily by "name does not contain __matchedPath".
// The lambda captures the variable, and __matchedPath is reassigned inside the loop below, so
// items that have not been enumerated yet are tested against the most recently assigned path.
// NOTE(review): initially __matchedPath is " ", so names containing a space are skipped — confirm intended.
var __toDoItems = matchedPath == " " ? __items.Where(x => !x.Name.Contains(__matchedPath)) : __items;
foreach(var item in __toDoItems)
{
// Remove every occurrence of "<matchedPath>/" from the name, then walk its segments.
var __newName = item.Name.Replace($"{matchedPath}/", "");
var __arr = __newName.Split('/');
for (var i = 0; i < __arr.Length; i++)
{
var __part = __arr[i];
// Segments 0..i joined back together give the path of the current segment.
var __pathArr = __arr[..(i+1)];
var __prefix = matchedPath == " " ? "" : $"{matchedPath}/";
var __currentPath = __prefix + string.Join('/', __pathArr);
// Segments that IsFile rejects are treated as directories.
if (!IsFile(__part))
{
__tree.Add(new()
{
level = i,
name = __part,
path = __currentPath
});
// Other items (by name) whose name contains the current path anywhere.
var __children = __items.Where(cx => cx.Name != item.Name && cx.Name.Contains(__currentPath)).ToList();
if (__children.Any())
{
// Updating the captured variable changes the deferred filter on __toDoItems.
__matchedPath = __currentPath;
var __subs = GetSubs(__children, __currentPath);
// NOTE(review): indexes __tree by segment position i, which is not necessarily the node
// added just above once the list holds nodes from earlier items — confirm intended.
__tree[i].subs = __subs;
}
}
}
}
// Attach files to the generated nodes, then compute sizes.
__tree = GetFiles(__tree, __items);
return SizeSetter(__tree);
}
/// <summary>
/// Fills in <c>size</c> for every node: the sum of its own file sizes plus the sizes of
/// all of its sub-folders, which are computed first by recursion.
/// </summary>
/// <param name="tree">Nodes to update in place.</param>
/// <returns>The same list instance that was passed in.</returns>
private static List<FolderTree> SizeSetter(List<FolderTree> tree)
{
    foreach (var node in tree)
    {
        decimal ownBytes = node.files.Sum(file => (decimal)file.size);

        // Children must be sized before their totals can be added to the parent.
        if (node.subs.Any())
        {
            node.subs = SizeSetter(node.subs);
        }

        decimal nestedBytes = node.subs.Sum(child => (decimal)child.size);
        node.size = (ulong)(ownBytes + nestedBytes);
    }

    return tree;
}
/// <summary>
/// Attaches to every directory node the files whose object name starts with that
/// directory's path, then does the same for all sub-directories.
/// </summary>
/// <remarks>
/// Objects are matched by ordinal prefix ("&lt;dir.path&gt;/"). Matching the path anywhere in the
/// name would attribute files to the wrong directory, e.g. "b/a/x.txt" or "a/a/x.txt" would be
/// reported as a file of directory "a".
/// </remarks>
/// <param name="dirs">Directory nodes to populate in place.</param>
/// <param name="items">All listed objects.</param>
/// <returns>The same list instance that was passed in.</returns>
private static List<FolderTree> GetFiles(List<FolderTree> dirs, List<Object> items)
{
    foreach (var dir in dirs)
    {
        var __dirPrefix = dir.path + "/";
        var __candidates = items.Where(x => x.Name.StartsWith(__dirPrefix, StringComparison.Ordinal));

        foreach (var f in __candidates)
        {
            // First path segment after the directory prefix; IsFile decides whether it is a file name.
            var __fpart = f.Name.Substring(__dirPrefix.Length).Split('/')[0];
            if (IsFile(__fpart))
            {
                dir.files.Add(new()
                {
                    name = __fpart,
                    link = HttpUtility.UrlDecode(f.MediaLink),
                    size = f.Size,
                    type = f.ContentType,
                    path = $"{dir.path}/{__fpart}",
                    created = f.TimeCreated,
                    modified = f.Updated
                });
            }
        }

        if (dir.subs.Any())
        {
            dir.subs = GetFiles(dir.subs, items);//recurse
        }
    }

    return dirs;
}
/// <summary>
/// Builds the sub-directory nodes that sit directly below <paramref name="parantDir"/>,
/// recursing into each of them.
/// </summary>
/// <remarks>
/// <para>
/// A directory name is the first path segment after "&lt;parantDir&gt;/" that is non-empty and
/// that IsFile does not classify as a file. Names are reported once, in order of first appearance.
/// </para>
/// <para>
/// Differences from the previous implementation: the object list is no longer deep-cloned through
/// JSON on every recursion level (names are derived without mutating the objects); objects are
/// matched by ordinal prefix instead of "contains"; and the object that first revealed a directory
/// is no longer removed from that directory's children, which used to drop nested folders whenever
/// a branch held fewer objects than it had levels.
/// </para>
/// </remarks>
/// <param name="children">Objects to inspect; only those below <paramref name="parantDir"/> are used.</param>
/// <param name="parantDir">Path of the parent directory, without a trailing '/'.</param>
/// <returns>One node per direct sub-directory, each with its own <c>subs</c> populated.</returns>
private static List<FolderTree> GetSubs(List<Object> children, string parantDir)
{
    var __parentPrefix = $"{parantDir}/";

    var __dirNames = children
        .Where(cx => cx.Name.StartsWith(__parentPrefix, StringComparison.Ordinal))
        .Select(cx => cx.Name.Substring(__parentPrefix.Length).Split('/')[0])
        // An empty segment comes from an object named exactly "<parantDir>/" (a folder placeholder).
        .Where(n => n.Length > 0 && !IsFile(n))
        .Distinct()
        .ToList();

    var __tree = new List<FolderTree>();
    foreach (var __name in __dirNames)
    {
        var __currentPath = __parentPrefix + __name;
        var __childPrefix = __currentPath + "/";

        //find children
        var __children = children
            .Where(cx => cx.Name.StartsWith(__childPrefix, StringComparison.Ordinal))
            .ToList();

        // Each level requires a strictly longer prefix, so the recursion is bounded by name length.
        var __subs = __children.Any()
            ? GetSubs(__children, __currentPath)//recurse
            : new List<FolderTree>();

        __tree.Add(new()
        {
            name = __name,
            path = __currentPath,
            subs = __subs,
        });
    }

    return __tree;
}
/// <summary>
/// Heuristic file test: a path segment counts as a file when it contains at least one '.'.
/// </summary>
/// <param name="part">A single path segment (no '/').</param>
/// <returns><c>true</c> when <paramref name="part"/> contains a dot; otherwise <c>false</c>.</returns>
private static bool IsFile(string part)
{
    return part.IndexOf('.') >= 0;
}
}//class
/// <summary>
/// Temporary pairing of a path-segment name with the storage object it was taken from.
/// </summary>
internal class TmpStore
{
/// <summary>Path segment extracted from the object's name.</summary>
public string Name { get; set; }
/// <summary>The storage object associated with <see cref="Name"/>.</summary>
public Object item { get; set; }
}
/// <summary>
/// One directory in the generated tree, together with its sub-directories and files.
/// </summary>
public class FolderTree
{
    /// <summary>Directory name (a single path segment).</summary>
    public string name { get; set; }

    /// <summary>Segment index recorded when the node was created; 0 unless set explicitly.</summary>
    public int level { get; set; }

    /// <summary>Path of the directory, segments joined with '/'.</summary>
    public string path { get; set; }

    /// <summary>Total size of the directory's files plus the sizes of its sub-directories.</summary>
    public ulong size { get; set; }

    /// <summary>Sub-directories; never null by default.</summary>
    public List<FolderTree> subs { get; set; } = new List<FolderTree>();

    /// <summary>Files attached to this directory; never null by default.</summary>
    public List<FileProp> files { get; set; } = new List<FileProp>();
}
/// <summary>
/// Metadata of a single file as reported in a <see cref="FolderTree"/> node.
/// </summary>
public class FileProp
{
/// <summary>File name (last path segment).</summary>
public string name { get; set; }
/// <summary>Download link of the object (stored URL-decoded by GetFiles).</summary>
public string link { get; set; }
/// <summary>Object size as reported by the storage listing; may be null.</summary>
public ulong? size { get; set; }
/// <summary>Content type of the object; may be null.</summary>
public string type { get; set; }
/// <summary>Directory path and file name joined with '/'.</summary>
public string path { get; set; }
/// <summary>Creation time of the object, if reported.</summary>
public DateTime? created { get; set; }
/// <summary>Last update time of the object, if reported.</summary>
public DateTime? modified { get; set; }
}
}
Result
This produces a nested directory tree with files and sizes, in which `subs` is the list of subdirectories and `files` is the list of files.
=> The result might not be exactly what you had in mind, but it might be helpful.
[
{
"name": "myFolder",
"level": 0,
"path": "myFolder",
"size": 304340,
"subs": [
{
"name": "Resource",
"level": 0,
"path": "myFolder/Resource",
"size": 304301,
"subs": [
{
"name": "Image",
"level": 0,
"path": "myFolder/Resource/Image",
"size": 304301,
"subs": [
{
"name": "Logo",
"level": 0,
"path": "myFolder/Resource/Image/Logo",
"size": 304301,
"subs": [],
"files": [
{
"name": "fileImg01.png",
"link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg01.png",
"size": 64436,
"type": "image/jpeg",
"path": "myFolder/Resource/Image/Logo/fileImg01.png",
"created": "2022-05-01T11:13:27.727+07:00",
"modified": "2022-05-01T11:13:27.727+07:00"
},
{
"name": "fileImg02.png",
"link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg02.png",
"size": 175429,
"type": "image/jpeg",
"path": "myFolder/Resource/Image/Logo/fileImg02.png",
"created": "2022-05-01T11:06:35.58+07:00",
"modified": "2022-05-01T11:06:35.58+07:00"
},
{
"name": "fileImg03.png",
"link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/Resource/Image/Logo/fileImg03.png",
"size": 64436,
"type": "image/jpeg",
"path": "myFolder/Resource/Image/Logo/fileImg03.png",
"created": "2022-05-01T11:18:42.365+07:00",
"modified": "2022-05-01T11:18:42.365+07:00"
}
]
}
],
"files": []
}
],
"files": []
}
],
"files": [
{
"name": "README.MD",
"link": "https://storage.googleapis.com/download/storage/v1/b/myBucket/o/myFolder/README.MD",
"size": 39,
"type": null,
"path": "myFolder/README.MD",
"created": "2022-05-01T11:04:57.565+07:00",
"modified": "2022-05-01T11:04:57.565+07:00"
}
]
}
]
Note: this uses C# with .NET 5.
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | mdobrucki |
Solution 2 | sambath999 |