Skip to content

Commit

Permalink
Skip unsupported directory/file names in list
Browse files Browse the repository at this point in the history
GCS supports objects of path-type
e.g. <bucket>/<path1>//<path2>
and treats it differently from
<bucket>/<path1>/<path2>.
GCSFuse, being a POSIX-compliant file system,
can only support the latter and
throws an error on the former. This error can
prevent the listing of all directories in the
parent directory, i.e. <path1>.
The current change skips, while listing, the
prefixes (directory names) which are empty, such
as "/" above, to avoid the error, and logs the
event as a warning.

Similarly, GCS supports <bucket>/<path1>/./<path2>
and <bucket>/<path1>/../<path2>, but "." and ".." have
a special reserved meaning in Linux filesystems.
So, these two prefixes are ignored as well.

Similarly, Linux filesystems do not support
'\0' in names of files or directories, while it
is supported in GCS object names. So, those are
ignored as well in the GCS list call output.
  • Loading branch information
gargnitingoogle committed May 6, 2024
1 parent f089702 commit ef0b1fe
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 10 deletions.
12 changes: 11 additions & 1 deletion internal/storage/bucket_handle.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,10 @@ import (
"net/http"

"cloud.google.com/go/storage"
"github.com/googlecloudplatform/gcsfuse/v2/internal/logger"
"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/gcs"
"github.com/googlecloudplatform/gcsfuse/v2/internal/storage/storageutil"
"github.com/googlecloudplatform/gcsfuse/v2/internal/util"
"google.golang.org/api/googleapi"
"google.golang.org/api/iterator"
)
Expand Down Expand Up @@ -278,8 +280,16 @@ func (b *bucketHandle) ListObjects(ctx context.Context, req *gcs.ListObjectsRequ
// https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/vendor/cloud.google.com/go/storage/storage.go#L1304
// https://github.com/GoogleCloudPlatform/gcsfuse/blob/master/vendor/cloud.google.com/go/storage/http_client.go#L370
if attrs.Prefix != "" {
list.CollapsedRuns = append(list.CollapsedRuns, attrs.Prefix)
if util.IsUnsupportedDirectoryName(attrs.Prefix) {
logger.Warnf("Ignoring unsupported object-prefix (implicit-directory): \"%s\"", attrs.Prefix)
} else {
list.CollapsedRuns = append(list.CollapsedRuns, attrs.Prefix)
}
} else {
if util.IsUnsupportedObjectName(attrs.Name) {
logger.Warnf("Encoutered unsupported object: \"%s\"", attrs.Name)
}

// Converting attrs to *Object type.
currObject := storageutil.ObjectAttrsToBucketObject(attrs)
list.Objects = append(list.Objects, currObject)
Expand Down
53 changes: 44 additions & 9 deletions internal/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,18 +25,25 @@ import (
"strings"
)

const GCSFUSE_PARENT_PROCESS_DIR = "gcsfuse-parent-process-dir"
const (
GCSFUSE_PARENT_PROCESS_DIR = "gcsfuse-parent-process-dir"

// Constants for read types - Sequential/Random
const Sequential = "Sequential"
const Random = "Random"
// Constants for read types - Sequential/Random
Sequential = "Sequential"
Random = "Random"

const MaxMiBsInUint64 uint64 = math.MaxUint64 >> 20
MaxMiBsInUint64 uint64 = math.MaxUint64 >> 20

// HeapSizeToRssConversionFactor is a constant factor
// which we multiply to the calculated heap-size
// to get the corresponding resident set size.
const HeapSizeToRssConversionFactor float64 = 2
// HeapSizeToRssConversionFactor is a constant factor
// which we multiply to the calculated heap-size
// to get the corresponding resident set size.
HeapSizeToRssConversionFactor float64 = 2
)

// UnsupportedObjectNameSubstrings lists the substrings that
// IsUnsupportedObjectName searches for anywhere inside a name.
var UnsupportedObjectNameSubstrings = []string{
	"//",   // empty path component
	"/./",  // "." path component
	"/../", // ".." path component
	"\000", // NUL byte
}

// UnsupportedDirectoryNamePrefixes lists the leading strings that
// IsUnsupportedDirectoryName rejects in addition to the substrings above.
var UnsupportedDirectoryNamePrefixes = []string{
	"/",
	"./",
	"../",
}

// 1. Returns the same filepath in case of absolute path or empty filename.
// 2. For child process, it resolves relative path like, ./test.txt, test.txt
Expand Down Expand Up @@ -110,3 +117,31 @@ func IsolateContextFromParentContext(ctx context.Context) (context.Context, cont
ctx = context.WithoutCancel(ctx)
return context.WithCancel(ctx)
}

// IsUnsupportedObjectName reports whether name contains at least one of
// the entries of UnsupportedObjectNameSubstrings, i.e. whether it is a
// file/directory name that GCSFuse cannot support even though it is
// supported in GCS object names.
func IsUnsupportedObjectName(name string) bool {
	for i := range UnsupportedObjectNameSubstrings {
		forbidden := UnsupportedObjectNameSubstrings[i]
		if strings.Contains(name, forbidden) {
			// One match is enough; no need to look at the rest.
			return true
		}
	}
	return false
}

// IsUnsupportedDirectoryName reports whether dirName is a directory name
// that GCSFuse cannot support even though it is supported in GCS object
// names. A name is rejected when IsUnsupportedObjectName rejects it, or
// when it starts with one of the UnsupportedDirectoryNamePrefixes.
func IsUnsupportedDirectoryName(dirName string) bool {
	// Start from the object-name verdict; the prefix scan below only runs
	// while the name has not been rejected yet.
	unsupported := IsUnsupportedObjectName(dirName)
	for i := 0; !unsupported && i < len(UnsupportedDirectoryNamePrefixes); i++ {
		unsupported = strings.HasPrefix(dirName, UnsupportedDirectoryNamePrefixes[i])
	}
	return unsupported
}
72 changes: 72 additions & 0 deletions internal/util/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,75 @@ func (ts *UtilTest) TestIsolateContextFromParentContext() {
newCtxCancel()
assert.ErrorIs(ts.T(), newCtx.Err(), context.Canceled)
}

// TestIsUnsupportedObjectName checks IsUnsupportedObjectName against names
// with and without each of the unsupported substrings ("//", "/./", "/../"
// and the NUL byte).
func (ts *UtilTest) TestIsUnsupportedObjectName() {
	cases := []struct {
		name          string
		isUnsupported bool
	}{
		{
			name:          "abc",
			isUnsupported: false,
		},
		{
			name:          "abc.txt",
			isUnsupported: false,
		},
		{
			name:          "abc/def",
			isUnsupported: false,
		},
		{
			// Dots that are not a whole path component are fine.
			name:          "a.b/c..d",
			isUnsupported: false,
		},
		{
			name:          "abc\x00",
			isUnsupported: true,
		},
		{
			name:          "abc\x00/",
			isUnsupported: true,
		},
		{
			name:          "abc//def",
			isUnsupported: true,
		},
		{
			name:          "abc/./def",
			isUnsupported: true,
		},
		{
			name:          "abc/../def",
			isUnsupported: true,
		},
	}

	for _, tc := range cases {
		// The message identifies the failing table entry.
		assert.Equal(ts.T(), tc.isUnsupported, IsUnsupportedObjectName(tc.name), "object name: %q", tc.name)
	}
}

// TestIsUnsupportedDirectoryName checks IsUnsupportedDirectoryName against
// directory names that contain an unsupported substring, start with an
// unsupported prefix, or do neither.
func (ts *UtilTest) TestIsUnsupportedDirectoryName() {
	cases := []struct {
		name          string
		isUnsupported bool
	}{
		{
			name:          "abc/",
			isUnsupported: false,
		},
		{
			name:          "abc/def/",
			isUnsupported: false,
		},
		{
			// Starts with "." but not with the "./" prefix.
			name:          ".abc/",
			isUnsupported: false,
		},
		{
			// Starts with ".." but not with the "../" prefix.
			name:          "..abc/",
			isUnsupported: false,
		},
		{
			name:          "abc//",
			isUnsupported: true,
		},
		{
			name:          "abc/./",
			isUnsupported: true,
		},
		{
			name:          "abc/../",
			isUnsupported: true,
		},
		{
			name:          "abc\x00/",
			isUnsupported: true,
		},
		{
			name:          "./",
			isUnsupported: true,
		},
		{
			name:          "/",
			isUnsupported: true,
		},
		{
			name:          "../",
			isUnsupported: true,
		},
	}

	for _, tc := range cases {
		// The message identifies the failing table entry.
		assert.Equal(ts.T(), tc.isUnsupported, IsUnsupportedDirectoryName(tc.name), "directory name: %q", tc.name)
	}
}

0 comments on commit ef0b1fe

Please sign in to comment.