From 2dc5c1b011f5d4cf5a48ebefd2a7ac38c4f6bf65 Mon Sep 17 00:00:00 2001
From: Leon Richardt
Date: Mon, 17 Oct 2022 20:36:24 +0200
Subject: [PATCH] feat: improve file extension detection

If a file extension is explicitly specified in the upload name, it is
always used directly. Detection of common file extension combinations
is also performed. Currently, only ".tar.gz" and ".tar.xz" are
detected. A combination is only detected if the name ends in the
complete combination (e.g., "foo.tar.g" yields ".g"). If you would like
to add support for more common combinations, please open an issue or
pull request.

If no file extension is explicitly specified, jaf falls back to MIME
type detection via the github.com/gabriel-vasile/mimetype library.
---
 extdetect/extension_detection.go      | 44 ++++++++++++++++++
 extdetect/extension_detection_test.go | 74 ++++++++++++++++++++++++++++++
 go.mod                                |  3 +-
 go.sum                                | 10 ++++-
 uploadhandler.go                      | 22 +++------
 5 files changed, 135 insertions(+), 18 deletions(-)
 create mode 100644 extdetect/extension_detection.go
 create mode 100644 extdetect/extension_detection_test.go

diff --git a/extdetect/extension_detection.go b/extdetect/extension_detection.go
new file mode 100644
index 0000000..b33691c
--- /dev/null
+++ b/extdetect/extension_detection.go
@@ -0,0 +1,44 @@
+package extdetect
+
+import (
+	"strings"
+
+	"github.com/gabriel-vasile/mimetype"
+)
+
+// knownCombinations lists the multi-part extensions that are kept together as one extension.
+var knownCombinations []string = []string{
+	".tar.gz",
+	".tar.xz",
+}
+
+// BuildFileExtension returns the file extension, including the leading ".", to use for an upload
+// called name with the content fileData.
+//
+// If name contains no "." at all, the extension is detected from fileData via its MIME type.
+// Otherwise the extension is taken from name: if name ends in one of knownCombinations (e.g.,
+// ".tar.gz"), that combination is returned; if not, everything from the last "." on is returned.
+func BuildFileExtension(fileData []byte, name string) string {
+	// First, check whether any file ending has been specified manually
+	lastDotIdx := strings.LastIndex(name, ".")
+	if lastDotIdx == -1 {
+		// No file ending specified in name, use MIME type detection
+		return mimetype.Detect(fileData).Extension()
+	}
+
+	// Some file extension was manually specified and we will use that. By default, this is what
+	// follows the last "." in the name.
+	ext := name[lastDotIdx:]
+
+	// Prefer a known extension combination (e.g., ".tar.gz", ".tar.xz"), but only if name ends in
+	// the complete combination. A name that merely ends in the start of a combination (e.g.,
+	// "foo.tar.g" or "foo.tar.") must not have ".tar" pulled into its extension. If several
+	// combinations match, the longest one wins.
+	for _, comb := range knownCombinations {
+		if len(comb) > len(ext) && strings.HasSuffix(name, comb) {
+			ext = comb
+		}
+	}
+
+	return ext
+}
diff --git a/extdetect/extension_detection_test.go b/extdetect/extension_detection_test.go
new file mode 100644
index 0000000..dce6717
--- /dev/null
+++ b/extdetect/extension_detection_test.go
@@ -0,0 +1,74 @@
+package extdetect
+
+import (
+	"os"
+	"testing"
+)
+
+func TestDetectedExtensions(t *testing.T) {
+	const fixturePath = "../fixtures/gps.png"
+
+	type tType struct {
+		name           string
+		fileData       []byte
+		expectedOutput string
+	}
+
+	pngFile, err := os.ReadFile(fixturePath)
+	if err != nil {
+		t.Fatalf("Could not open \"%s\" which is required for the test. Error: %s", fixturePath,
+			err)
+	}
+
+	tests := []tType{
+		{ // extension is detected correctly from file when not specified explicitly
+			name:           "foo",
+			fileData:       pngFile,
+			expectedOutput: ".png",
+		},
+		{
+			name:           "foo.txt",
+			expectedOutput: ".txt",
+		},
+		{ // simple extension that's the last part of a known combination is detected correctly
+			name:           "foo.gz",
+			expectedOutput: ".gz",
+		},
+		{ // simple extension that's the first part of a known combination is detected correctly
+			name:           "foo.tar",
+			expectedOutput: ".tar",
+		},
+		{ // combined extension is detected correctly
+			name:           "foo.tar.gz",
+			expectedOutput: ".tar.gz",
+		},
+		{
+			name:           "foo.tar.xz",
+			expectedOutput: ".tar.xz",
+		},
+		{ // combined extension that is NOT known only returns the last part
+			name:           "foo.jpg.zip",
+			expectedOutput: ".zip",
+		},
+		{ // combined extension is detected correctly even with many "." in the name
+			name:           "foo.jpg.zip.tar.gz",
+			expectedOutput: ".tar.gz",
+		},
+		{ // incomplete combination is NOT treated as a combined extension
+			name:           "foo.tar.g",
+			expectedOutput: ".g",
+		},
+		{ // trailing "." after the first part of a known combination only returns the "."
+			name:           "foo.tar.",
+			expectedOutput: ".",
+		},
+	}
+
+	for _, test := range tests {
+		output := BuildFileExtension(test.fileData, test.name)
+		if output != test.expectedOutput {
+			t.Errorf("name '%s': got output '%s', expected '%s'", test.name, output,
+				test.expectedOutput)
+		}
+	}
+}
diff --git a/go.mod b/go.mod
index 5d81cf9..5c58d77 100644
--- a/go.mod
+++ b/go.mod
@@ -7,6 +7,7 @@ require (
 	github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836
 	github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd
 	github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
+	github.com/gabriel-vasile/mimetype v1.4.1
 	github.com/go-errors/errors v1.1.1
 	golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
 )
@@ -17,6 +18,6 @@ require (
 	github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
 	github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
 	github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
-	golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
+	golang.org/x/net 
v0.0.0-20220624214902-1bab6f366d9e // indirect gopkg.in/yaml.v2 v2.3.0 // indirect ) diff --git a/go.sum b/go.sum index 6902bc8..4889d1a 100644 --- a/go.sum +++ b/go.sum @@ -18,6 +18,8 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/ github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= +github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q= +github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs= github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg= @@ -38,12 +40,18 @@ golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ= +golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod 
h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= diff --git a/uploadhandler.go b/uploadhandler.go index 895c98b..a536dea 100644 --- a/uploadhandler.go +++ b/uploadhandler.go @@ -7,9 +7,9 @@ import ( "math/rand" "net/http" "os" - "strings" "github.com/leon-richardt/jaf/exifscrubber" + "github.com/leon-richardt/jaf/extdetect" ) type uploadHandler struct { @@ -65,8 +65,7 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) } } - _, fileExtension := splitFileName(header.Filename) - link, err := generateLink(handler, fileData[:], fileExtension) + link, err := generateLink(handler, fileData[:], header.Filename) if err != nil { http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError) log.Println(" could not save file: " + err.Error()) @@ -80,13 +79,15 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) // 
Generates a valid link to uploadFile with the specified file extension. // Returns the link or an error in case of failure. // Does not close the passed file pointer. -func generateLink(handler *uploadHandler, fileData []byte, fileExtension string) (string, error) { +func generateLink(handler *uploadHandler, fileData []byte, fileName string) (string, error) { + ext := extdetect.BuildFileExtension(fileData, fileName) + // Find an unused file name var fullFileName string var savePath string for { fileStem := createRandomFileName(handler.config.LinkLength) - fullFileName = fileStem + fileExtension + fullFileName = fileStem + ext savePath = handler.config.FileDir + fullFileName if !fileExists(savePath) { @@ -125,14 +126,3 @@ func createRandomFileName(length int) string { return string(chars) } - -func splitFileName(name string) (string, string) { - extIndex := strings.LastIndex(name, ".") - - if extIndex == -1 { - // No dot at all - return name, "" - } - - return name[:extIndex], name[extIndex:] -}