feat: improve file extension detection

If a file extension is explicitly specified in the upload name, it is
always used directly. Detection of common file extension combinations is
also performed. Currently, only ".tar.gz" and ".tar.xz" are detected. If
you would like to add support for more common combinations, please open
an issue or pull request.

If no file extension is explicitly specified, jaf falls back to MIME
type detection via the github.com/gabriel-vasile/mimetype library.
This commit is contained in:
Leon Richardt 2022-10-17 20:36:24 +02:00
parent a885de840e
commit 2dc5c1b011
No known key found for this signature in database
GPG key ID: 03726644CC49FAFD
5 changed files with 138 additions and 18 deletions

View file

@ -0,0 +1,56 @@
package extdetect
import (
"strings"
"github.com/gabriel-vasile/mimetype"
)
// knownCombinations lists the multi-part file extensions that are treated as a single extension
// when they appear at the end of an upload name. Every entry includes its leading ".".
var knownCombinations = []string{
	".tar.gz",
	".tar.xz",
}
// BuildFileExtension determines the file extension to use for an uploaded file.
//
// If name contains no "." at all, the extension is whatever MIME type detection on fileData
// reports. Otherwise the extension written in name is used and fileData is ignored: if name ends
// in one of the knownCombinations (e.g., ".tar.gz"), that whole combination is returned; in every
// other case only the part of name starting at its last "." is returned.
//
// Matching is case-sensitive and only complete combinations count, so "foo.tar.g" yields ".g"
// rather than a truncated ".tar.g".
func BuildFileExtension(fileData []byte, name string) string {
	// First, check whether any file ending has been specified manually
	lastDotIdx := strings.LastIndex(name, ".")
	if lastDotIdx == -1 {
		// No file ending specified in name, use MIME type detection
		return mimetype.Detect(fileData).Extension()
	}

	// Some file extension was manually specified and we will use that. Start with the simple
	// case, i.e., everything from the last "." onwards.
	ext := name[lastDotIdx:]

	// Widen the extension only if the name ends in a complete known combination. Comparing the
	// lengths keeps the longest match should several combinations ever share a suffix.
	// XXX: This could be done more efficiently (at least in theory) with some suffix tree
	// structure but for the few known combinations we have, a linear scan is likely faster on
	// real-world computer architectures.
	for _, comb := range knownCombinations {
		if len(comb) > len(ext) && strings.HasSuffix(name, comb) {
			ext = comb
		}
	}

	return ext
}

View file

@ -0,0 +1,65 @@
package extdetect
import (
"os"
"testing"
)
// TestDetectedExtensions runs BuildFileExtension over a table of upload names and checks the
// returned extension. Each case runs as its own subtest, named after the upload name, so that one
// failing case does not hide the results of the others.
func TestDetectedExtensions(t *testing.T) {
	const fixturePath = "../fixtures/gps.png"

	type testCase struct {
		name           string // upload name passed to BuildFileExtension
		fileData       []byte // file content; left nil where the name already carries an extension
		expectedOutput string
	}

	// The fixture is needed for the one case that relies on content-based detection, so there is
	// no point in continuing without it.
	pngFile, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("Could not open \"%s\" which is required for the test. Error: %s", fixturePath,
			err)
	}

	tests := []testCase{
		{ // extension is detected correctly from file when not specified explicitly
			name:           "foo",
			fileData:       pngFile,
			expectedOutput: ".png",
		},
		{
			name:           "foo.txt",
			expectedOutput: ".txt",
		},
		{ // simple extension that's the last part of a known combination is detected correctly
			name:           "foo.gz",
			expectedOutput: ".gz",
		},
		{ // simple extension that's the first part of a known combination is detected correctly
			name:           "foo.tar",
			expectedOutput: ".tar",
		},
		{ // combined extension is detected correctly
			name:           "foo.tar.gz",
			expectedOutput: ".tar.gz",
		},
		{
			name:           "foo.tar.xz",
			expectedOutput: ".tar.xz",
		},
		{ // combined extension that is NOT known only returns the last part
			name:           "foo.jpg.zip",
			expectedOutput: ".zip",
		},
		{ // combined extension is detected correctly even with many "." in the name
			name:           "foo.jpg.zip.tar.gz",
			expectedOutput: ".tar.gz",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			output := BuildFileExtension(test.fileData, test.name)
			if output != test.expectedOutput {
				t.Errorf("got output '%s', expected '%s'", output, test.expectedOutput)
			}
		})
	}
}

3
go.mod
View file

@ -7,6 +7,7 @@ require (
github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836 github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
github.com/gabriel-vasile/mimetype v1.4.1
github.com/go-errors/errors v1.1.1 github.com/go-errors/errors v1.1.1
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
) )
@ -17,6 +18,6 @@ require (
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e // indirect
gopkg.in/yaml.v2 v2.3.0 // indirect gopkg.in/yaml.v2 v2.3.0 // indirect
) )

10
go.sum
View file

@ -18,6 +18,8 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q=
github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M=
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs= github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs=
github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg= github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg=
@ -38,12 +40,18 @@ golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -7,9 +7,9 @@ import (
"math/rand" "math/rand"
"net/http" "net/http"
"os" "os"
"strings"
"github.com/leon-richardt/jaf/exifscrubber" "github.com/leon-richardt/jaf/exifscrubber"
"github.com/leon-richardt/jaf/extdetect"
) )
type uploadHandler struct { type uploadHandler struct {
@ -65,8 +65,7 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
} }
} }
_, fileExtension := splitFileName(header.Filename) link, err := generateLink(handler, fileData[:], header.Filename)
link, err := generateLink(handler, fileData[:], fileExtension)
if err != nil { if err != nil {
http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError) http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError)
log.Println(" could not save file: " + err.Error()) log.Println(" could not save file: " + err.Error())
@ -80,13 +79,15 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
// Generates a valid link to uploadFile with the specified file extension. // Generates a valid link to uploadFile with the specified file extension.
// Returns the link or an error in case of failure. // Returns the link or an error in case of failure.
// Does not close the passed file pointer. // Does not close the passed file pointer.
func generateLink(handler *uploadHandler, fileData []byte, fileExtension string) (string, error) { func generateLink(handler *uploadHandler, fileData []byte, fileName string) (string, error) {
ext := extdetect.BuildFileExtension(fileData, fileName)
// Find an unused file name // Find an unused file name
var fullFileName string var fullFileName string
var savePath string var savePath string
for { for {
fileStem := createRandomFileName(handler.config.LinkLength) fileStem := createRandomFileName(handler.config.LinkLength)
fullFileName = fileStem + fileExtension fullFileName = fileStem + ext
savePath = handler.config.FileDir + fullFileName savePath = handler.config.FileDir + fullFileName
if !fileExists(savePath) { if !fileExists(savePath) {
@ -125,14 +126,3 @@ func createRandomFileName(length int) string {
return string(chars) return string(chars)
} }
// splitFileName cuts name at its last "." and returns the part before it (the stem) and the part
// from the "." onwards (the extension, including the dot). A name without any "." is returned
// unchanged as the stem, together with an empty extension.
func splitFileName(name string) (string, string) {
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		stem, ext := name[:dot], name[dot:]
		return stem, ext
	}

	// No dot at all
	return name, ""
}