Merge pull request #16 from leon-richardt/feat/better-file-extension-detection

feat: improve file extension detection
This commit is contained in:
Leon Richardt 2022-10-17 20:42:02 +02:00 committed by GitHub
commit 4d5f23377a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 138 additions and 18 deletions

View file

@ -0,0 +1,56 @@
package extdetect
import (
"strings"
"github.com/gabriel-vasile/mimetype"
)
// knownCombinations lists the multi-part file extensions that are treated as one unit, so that a
// name such as "foo.tar.gz" keeps ".tar.gz" instead of only ".gz".
var knownCombinations = []string{
	".tar.gz",
	".tar.xz",
}
// BuildFileExtension returns the file extension to use for a file called name with the content
// fileData.
//
// If name contains no "." at all, the extension is derived from fileData via MIME type detection.
// Otherwise the extension is taken from name and includes the leading ".": when name ends in one
// of the knownCombinations (e.g., ".tar.gz"), that whole combination is returned; in every other
// case only the part starting at the last "." is returned.
func BuildFileExtension(fileData []byte, name string) string {
	// First, check whether any file ending has been specified manually
	lastDot := strings.LastIndex(name, ".")
	if lastDot == -1 {
		// No file ending specified in name, use MIME type detection
		return mimetype.Detect(fileData).Extension()
	}

	// Some file extension was manually specified and we will use that. Look for a known extension
	// combination at the very end of the name and prefer the longest one. The combination has to
	// match completely: a name like "foo.tar." or "foo.tar.g" merely shares a prefix with
	// ".tar.gz" and must not be treated as a combined extension.
	// XXX: This could be done more efficiently (at least in theory) with some suffix tree structure
	// but for the few known combinations we have, it would likely be slower on real-world
	// computer architectures.
	longest := ""
	for _, comb := range knownCombinations {
		if len(comb) > len(longest) && strings.HasSuffix(name, comb) {
			longest = comb
		}
	}
	if longest != "" {
		return longest
	}

	// No known combination, so only what follows the last "." counts as the extension
	return name[lastDot:]
}

View file

@ -0,0 +1,65 @@
package extdetect
import (
"os"
"testing"
)
// TestDetectedExtensions runs BuildFileExtension against a table of file names and checks the
// returned extension. Every table entry runs as its own subtest named after the file name, so a
// failure identifies the offending input and does not hide the results of the remaining entries.
func TestDetectedExtensions(t *testing.T) {
	// Fixture used for the content-based detection case; presumably a PNG image, as the first
	// table entry expects ".png" for it.
	const fixturePath = "../fixtures/gps.png"

	pngFile, err := os.ReadFile(fixturePath)
	if err != nil {
		t.Fatalf("Could not open \"%s\" which is required for the test. Error: %s", fixturePath,
			err)
	}

	// Entries without fileData all have a "." in their name, so the extension is expected to come
	// from the name alone.
	tests := []struct {
		name           string
		fileData       []byte
		expectedOutput string
	}{
		{ // extension is detected correctly from file when not specified explicitly
			name:           "foo",
			fileData:       pngFile,
			expectedOutput: ".png",
		},
		{
			name:           "foo.txt",
			expectedOutput: ".txt",
		},
		{ // simple extension that's the last part of a known combination is detected correctly
			name:           "foo.gz",
			expectedOutput: ".gz",
		},
		{ // simple extension that's the first part of a known combination is detected correctly
			name:           "foo.tar",
			expectedOutput: ".tar",
		},
		{ // combined extension is detected correctly
			name:           "foo.tar.gz",
			expectedOutput: ".tar.gz",
		},
		{
			name:           "foo.tar.xz",
			expectedOutput: ".tar.xz",
		},
		{ // combined extension that is NOT known only returns the last part
			name:           "foo.jpg.zip",
			expectedOutput: ".zip",
		},
		{ // combined extension is detected correctly even with many "." in the name
			name:           "foo.jpg.zip.tar.gz",
			expectedOutput: ".tar.gz",
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			output := BuildFileExtension(test.fileData, test.name)
			if output != test.expectedOutput {
				t.Errorf("got output '%s', expected '%s'", output, test.expectedOutput)
			}
		})
	}
}

3
go.mod
View file

@ -7,6 +7,7 @@ require (
github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836 github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
github.com/gabriel-vasile/mimetype v1.4.1
github.com/go-errors/errors v1.1.1 github.com/go-errors/errors v1.1.1
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
) )
@ -17,6 +18,6 @@ require (
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e // indirect
gopkg.in/yaml.v2 v2.3.0 // indirect gopkg.in/yaml.v2 v2.3.0 // indirect
) )

10
go.sum
View file

@ -18,6 +18,8 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8= github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU= github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q=
github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M=
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q= github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs= github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs=
github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg= github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg=
@ -38,12 +40,18 @@ golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLL
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=

View file

@ -7,9 +7,9 @@ import (
"math/rand" "math/rand"
"net/http" "net/http"
"os" "os"
"strings"
"github.com/leon-richardt/jaf/exifscrubber" "github.com/leon-richardt/jaf/exifscrubber"
"github.com/leon-richardt/jaf/extdetect"
) )
type uploadHandler struct { type uploadHandler struct {
@ -65,8 +65,7 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
} }
} }
_, fileExtension := splitFileName(header.Filename) link, err := generateLink(handler, fileData[:], header.Filename)
link, err := generateLink(handler, fileData[:], fileExtension)
if err != nil { if err != nil {
http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError) http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError)
log.Println(" could not save file: " + err.Error()) log.Println(" could not save file: " + err.Error())
@ -80,13 +79,15 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
// Generates a valid link to uploadFile with the specified file extension. // Generates a valid link to uploadFile with the specified file extension.
// Returns the link or an error in case of failure. // Returns the link or an error in case of failure.
// Does not close the passed file pointer. // Does not close the passed file pointer.
func generateLink(handler *uploadHandler, fileData []byte, fileExtension string) (string, error) { func generateLink(handler *uploadHandler, fileData []byte, fileName string) (string, error) {
ext := extdetect.BuildFileExtension(fileData, fileName)
// Find an unused file name // Find an unused file name
var fullFileName string var fullFileName string
var savePath string var savePath string
for { for {
fileStem := createRandomFileName(handler.config.LinkLength) fileStem := createRandomFileName(handler.config.LinkLength)
fullFileName = fileStem + fileExtension fullFileName = fileStem + ext
savePath = handler.config.FileDir + fullFileName savePath = handler.config.FileDir + fullFileName
if !fileExists(savePath) { if !fileExists(savePath) {
@ -125,14 +126,3 @@ func createRandomFileName(length int) string {
return string(chars) return string(chars)
} }
// splitFileName cuts name at its last "." and returns the part before it together with the
// extension, which keeps the leading ".". A name without any "." is returned unchanged with an
// empty extension.
func splitFileName(name string) (string, string) {
	if dot := strings.LastIndexByte(name, '.'); dot >= 0 {
		return name[:dot], name[dot:]
	}

	// No dot at all
	return name, ""
}