mirror of
https://github.com/lyx0/yaf.git
synced 2024-11-13 19:49:53 +01:00
feat: improve file extension detection
If a file extension is explicitly specified in the upload name, it is always used directly. Detection of common file extension combinations is also performed. Currently, only ".tar.gz" and ".tar.xz" are detected. If you would like to add support for more common combinations, please open an issue or pull request. If no file extension is explicitly specified, jaf falls back to MIME type detection via the github.com/gabriel-vasile/mimetype library.
This commit is contained in:
parent
a885de840e
commit
2dc5c1b011
5 changed files with 138 additions and 18 deletions
56
extdetect/extension_detection.go
Normal file
56
extdetect/extension_detection.go
Normal file
|
@ -0,0 +1,56 @@
|
|||
package extdetect
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/gabriel-vasile/mimetype"
|
||||
)
|
||||
|
||||
// knownCombinations lists the multi-part file extensions that BuildFileExtension treats as one
// extension instead of only using the part after the last ".". Every entry starts with a ".".
var knownCombinations = []string{
	".tar.gz",
	".tar.xz",
}
|
||||
|
||||
func BuildFileExtension(fileData []byte, name string) string {
|
||||
// First, check whether any file ending has been specified manually
|
||||
curExtIdx := strings.LastIndex(name, ".")
|
||||
|
||||
if curExtIdx == -1 {
|
||||
// No file ending specified in name, use MIME type detection
|
||||
return mimetype.Detect(fileData).Extension()
|
||||
}
|
||||
|
||||
// Otherwise, some file extension was manually specified and we will use that. First, check
|
||||
// whether this is an "easy" case of file extension, i.e., a name where there is only one "."
|
||||
// character and we can treat what's after it as the file extension.
|
||||
nextExtIdx := strings.LastIndex(name[:curExtIdx], ".")
|
||||
if nextExtIdx == -1 {
|
||||
// Just one ".", so an easy case
|
||||
return name[curExtIdx:]
|
||||
}
|
||||
|
||||
// There are multiple "." in the name. Look for known extension combinations (e.g., ".tar.gz",
|
||||
// ".tar.xz") and use that if found.
|
||||
// XXX: This could be done more efficiently (at least in theory) with some suffix tree structure
|
||||
// but for the few known combinations we have, it would likely be slower on real-world
|
||||
// computer architectures.
|
||||
stillBuilding := true
|
||||
for stillBuilding {
|
||||
stillBuilding = false
|
||||
for _, comb := range knownCombinations {
|
||||
if !strings.HasPrefix(comb, name[nextExtIdx:]) {
|
||||
continue
|
||||
}
|
||||
|
||||
stillBuilding = true
|
||||
curExtIdx = nextExtIdx
|
||||
nextExtIdx = strings.LastIndex(name[:curExtIdx], ".")
|
||||
if nextExtIdx == -1 {
|
||||
// No more extension candidates -> return current state of the builder
|
||||
return name[curExtIdx:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return name[curExtIdx:]
|
||||
}
|
65
extdetect/extension_detection_test.go
Normal file
65
extdetect/extension_detection_test.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
package extdetect
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectedExtensions(t *testing.T) {
|
||||
const fixturePath = "../fixtures/gps.png"
|
||||
|
||||
type tType struct {
|
||||
name string
|
||||
fileData []byte
|
||||
expectedOutput string
|
||||
}
|
||||
|
||||
pngFile, err := os.ReadFile(fixturePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Could not open \"%s\" which is required for the test. Error: %s", fixturePath,
|
||||
err)
|
||||
}
|
||||
|
||||
tests := []tType{
|
||||
{ // extension is detected correctly from file when not specified explicitly
|
||||
name: "foo",
|
||||
fileData: pngFile,
|
||||
expectedOutput: ".png",
|
||||
},
|
||||
{
|
||||
name: "foo.txt",
|
||||
expectedOutput: ".txt",
|
||||
},
|
||||
{ // simple extension that's the last part of a known combination is detected correctly
|
||||
name: "foo.gz",
|
||||
expectedOutput: ".gz",
|
||||
},
|
||||
{ // simple extension that's the first part of a known combination is detected correctly
|
||||
name: "foo.tar",
|
||||
expectedOutput: ".tar",
|
||||
},
|
||||
{ // combined extension is detected correctly
|
||||
name: "foo.tar.gz",
|
||||
expectedOutput: ".tar.gz",
|
||||
},
|
||||
{
|
||||
name: "foo.tar.xz",
|
||||
expectedOutput: ".tar.xz",
|
||||
},
|
||||
{ // combined extension that is NOT known only returns the last part
|
||||
name: "foo.jpg.zip",
|
||||
expectedOutput: ".zip",
|
||||
},
|
||||
{ // combined extension is detected correctly even with many "." in the name
|
||||
name: "foo.jpg.zip.tar.gz",
|
||||
expectedOutput: ".tar.gz",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
output := BuildFileExtension(test.fileData, test.name)
|
||||
if output != test.expectedOutput {
|
||||
t.Fatalf("got output '%s', expected '%s'", output, test.expectedOutput)
|
||||
}
|
||||
}
|
||||
}
|
3
go.mod
3
go.mod
|
@ -7,6 +7,7 @@ require (
|
|||
github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836
|
||||
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd
|
||||
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
|
||||
github.com/gabriel-vasile/mimetype v1.4.1
|
||||
github.com/go-errors/errors v1.1.1
|
||||
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
|
||||
)
|
||||
|
@ -17,6 +18,6 @@ require (
|
|||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
|
||||
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
|
||||
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e // indirect
|
||||
gopkg.in/yaml.v2 v2.3.0 // indirect
|
||||
)
|
||||
|
|
10
go.sum
10
go.sum
|
@ -18,6 +18,8 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
|
|||
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
|
||||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
|
||||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q=
|
||||
github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M=
|
||||
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
|
||||
github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs=
|
||||
github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg=
|
||||
|
@ -38,12 +40,18 @@ golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLL
|
|||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
|
|
@ -7,9 +7,9 @@ import (
|
|||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/leon-richardt/jaf/exifscrubber"
|
||||
"github.com/leon-richardt/jaf/extdetect"
|
||||
)
|
||||
|
||||
type uploadHandler struct {
|
||||
|
@ -65,8 +65,7 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
}
|
||||
}
|
||||
|
||||
_, fileExtension := splitFileName(header.Filename)
|
||||
link, err := generateLink(handler, fileData[:], fileExtension)
|
||||
link, err := generateLink(handler, fileData[:], header.Filename)
|
||||
if err != nil {
|
||||
http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError)
|
||||
log.Println(" could not save file: " + err.Error())
|
||||
|
@ -80,13 +79,15 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
// Generates a valid link to uploadFile with the specified file extension.
|
||||
// Returns the link or an error in case of failure.
|
||||
// Does not close the passed file pointer.
|
||||
func generateLink(handler *uploadHandler, fileData []byte, fileExtension string) (string, error) {
|
||||
func generateLink(handler *uploadHandler, fileData []byte, fileName string) (string, error) {
|
||||
ext := extdetect.BuildFileExtension(fileData, fileName)
|
||||
|
||||
// Find an unused file name
|
||||
var fullFileName string
|
||||
var savePath string
|
||||
for {
|
||||
fileStem := createRandomFileName(handler.config.LinkLength)
|
||||
fullFileName = fileStem + fileExtension
|
||||
fullFileName = fileStem + ext
|
||||
savePath = handler.config.FileDir + fullFileName
|
||||
|
||||
if !fileExists(savePath) {
|
||||
|
@ -125,14 +126,3 @@ func createRandomFileName(length int) string {
|
|||
|
||||
return string(chars)
|
||||
}
|
||||
|
||||
// splitFileName splits name at its last "." and returns the part before the "." followed by the
// part starting at the "." (so the second value keeps its leading "."). If name contains no ".",
// the whole name is returned as the first value and the second value is empty.
func splitFileName(name string) (string, string) {
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		return name[:dot], name[dot:]
	}

	// No "." anywhere in the name, so there is nothing to split off.
	return name, ""
}
|
||||
|
|
Loading…
Reference in a new issue