mirror of
https://github.com/lyx0/yaf.git
synced 2024-11-13 19:49:53 +01:00
Merge pull request #16 from leon-richardt/feat/better-file-extension-detection
feat: improve file extension detection
This commit is contained in:
commit
4d5f23377a
5 changed files with 138 additions and 18 deletions
56
extdetect/extension_detection.go
Normal file
56
extdetect/extension_detection.go
Normal file
|
@ -0,0 +1,56 @@
|
|||
package extdetect
|
||||
|
||||
import (
|
||||
"strings"
|
||||
|
||||
"github.com/gabriel-vasile/mimetype"
|
||||
)
|
||||
|
||||
// knownCombinations lists the multi-part file extensions that BuildFileExtension looks for when
// a name contains more than one "." (each entry includes its leading ".").
var knownCombinations = []string{".tar.gz", ".tar.xz"}
|
||||
|
||||
func BuildFileExtension(fileData []byte, name string) string {
|
||||
// First, check whether any file ending has been specified manually
|
||||
curExtIdx := strings.LastIndex(name, ".")
|
||||
|
||||
if curExtIdx == -1 {
|
||||
// No file ending specified in name, use MIME type detection
|
||||
return mimetype.Detect(fileData).Extension()
|
||||
}
|
||||
|
||||
// Otherwise, some file extension was manually specified and we will use that. First, check
|
||||
// whether this is an "easy" case of file extension, i.e., a name where there is only one "."
|
||||
// character and we can treat what's after it as the file extension.
|
||||
nextExtIdx := strings.LastIndex(name[:curExtIdx], ".")
|
||||
if nextExtIdx == -1 {
|
||||
// Just one ".", so an easy case
|
||||
return name[curExtIdx:]
|
||||
}
|
||||
|
||||
// There are multiple "." in the name. Look for known extension combinations (e.g., ".tar.gz",
|
||||
// ".tar.xz") and use that if found.
|
||||
// XXX: This could be done more efficiently (at least in theory) with some suffix tree structure
|
||||
// but for the few known combinations we have, it would likely be slower on real-world
|
||||
// computer architectures.
|
||||
stillBuilding := true
|
||||
for stillBuilding {
|
||||
stillBuilding = false
|
||||
for _, comb := range knownCombinations {
|
||||
if !strings.HasPrefix(comb, name[nextExtIdx:]) {
|
||||
continue
|
||||
}
|
||||
|
||||
stillBuilding = true
|
||||
curExtIdx = nextExtIdx
|
||||
nextExtIdx = strings.LastIndex(name[:curExtIdx], ".")
|
||||
if nextExtIdx == -1 {
|
||||
// No more extension candidates -> return current state of the builder
|
||||
return name[curExtIdx:]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return name[curExtIdx:]
|
||||
}
|
65
extdetect/extension_detection_test.go
Normal file
65
extdetect/extension_detection_test.go
Normal file
|
@ -0,0 +1,65 @@
|
|||
package extdetect
|
||||
|
||||
import (
|
||||
"os"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestDetectedExtensions(t *testing.T) {
|
||||
const fixturePath = "../fixtures/gps.png"
|
||||
|
||||
type tType struct {
|
||||
name string
|
||||
fileData []byte
|
||||
expectedOutput string
|
||||
}
|
||||
|
||||
pngFile, err := os.ReadFile(fixturePath)
|
||||
if err != nil {
|
||||
t.Fatalf("Could not open \"%s\" which is required for the test. Error: %s", fixturePath,
|
||||
err)
|
||||
}
|
||||
|
||||
tests := []tType{
|
||||
{ // extension is detected correctly from file when not specified explicitly
|
||||
name: "foo",
|
||||
fileData: pngFile,
|
||||
expectedOutput: ".png",
|
||||
},
|
||||
{
|
||||
name: "foo.txt",
|
||||
expectedOutput: ".txt",
|
||||
},
|
||||
{ // simple extension that's the last part of a known combination is detected correctly
|
||||
name: "foo.gz",
|
||||
expectedOutput: ".gz",
|
||||
},
|
||||
{ // simple extension that's the first part of a known combination is detected correctly
|
||||
name: "foo.tar",
|
||||
expectedOutput: ".tar",
|
||||
},
|
||||
{ // combined extension is detected correctly
|
||||
name: "foo.tar.gz",
|
||||
expectedOutput: ".tar.gz",
|
||||
},
|
||||
{
|
||||
name: "foo.tar.xz",
|
||||
expectedOutput: ".tar.xz",
|
||||
},
|
||||
{ // combined extension that is NOT known only returns the last part
|
||||
name: "foo.jpg.zip",
|
||||
expectedOutput: ".zip",
|
||||
},
|
||||
{ // combined extension is detected correctly even with many "." in the name
|
||||
name: "foo.jpg.zip.tar.gz",
|
||||
expectedOutput: ".tar.gz",
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
output := BuildFileExtension(test.fileData, test.name)
|
||||
if output != test.expectedOutput {
|
||||
t.Fatalf("got output '%s', expected '%s'", output, test.expectedOutput)
|
||||
}
|
||||
}
|
||||
}
|
3
go.mod
3
go.mod
|
@ -7,6 +7,7 @@ require (
|
|||
github.com/dsoprea/go-jpeg-image-structure/v2 v2.0.0-20210512043942-b434301c6836
|
||||
github.com/dsoprea/go-logging v0.0.0-20200710184922-b02d349568dd
|
||||
github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d
|
||||
github.com/gabriel-vasile/mimetype v1.4.1
|
||||
github.com/go-errors/errors v1.1.1
|
||||
golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e
|
||||
)
|
||||
|
@ -17,6 +18,6 @@ require (
|
|||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e // indirect
|
||||
github.com/go-xmlfmt/xmlfmt v0.0.0-20191208150333-d5b6f63a941b // indirect
|
||||
github.com/golang/geo v0.0.0-20200319012246-673a6f80352d // indirect
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 // indirect
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e // indirect
|
||||
gopkg.in/yaml.v2 v2.3.0 // indirect
|
||||
)
|
||||
|
|
10
go.sum
10
go.sum
|
@ -18,6 +18,8 @@ github.com/dsoprea/go-png-image-structure/v2 v2.0.0-20210512210324-29b889a6093d/
|
|||
github.com/dsoprea/go-utility v0.0.0-20200711062821-fab8125e9bdf/go.mod h1:95+K3z2L0mqsVYd6yveIv1lmtT3tcQQ3dVakPySffW8=
|
||||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e h1:IxIbA7VbCNrwumIYjDoMOdf4KOSkMC6NJE4s8oRbE7E=
|
||||
github.com/dsoprea/go-utility/v2 v2.0.0-20200717064901-2fccff4aa15e/go.mod h1:uAzdkPTub5Y9yQwXe8W4m2XuP0tK4a9Q/dantD0+uaU=
|
||||
github.com/gabriel-vasile/mimetype v1.4.1 h1:TRWk7se+TOjCYgRth7+1/OYLNiRNIotknkFtf/dnN7Q=
|
||||
github.com/gabriel-vasile/mimetype v1.4.1/go.mod h1:05Vi0w3Y9c/lNvJOdmIwvrrAhX3rYhfQQCaf9VJcv7M=
|
||||
github.com/go-errors/errors v1.0.1/go.mod h1:f4zRHt4oKfwPJE5k8C9vpYG+aDHdBFUsgrm6/TyX73Q=
|
||||
github.com/go-errors/errors v1.0.2/go.mod h1:psDX2osz5VnTOnFWbDeWwS7yejl+uV3FEWEp4lssFEs=
|
||||
github.com/go-errors/errors v1.1.1 h1:ljK/pL5ltg3qoN+OtN6yCv9HWSfMwxSx90GJCZQxYNg=
|
||||
|
@ -38,12 +40,18 @@ golang.org/x/net v0.0.0-20200320220750-118fecf932d8/go.mod h1:z5CRVTTTmAJ677TzLL
|
|||
golang.org/x/net v0.0.0-20200501053045-e0ff5e5a1de5/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200513185701-a91f0712d120/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200520182314-0ba52f642ac2/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381 h1:VXak5I6aEWmAXeQjA+QSZzlgNrpq9mjcfDemuexIKsU=
|
||||
golang.org/x/net v0.0.0-20200707034311-ab3426394381/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e h1:TsQ7F31D3bUCLeqPT0u+yjp1guoArKaNKmCr22PYgTQ=
|
||||
golang.org/x/net v0.0.0-20220624214902-1bab6f366d9e/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.7/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
|
|
|
@ -7,9 +7,9 @@ import (
|
|||
"math/rand"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
|
||||
"github.com/leon-richardt/jaf/exifscrubber"
|
||||
"github.com/leon-richardt/jaf/extdetect"
|
||||
)
|
||||
|
||||
type uploadHandler struct {
|
||||
|
@ -65,8 +65,7 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
}
|
||||
}
|
||||
|
||||
_, fileExtension := splitFileName(header.Filename)
|
||||
link, err := generateLink(handler, fileData[:], fileExtension)
|
||||
link, err := generateLink(handler, fileData[:], header.Filename)
|
||||
if err != nil {
|
||||
http.Error(w, "could not save file: "+err.Error(), http.StatusInternalServerError)
|
||||
log.Println(" could not save file: " + err.Error())
|
||||
|
@ -80,13 +79,15 @@ func (handler *uploadHandler) ServeHTTP(w http.ResponseWriter, r *http.Request)
|
|||
// Generates a valid link to uploadFile with the specified file extension.
|
||||
// Returns the link or an error in case of failure.
|
||||
// Does not close the passed file pointer.
|
||||
func generateLink(handler *uploadHandler, fileData []byte, fileExtension string) (string, error) {
|
||||
func generateLink(handler *uploadHandler, fileData []byte, fileName string) (string, error) {
|
||||
ext := extdetect.BuildFileExtension(fileData, fileName)
|
||||
|
||||
// Find an unused file name
|
||||
var fullFileName string
|
||||
var savePath string
|
||||
for {
|
||||
fileStem := createRandomFileName(handler.config.LinkLength)
|
||||
fullFileName = fileStem + fileExtension
|
||||
fullFileName = fileStem + ext
|
||||
savePath = handler.config.FileDir + fullFileName
|
||||
|
||||
if !fileExists(savePath) {
|
||||
|
@ -125,14 +126,3 @@ func createRandomFileName(length int) string {
|
|||
|
||||
return string(chars)
|
||||
}
|
||||
|
||||
// splitFileName cuts name at its last "." and returns the part before it together with the
// extension, which keeps its leading ".". A name without any "." is returned unchanged with an
// empty extension.
func splitFileName(name string) (string, string) {
	if dot := strings.LastIndex(name, "."); dot >= 0 {
		return name[:dot], name[dot:]
	}

	// No dot at all
	return name, ""
}
|
||||
|
|
Loading…
Reference in a new issue