mirror-yaf/exifscrubber/exifscrubber.go

271 lines
6.4 KiB
Go
Raw Normal View History

package exifscrubber
import (
"bytes"
"errors"
"fmt"
exif "github.com/dsoprea/go-exif/v3"
exifcommon "github.com/dsoprea/go-exif/v3/common"
jis "github.com/dsoprea/go-jpeg-image-structure/v2"
// This is only needed for the log.Is() function to test error types. I would normally just
// reimplement this function privately but it is pulled in as an indirect dependency anyway.
exiflog "github.com/dsoprea/go-logging"
pis "github.com/dsoprea/go-png-image-structure/v2"
)
// ErrUnknownFileType is returned by ScrubExif when the input looks like neither
// JPEG nor PNG, so that the caller can decide how to continue.
var ErrUnknownFileType = errors.New("can't scrub EXIF for this file type")
// ExifScrubber filters the EXIF tags of an image, keeping only tags that are
// allowed either by tag ID or by tag path.
type ExifScrubber struct {
	// includedTagIds holds the IDs of the tags that are kept.
	includedTagIds []uint16
	// includedTagPaths holds the paths of the tags that are kept, written as
	// "<IFD path>/<tag name>".
	includedTagPaths []string
}

// NewExifScrubber builds an ExifScrubber that keeps the tags named by
// includedTagIds or includedTagPaths. Both slices are stored as given, without
// being copied.
func NewExifScrubber(includedTagIds []uint16, includedTagPaths []string) ExifScrubber {
	var scrubber ExifScrubber
	scrubber.includedTagIds = includedTagIds
	scrubber.includedTagPaths = includedTagPaths
	return scrubber
}
// ScrubExif returns the image in fileData with its EXIF data reduced to the
// tags allowed by this scrubber. JPEG and PNG inputs are supported.
//
// If the image carries no EXIF data at all, fileData itself is returned
// unchanged. ErrUnknownFileType is returned when the data looks like neither
// JPEG nor PNG, so that the caller can decide how to continue. Any other error
// comes from parsing, filtering or re-encoding the image and is returned
// as-is (not wrapped).
func (scrubber *ExifScrubber) ScrubExif(fileData []byte) ([]byte, error) {
	switch {
	case jis.NewJpegMediaParser().LooksLikeFormat(fileData):
		return scrubber.scrubJPEG(fileData)
	case pis.NewPngMediaParser().LooksLikeFormat(fileData):
		return scrubber.scrubPNG(fileData)
	default:
		// Don't know how to handle other file formats.
		return nil, ErrUnknownFileType
	}
}

// scrubJPEG filters the EXIF data of a JPEG image and re-encodes the image.
func (scrubber *ExifScrubber) scrubJPEG(fileData []byte) ([]byte, error) {
	intfc, err := jis.NewJpegMediaParser().ParseBytes(fileData)
	if err != nil {
		return nil, err
	}

	// Checked assertion: an unexpected parser result becomes an error rather
	// than a panic.
	segmentList, ok := intfc.(*jis.SegmentList)
	if !ok {
		return nil, fmt.Errorf("unexpected JPEG parser result type %T", intfc)
	}

	rootIfd, _, err := segmentList.Exif()
	if err != nil {
		if exiflog.Is(err, exif.ErrNoExif) {
			// Incoming data contained no EXIF in the first place so we can
			// return the original.
			return fileData, nil
		}
		return nil, err
	}

	filteredIb, err := scrubber.filteringIfdBuilder(rootIfd)
	if err != nil {
		return nil, err
	}

	// A failure to install the filtered EXIF must not go unnoticed, otherwise
	// the image would be written back with its EXIF data not replaced.
	if err := segmentList.SetExif(filteredIb); err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	if err := segmentList.Write(&buf); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

// scrubPNG filters the EXIF data of a PNG image and re-encodes the image.
func (scrubber *ExifScrubber) scrubPNG(fileData []byte) ([]byte, error) {
	intfc, err := pis.NewPngMediaParser().ParseBytes(fileData)
	if err != nil {
		return nil, err
	}

	// Checked assertion: an unexpected parser result becomes an error rather
	// than a panic.
	chunks, ok := intfc.(*pis.ChunkSlice)
	if !ok {
		return nil, fmt.Errorf("unexpected PNG parser result type %T", intfc)
	}

	rootIfd, _, err := chunks.Exif()
	if err != nil {
		if exiflog.Is(err, exif.ErrNoExif) {
			// Incoming data contained no EXIF in the first place so we can
			// return the original.
			return fileData, nil
		}
		return nil, err
	}

	filteredIb, err := scrubber.filteringIfdBuilder(rootIfd)
	if err != nil {
		return nil, err
	}

	// A failure to install the filtered EXIF must not go unnoticed, otherwise
	// the image would be written back with its EXIF data not replaced.
	if err := chunks.SetExif(filteredIb); err != nil {
		return nil, err
	}

	var buf bytes.Buffer
	if err := chunks.WriteTo(&buf); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
// isTagAllowed reports whether tag may be kept: either its tag ID is listed in
// includedTagIds, or its "<IFD path>/<tag name>" string is listed in
// includedTagPaths.
func (scrubber *ExifScrubber) isTagAllowed(tag *exif.IfdTagEntry) bool {
	// Integer comparisons come first; they are cheaper than building and
	// comparing the path string.
	id := tag.TagId()
	for idx := range scrubber.includedTagIds {
		if scrubber.includedTagIds[idx] == id {
			return true
		}
	}

	// No ID matched, so fall back to the tag path list.
	fullPath := fmt.Sprintf("%s/%s", tag.IfdPath(), tag.TagName())
	for idx := range scrubber.includedTagPaths {
		if scrubber.includedTagPaths[idx] == fullPath {
			return true
		}
	}

	return false
}
// filteringIfdBuilder builds a chain of IFD builders mirroring rootIfd and its
// sibling IFDs (reached through NextIfd), populated only with the tags allowed
// by this scrubber. It returns the builder for the first IFD of the chain, or
// a nil builder when rootIfd is nil.
//
// This method follows the implementation of
// exif.NewIfdBuilderFromExistingChain().
func (scrubber *ExifScrubber) filteringIfdBuilder(rootIfd *exif.Ifd) (
	firstIb *exif.IfdBuilder,
	err error,
) {
	if rootIfd == nil {
		// Nothing to build; skip setting up the mapping and tag index.
		return nil, nil
	}

	// The mapping and the tag index do not depend on the individual IFD, so
	// they are built once and shared by every builder of this chain.

	// This only works when no non-standard mappings are used
	ifdMapping, err := exifcommon.NewIfdMappingWithStandard()
	if err != nil {
		return nil, err
	}

	// This only works when no non-standard tags are used
	tagIndex := exif.NewTagIndex()
	if err = exif.LoadStandardTags(tagIndex); err != nil {
		return nil, err
	}

	var lastIb *exif.IfdBuilder
	for thisExistingIfd := rootIfd; thisExistingIfd != nil; thisExistingIfd = thisExistingIfd.NextIfd() {
		newIb := exif.NewIfdBuilder(
			ifdMapping,
			tagIndex,
			thisExistingIfd.IfdIdentity(),
			thisExistingIfd.ByteOrder(),
		)

		// Link the new builder behind the previous one, or remember it as the
		// head of the chain.
		if firstIb == nil {
			firstIb = newIb
		} else {
			lastIb.SetNextIb(newIb)
		}

		if err = scrubber.filteredAddTagsFromExisting(newIb, thisExistingIfd); err != nil {
			return nil, err
		}

		lastIb = newIb
	}

	return firstIb, nil
}
// filteredAddTagsFromExisting copies the entries of ifd into ib. Child-IFD
// entries are rebuilt recursively through filteringIfdBuilder; every other
// entry is copied only if isTagAllowed accepts it. Thumbnail offset and size
// entries are always skipped.
//
// An error is returned when a child-IFD entry has no matching child IFD, when
// a tag's raw bytes cannot be read, or when adding to ib fails.
//
// This method follows the implementation of
// exif.IfdBuilder.AddTagsFromExisting().
func (scrubber *ExifScrubber) filteredAddTagsFromExisting(
	ib *exif.IfdBuilder,
	ifd *exif.Ifd,
) (err error) {
	for i, ite := range ifd.Entries() {
		if ite.IsThumbnailOffset() || ite.IsThumbnailSize() {
			// These will be added on-the-fly when we encode.
			continue
		}

		var bt *exif.BuilderTag

		if ite.ChildIfdPath() != "" {
			// If we want to add an IFD tag, we'll have to build it first and
			// *then* add it via a different method.

			// Figure out which of the child-IFDs that are associated with
			// this IFD represents this specific child IFD.
			var childIfd *exif.Ifd
			for _, thisChildIfd := range ifd.Children() {
				if thisChildIfd.ParentTagIndex() != i {
					continue
				}

				// A mismatching tag ID is only reported; the child at the
				// matching position is still used.
				if thisChildIfd.IfdIdentity().TagId() != 0xffff &&
					thisChildIfd.IfdIdentity().TagId() != ite.TagId() {
					fmt.Printf(
						"child-IFD tag is not correct: TAG-POSITION=(%d) ITE=%s CHILD-IFD=%s\n",
						thisChildIfd.ParentTagIndex(),
						ite,
						thisChildIfd,
					)
				}

				childIfd = thisChildIfd
				break
			}

			if childIfd == nil {
				// Without a child IFD there is nothing to build from:
				// filteringIfdBuilder would hand back a nil builder, which
				// must not be passed on to NewBuilderTagFromBuilder. Report
				// the inconsistency to the caller instead.
				children := ifd.Children()
				childTagIds := make([]string, len(children))
				for j, child := range children {
					childTagIds[j] = fmt.Sprintf(
						"0x%04x (parent tag-position %d)",
						child.IfdIdentity().TagId(),
						child.ParentTagIndex(),
					)
				}

				return fmt.Errorf(
					"could not find child IFD for child ITE: IFD-PATH=[%s] TAG-ID=(0x%04x) "+
						"CURRENT-TAG-POSITION=(%d) CHILDREN=%v",
					ite.IfdPath(),
					ite.TagId(),
					i,
					childTagIds,
				)
			}

			childIb, err := scrubber.filteringIfdBuilder(childIfd)
			if err != nil {
				return err
			}

			bt = ib.NewBuilderTagFromBuilder(childIb)
		} else {
			// Non-IFD tag: keep it only if the scrubber allows it.
			if !scrubber.isTagAllowed(ite) {
				continue
			}

			rawBytes, err := ite.GetRawBytes()
			if err != nil {
				return err
			}

			value := exif.NewIfdBuilderTagValueFromBytes(rawBytes)
			bt = exif.NewBuilderTag(
				ifd.IfdIdentity().UnindexedString(),
				ite.TagId(),
				ite.TagType(),
				value,
				ifd.ByteOrder(),
			)
		}

		switch {
		case bt.Value().IsBytes():
			if err := ib.Add(bt); err != nil {
				return err
			}
		case bt.Value().IsIb():
			if err := ib.AddChildIb(bt.Value().Ib()); err != nil {
				return err
			}
		}
	}

	return nil
}