2023-03-12 15:00:57 +00:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-02-28 14:17:18 +00:00
2021-03-09 16:03:40 +00:00
package media
2021-04-01 18:46:45 +00:00
import (
2021-05-17 17:06:58 +00:00
"context"
2023-02-13 18:40:48 +00:00
"errors"
2022-03-07 10:08:26 +00:00
"fmt"
2023-02-13 18:40:48 +00:00
"time"
2021-04-01 18:46:45 +00:00
2023-02-13 18:40:48 +00:00
"codeberg.org/gruf/go-runners"
"codeberg.org/gruf/go-sched"
"codeberg.org/gruf/go-store/v2/storage"
"github.com/superseriousbusiness/gotosocial/internal/config"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
"github.com/superseriousbusiness/gotosocial/internal/log"
"github.com/superseriousbusiness/gotosocial/internal/state"
"github.com/superseriousbusiness/gotosocial/internal/uris"
2021-04-01 18:46:45 +00:00
)
2023-02-11 11:48:38 +00:00
var SupportedMIMETypes = [ ] string {
mimeImageJpeg ,
mimeImageGif ,
mimeImagePng ,
mimeImageWebp ,
mimeVideoMp4 ,
}
2022-05-15 14:45:04 +00:00
2023-02-11 11:48:38 +00:00
var SupportedEmojiMIMETypes = [ ] string {
mimeImageGif ,
mimeImagePng ,
}
2022-06-30 10:22:10 +00:00
2021-12-28 15:36:00 +00:00
// Manager provides an interface for managing media: parsing, storing, and retrieving media objects like photos, videos, and gifs.
type Manager interface {
2022-12-10 21:43:11 +00:00
/ *
PROCESSING FUNCTIONS
* /
2023-02-13 18:40:48 +00:00
// PreProcessMedia begins the process of decoding and storing the given data as an attachment.
2022-02-22 12:50:33 +00:00
// It will return a pointer to a ProcessingMedia struct upon which further actions can be performed, such as getting
2022-01-10 17:36:09 +00:00
// the finished media, thumbnail, attachment, etc.
2022-01-08 16:17:01 +00:00
//
2022-02-22 12:50:33 +00:00
// data should be a function that the media manager can call to return a reader containing the media data.
//
// postData will be called after data has been called; it can be used to clean up any remaining resources.
// The provided function can be nil, in which case it will not be executed.
2022-01-11 16:49:14 +00:00
//
2022-01-08 16:17:01 +00:00
// accountID should be the account that the media belongs to.
//
2022-01-10 17:36:09 +00:00
// ai is optional and can be nil. Any additional information about the attachment provided will be put in the database.
2023-02-13 18:40:48 +00:00
//
// Note: unlike ProcessMedia, this will NOT queue the media to be asynchronously processed.
PreProcessMedia ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , accountID string , ai * AdditionalMediaInfo ) ( * ProcessingMedia , error )
// PreProcessMediaRecache refetches, reprocesses, and recaches an existing attachment that has been uncached via pruneRemote.
//
// Note: unlike ProcessMedia, this will NOT queue the media to be asychronously processed.
PreProcessMediaRecache ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , attachmentID string ) ( * ProcessingMedia , error )
// ProcessMedia will call PreProcessMedia, followed by queuing the media to be processing in the media worker queue.
2022-02-22 12:50:33 +00:00
ProcessMedia ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , accountID string , ai * AdditionalMediaInfo ) ( * ProcessingMedia , error )
2023-02-13 18:40:48 +00:00
// PreProcessEmoji begins the process of decoding and storing the given data as an emoji.
2022-02-22 12:50:33 +00:00
// It will return a pointer to a ProcessingEmoji struct upon which further actions can be performed, such as getting
// the finished media, thumbnail, attachment, etc.
//
// data should be a function that the media manager can call to return a reader containing the emoji data.
//
// postData will be called after data has been called; it can be used to clean up any remaining resources.
// The provided function can be nil, in which case it will not be executed.
//
// shortcode should be the emoji shortcode without the ':'s around it.
//
// id is the database ID that should be used to store the emoji.
//
// uri is the ActivityPub URI/ID of the emoji.
//
// ai is optional and can be nil. Any additional information about the emoji provided will be put in the database.
2022-10-13 13:16:24 +00:00
//
2023-02-13 18:40:48 +00:00
// Note: unlike ProcessEmoji, this will NOT queue the emoji to be asynchronously processed.
PreProcessEmoji ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , shortcode string , id string , uri string , ai * AdditionalEmojiInfo , refresh bool ) ( * ProcessingEmoji , error )
// ProcessEmoji will call PreProcessEmoji, followed by queuing the emoji to be processing in the emoji worker queue.
2022-10-13 13:16:24 +00:00
ProcessEmoji ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , shortcode string , id string , uri string , ai * AdditionalEmojiInfo , refresh bool ) ( * ProcessingEmoji , error )
2022-05-15 14:45:04 +00:00
2022-12-10 21:43:11 +00:00
/ *
2023-02-11 11:48:38 +00:00
PRUNING / UNCACHING FUNCTIONS
2022-12-10 21:43:11 +00:00
* /
2023-02-11 11:48:38 +00:00
// PruneAll runs all of the below pruning/uncacheing functions, and then cleans up any resulting
// empty directories from the storage driver. It can be called as a shortcut for calling the below
// pruning functions one by one.
2022-05-15 14:45:04 +00:00
//
2023-02-11 11:48:38 +00:00
// If blocking is true, then any errors encountered during the prune will be combined + returned to
// the caller. If blocking is false, the prune is run in the background and errors are just logged
// instead.
PruneAll ( ctx context . Context , mediaCacheRemoteDays int , blocking bool ) error
// UncacheRemote uncaches all remote media attachments older than the given amount of days.
//
// In this context, uncacheing means deleting media files from storage and marking the attachment
// as cached=false in the database.
//
// If 'dry' is true, then only a dry run will be performed: nothing will actually be changed.
//
// The returned int is the amount of media that was/would be uncached by this function.
UncacheRemote ( ctx context . Context , olderThanDays int , dry bool ) ( int , error )
// PruneUnusedRemote prunes unused/out of date headers and avatars cached on this instance.
2022-05-15 14:45:04 +00:00
//
// The returned int is the amount of media that was pruned by this function.
2023-02-11 11:48:38 +00:00
PruneUnusedRemote ( ctx context . Context , dry bool ) ( int , error )
// PruneUnusedLocal prunes unused media attachments that were uploaded by
2022-06-30 10:22:10 +00:00
// a user on this instance, but never actually attached to a status, or attached but
// later detached.
//
// The returned int is the amount of media that was pruned by this function.
2023-02-11 11:48:38 +00:00
PruneUnusedLocal ( ctx context . Context , dry bool ) ( int , error )
2022-11-25 17:23:42 +00:00
// PruneOrphaned prunes files that exist in storage but which do not have a corresponding
// entry in the database.
//
// If dry is true, then nothing will be changed, only the amount that *would* be removed
// is returned to the caller.
PruneOrphaned ( ctx context . Context , dry bool ) ( int , error )
2022-05-15 14:45:04 +00:00
2022-12-10 21:43:11 +00:00
/ *
REFETCHING FUNCTIONS
Useful when data loss has occurred .
* /
// RefetchEmojis iterates through remote emojis (for the given domain, or all if domain is empty string).
//
// For each emoji, the manager will check whether both the full size and static images are present in storage.
// If not, the manager will refetch and reprocess full size and static images for the emoji.
//
// The provided DereferenceMedia function will be used when it's necessary to refetch something this way.
RefetchEmojis ( ctx context . Context , domain string , dereferenceMedia DereferenceMedia ) ( int , error )
2021-04-01 18:46:45 +00:00
}
2021-12-28 15:36:00 +00:00
type manager struct {
2023-02-13 18:40:48 +00:00
state * state . State
2021-04-01 18:46:45 +00:00
}
2022-01-10 17:36:09 +00:00
// NewManager returns a media manager with the given db and underlying storage.
//
// A worker pool will also be initialized for the manager, to ensure that only
2022-05-07 15:36:01 +00:00
// a limited number of media will be processed in parallel. The numbers of workers
// is determined from the $GOMAXPROCS environment variable (usually no. CPU cores).
2022-05-15 09:16:43 +00:00
// See internal/concurrency.NewWorkerPool() documentation for further information.
2023-02-13 18:40:48 +00:00
func NewManager ( state * state . State ) Manager {
m := & manager { state : state }
scheduleCleanupJobs ( m )
return m
}
func ( m * manager ) PreProcessMedia ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , accountID string , ai * AdditionalMediaInfo ) ( * ProcessingMedia , error ) {
id , err := id . NewRandomULID ( )
if err != nil {
return nil , err
2022-01-10 17:36:09 +00:00
}
2023-02-13 18:40:48 +00:00
avatar := false
header := false
cached := false
now := time . Now ( )
// populate initial fields on the media attachment -- some of these will be overwritten as we proceed
attachment := & gtsmodel . MediaAttachment {
ID : id ,
CreatedAt : now ,
UpdatedAt : now ,
StatusID : "" ,
URL : "" , // we don't know yet because it depends on the uncalled DataFunc
RemoteURL : "" ,
Type : gtsmodel . FileTypeUnknown , // we don't know yet because it depends on the uncalled DataFunc
FileMeta : gtsmodel . FileMeta { } ,
AccountID : accountID ,
Description : "" ,
ScheduledStatusID : "" ,
Blurhash : "" ,
Processing : gtsmodel . ProcessingStatusReceived ,
File : gtsmodel . File { UpdatedAt : now } ,
Thumbnail : gtsmodel . Thumbnail { UpdatedAt : now } ,
Avatar : & avatar ,
Header : & header ,
Cached : & cached ,
}
// check if we have additional info to add to the attachment,
// and overwrite some of the attachment fields if so
if ai != nil {
if ai . CreatedAt != nil {
attachment . CreatedAt = * ai . CreatedAt
2022-05-07 15:36:01 +00:00
}
2023-02-13 18:40:48 +00:00
if ai . StatusID != nil {
attachment . StatusID = * ai . StatusID
2022-05-07 15:36:01 +00:00
}
2023-02-13 18:40:48 +00:00
if ai . RemoteURL != nil {
attachment . RemoteURL = * ai . RemoteURL
}
if ai . Description != nil {
attachment . Description = * ai . Description
}
if ai . ScheduledStatusID != nil {
attachment . ScheduledStatusID = * ai . ScheduledStatusID
}
if ai . Blurhash != nil {
attachment . Blurhash = * ai . Blurhash
}
if ai . Avatar != nil {
attachment . Avatar = ai . Avatar
}
if ai . Header != nil {
attachment . Header = ai . Header
}
if ai . FocusX != nil {
attachment . FileMeta . Focus . X = * ai . FocusX
}
if ai . FocusY != nil {
attachment . FileMeta . Focus . Y = * ai . FocusY
}
2022-05-07 15:36:01 +00:00
}
2023-02-13 18:40:48 +00:00
processingMedia := & ProcessingMedia {
media : attachment ,
dataFn : data ,
postFn : postData ,
mgr : m ,
2021-04-01 18:46:45 +00:00
}
2022-01-03 16:37:38 +00:00
2023-02-13 18:40:48 +00:00
return processingMedia , nil
}
func ( m * manager ) PreProcessMediaRecache ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , attachmentID string ) ( * ProcessingMedia , error ) {
// get the existing attachment from database.
attachment , err := m . state . DB . GetAttachmentByID ( ctx , attachmentID )
if err != nil {
2022-05-15 14:45:04 +00:00
return nil , err
2022-03-07 10:08:26 +00:00
}
2023-02-13 18:40:48 +00:00
processingMedia := & ProcessingMedia {
media : attachment ,
dataFn : data ,
postFn : postData ,
recache : true , // indicate it's a recache
mgr : m ,
}
return processingMedia , nil
2021-04-01 18:46:45 +00:00
}
2022-02-22 12:50:33 +00:00
func ( m * manager ) ProcessMedia ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , accountID string , ai * AdditionalMediaInfo ) ( * ProcessingMedia , error ) {
2023-02-13 18:40:48 +00:00
// Create a new processing media object for this media request.
media , err := m . PreProcessMedia ( ctx , data , postData , accountID , ai )
2021-12-28 15:36:00 +00:00
if err != nil {
return nil , err
}
2023-02-13 18:40:48 +00:00
// Attempt to add this media processing item to the worker queue.
_ = m . state . Workers . Media . MustEnqueueCtx ( ctx , media . Process )
return media , nil
2022-01-11 16:49:14 +00:00
}
2022-01-03 16:37:38 +00:00
2023-02-13 18:40:48 +00:00
func ( m * manager ) PreProcessEmoji ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , shortcode string , emojiID string , uri string , ai * AdditionalEmojiInfo , refresh bool ) ( * ProcessingEmoji , error ) {
instanceAccount , err := m . state . DB . GetInstanceAccount ( ctx , "" )
2022-01-11 16:49:14 +00:00
if err != nil {
2023-02-13 18:40:48 +00:00
return nil , fmt . Errorf ( "preProcessEmoji: error fetching this instance account from the db: %s" , err )
}
var (
newPathID string
emoji * gtsmodel . Emoji
now = time . Now ( )
)
if refresh {
emoji , err = m . state . DB . GetEmojiByID ( ctx , emojiID )
if err != nil {
return nil , fmt . Errorf ( "preProcessEmoji: error fetching emoji to refresh from the db: %s" , err )
}
// if this is a refresh, we will end up with new images
// stored for this emoji, so we can use the postData function
// to perform clean up of the old images from storage
originalPostData := postData
originalImagePath := emoji . ImagePath
originalImageStaticPath := emoji . ImageStaticPath
postData = func ( innerCtx context . Context ) error {
// trigger the original postData function if it was provided
if originalPostData != nil {
if err := originalPostData ( innerCtx ) ; err != nil {
return err
}
}
2023-02-17 19:05:43 +00:00
l := log . WithContext ( ctx ) .
WithField ( "shortcode@domain" , emoji . Shortcode + "@" + emoji . Domain )
2023-02-13 18:40:48 +00:00
l . Debug ( "postData: cleaning up old emoji files for refreshed emoji" )
if err := m . state . Storage . Delete ( innerCtx , originalImagePath ) ; err != nil && ! errors . Is ( err , storage . ErrNotFound ) {
l . Errorf ( "postData: error cleaning up old emoji image at %s for refreshed emoji: %s" , originalImagePath , err )
}
if err := m . state . Storage . Delete ( innerCtx , originalImageStaticPath ) ; err != nil && ! errors . Is ( err , storage . ErrNotFound ) {
l . Errorf ( "postData: error cleaning up old emoji static image at %s for refreshed emoji: %s" , originalImageStaticPath , err )
}
return nil
}
newPathID , err = id . NewRandomULID ( )
if err != nil {
return nil , fmt . Errorf ( "preProcessEmoji: error generating alternateID for emoji refresh: %s" , err )
}
// store + serve static image at new path ID
emoji . ImageStaticURL = uris . GenerateURIForAttachment ( instanceAccount . ID , string ( TypeEmoji ) , string ( SizeStatic ) , newPathID , mimePng )
emoji . ImageStaticPath = fmt . Sprintf ( "%s/%s/%s/%s.%s" , instanceAccount . ID , TypeEmoji , SizeStatic , newPathID , mimePng )
emoji . Shortcode = shortcode
emoji . URI = uri
} else {
disabled := false
visibleInPicker := true
// populate initial fields on the emoji -- some of these will be overwritten as we proceed
emoji = & gtsmodel . Emoji {
ID : emojiID ,
CreatedAt : now ,
Shortcode : shortcode ,
Domain : "" , // assume our own domain unless told otherwise
ImageRemoteURL : "" ,
ImageStaticRemoteURL : "" ,
ImageURL : "" , // we don't know yet
ImageStaticURL : uris . GenerateURIForAttachment ( instanceAccount . ID , string ( TypeEmoji ) , string ( SizeStatic ) , emojiID , mimePng ) , // all static emojis are encoded as png
ImagePath : "" , // we don't know yet
ImageStaticPath : fmt . Sprintf ( "%s/%s/%s/%s.%s" , instanceAccount . ID , TypeEmoji , SizeStatic , emojiID , mimePng ) , // all static emojis are encoded as png
ImageContentType : "" , // we don't know yet
ImageStaticContentType : mimeImagePng , // all static emojis are encoded as png
ImageFileSize : 0 ,
ImageStaticFileSize : 0 ,
Disabled : & disabled ,
URI : uri ,
VisibleInPicker : & visibleInPicker ,
CategoryID : "" ,
}
}
emoji . ImageUpdatedAt = now
emoji . UpdatedAt = now
// check if we have additional info to add to the emoji,
// and overwrite some of the emoji fields if so
if ai != nil {
if ai . CreatedAt != nil {
emoji . CreatedAt = * ai . CreatedAt
}
if ai . Domain != nil {
emoji . Domain = * ai . Domain
}
if ai . ImageRemoteURL != nil {
emoji . ImageRemoteURL = * ai . ImageRemoteURL
}
if ai . ImageStaticRemoteURL != nil {
emoji . ImageStaticRemoteURL = * ai . ImageStaticRemoteURL
}
if ai . Disabled != nil {
emoji . Disabled = ai . Disabled
}
if ai . VisibleInPicker != nil {
emoji . VisibleInPicker = ai . VisibleInPicker
}
if ai . CategoryID != nil {
emoji . CategoryID = * ai . CategoryID
}
2021-05-21 13:48:26 +00:00
}
2023-02-13 18:40:48 +00:00
processingEmoji := & ProcessingEmoji {
instAccID : instanceAccount . ID ,
emoji : emoji ,
refresh : refresh ,
newPathID : newPathID ,
dataFn : data ,
postFn : postData ,
mgr : m ,
}
2022-01-11 16:49:14 +00:00
return processingEmoji , nil
2022-01-08 16:17:01 +00:00
}
2023-02-13 18:40:48 +00:00
func ( m * manager ) ProcessEmoji ( ctx context . Context , data DataFunc , postData PostDataCallbackFunc , shortcode string , id string , uri string , ai * AdditionalEmojiInfo , refresh bool ) ( * ProcessingEmoji , error ) {
// Create a new processing emoji object for this emoji request.
emoji , err := m . PreProcessEmoji ( ctx , data , postData , shortcode , id , uri , ai , refresh )
2022-03-07 10:08:26 +00:00
if err != nil {
return nil , err
}
2023-02-13 18:40:48 +00:00
// Attempt to add this emoji processing item to the worker queue.
_ = m . state . Workers . Media . MustEnqueueCtx ( ctx , emoji . Process )
return emoji , nil
2022-03-07 10:08:26 +00:00
}
2023-02-13 18:40:48 +00:00
func scheduleCleanupJobs ( m * manager ) {
const day = time . Hour * 24
2022-01-09 17:41:22 +00:00
2023-02-13 18:40:48 +00:00
// Calculate closest midnight.
now := time . Now ( )
midnight := now . Round ( day )
2022-03-07 10:08:26 +00:00
2023-02-13 18:40:48 +00:00
if midnight . Before ( now ) {
// since <= 11:59am rounds down.
midnight = midnight . Add ( day )
2022-03-07 10:08:26 +00:00
}
2022-05-15 14:45:04 +00:00
2023-02-13 18:40:48 +00:00
// Get ctx associated with scheduler run state.
done := m . state . Workers . Scheduler . Done ( )
doneCtx := runners . CancelCtx ( done )
// TODO: we'll need to do some thinking to make these
// jobs restartable if we want to implement reloads in
// the future that make call to Workers.Stop() -> Workers.Start().
// Schedule the PruneAll task to execute every day at midnight.
m . state . Workers . Scheduler . Schedule ( sched . NewJob ( func ( now time . Time ) {
err := m . PruneAll ( doneCtx , config . GetMediaRemoteCacheDays ( ) , true )
if err != nil {
2023-02-17 11:02:29 +00:00
log . Errorf ( nil , "error during prune: %v" , err )
2023-02-13 18:40:48 +00:00
}
2023-02-17 11:02:29 +00:00
log . Infof ( nil , "finished pruning all in %s" , time . Since ( now ) )
2023-02-13 18:40:48 +00:00
} ) . EveryAt ( midnight , day ) )
2022-01-08 12:45:42 +00:00
}