2023-03-12 15:00:57 +00:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 11:32:39 +00:00
package dereferencing
import (
"context"
"errors"
2023-05-12 09:15:54 +00:00
"io"
2021-08-10 11:32:39 +00:00
"net/url"
2023-10-25 14:04:53 +00:00
"slices"
2023-11-04 20:21:20 +00:00
"time"
2023-10-25 14:04:53 +00:00
2021-08-10 11:32:39 +00:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2022-11-29 09:24:55 +00:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2022-09-12 11:03:23 +00:00
"github.com/superseriousbusiness/gotosocial/internal/db"
2023-05-31 08:39:54 +00:00
"github.com/superseriousbusiness/gotosocial/internal/gtscontext"
2023-05-28 12:08:35 +00:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2021-08-10 11:32:39 +00:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
"github.com/superseriousbusiness/gotosocial/internal/id"
2022-07-19 08:47:55 +00:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2022-01-09 17:41:22 +00:00
"github.com/superseriousbusiness/gotosocial/internal/media"
2023-03-01 17:52:44 +00:00
"github.com/superseriousbusiness/gotosocial/internal/transport"
2023-10-31 11:05:17 +00:00
"github.com/superseriousbusiness/gotosocial/internal/util"
2021-08-10 11:32:39 +00:00
)
2023-05-12 09:15:54 +00:00
// statusUpToDate returns whether the given status model is both updateable
// (i.e. remote status) and whether it needs an update based on `fetched_at`.
2023-12-15 14:24:39 +00:00
func statusUpToDate ( status * gtsmodel . Status , force bool ) bool {
2024-01-31 13:29:47 +00:00
if status . Local != nil && * status . Local {
2023-05-12 09:15:54 +00:00
// Can't update local statuses.
return true
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
2023-12-15 14:24:39 +00:00
// Default limit we allow
// statuses to be refreshed.
limit := 2 * time . Hour
if force {
// We specifically allow the force flag
// to force an early refresh (on a much
// smaller cooldown period).
limit = 5 * time . Minute
}
// If this status was updated recently (within limit), return as-is.
if next := status . FetchedAt . Add ( limit ) ; time . Now ( ) . Before ( next ) {
2023-05-12 09:15:54 +00:00
return true
2022-11-15 18:45:15 +00:00
}
2023-05-12 09:15:54 +00:00
return false
2021-08-10 11:32:39 +00:00
}
2023-11-04 20:21:20 +00:00
// GetStatusByURI will attempt to fetch a status by its URI, first checking the database. In the case of a newly-met remote model, or a remote model whose 'last_fetched' date
// is beyond a certain interval, the status will be dereferenced. In the case of dereferencing, some low-priority status information may be enqueued for asynchronous fetching,
// e.g. dereferencing the status thread. Param 'syncParent' = true indicates to fetch status ancestors synchronously. An ActivityPub object indicates the status was dereferenced.
2023-10-23 09:58:13 +00:00
func ( d * Dereferencer ) GetStatusByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Status , ap . Statusable , error ) {
2024-01-31 13:29:47 +00:00
// Fetch and dereference / update status if necessary.
2023-11-04 20:21:20 +00:00
status , statusable , isNew , err := d . getStatusByURI ( ctx ,
2023-05-12 09:15:54 +00:00
requestUser ,
uri ,
)
2024-01-31 13:29:47 +00:00
2023-05-12 09:15:54 +00:00
if err != nil {
2024-01-31 13:29:47 +00:00
if status == nil {
// err with no existing
// status for fallback.
return nil , nil , err
}
log . Errorf ( ctx , "error updating status %s: %v" , uri , err )
} else if statusable != nil {
2023-05-12 09:15:54 +00:00
2023-11-04 20:21:20 +00:00
// Deref parents + children.
d . dereferenceThread ( ctx ,
requestUser ,
uri ,
status ,
statusable ,
isNew ,
)
2023-05-12 09:15:54 +00:00
}
2023-11-04 20:21:20 +00:00
return status , statusable , nil
2023-05-12 09:15:54 +00:00
}
2024-01-31 13:29:47 +00:00
// getStatusByURI is a package internal form of .GetStatusByURI() that doesn't dereference thread on update, and may return an existing status with error on failed re-fetch.
2023-11-04 20:21:20 +00:00
func ( d * Dereferencer ) getStatusByURI ( ctx context . Context , requestUser string , uri * url . URL ) ( * gtsmodel . Status , ap . Statusable , bool , error ) {
2023-05-12 09:15:54 +00:00
var (
status * gtsmodel . Status
uriStr = uri . String ( )
err error
)
2024-01-31 13:29:47 +00:00
// Search the database for existing by URI.
2023-05-31 08:39:54 +00:00
status , err = d . state . DB . GetStatusByURI (
2024-01-31 13:29:47 +00:00
2023-05-31 08:39:54 +00:00
// request a barebones object, it may be in the
// db but with related models not yet dereferenced.
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-05-12 09:15:54 +00:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-11-04 20:21:20 +00:00
return nil , nil , false , gtserror . Newf ( "error checking database for status %s by uri: %w" , uriStr , err )
2021-08-10 11:32:39 +00:00
}
2022-11-29 09:24:55 +00:00
if status == nil {
2024-01-31 13:29:47 +00:00
// Else, search database for existing by URL.
2023-05-31 08:39:54 +00:00
status , err = d . state . DB . GetStatusByURL (
gtscontext . SetBarebones ( ctx ) ,
uriStr ,
)
2023-05-12 09:15:54 +00:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-11-04 20:21:20 +00:00
return nil , nil , false , gtserror . Newf ( "error checking database for status %s by url: %w" , uriStr , err )
2022-11-29 09:24:55 +00:00
}
2022-05-23 15:40:03 +00:00
}
2023-05-12 09:15:54 +00:00
if status == nil {
2024-01-31 13:29:47 +00:00
// Ensure not a failed search for a local
// status, if so we know it doesn't exist.
if uri . Host == config . GetHost ( ) ||
uri . Host == config . GetAccountDomain ( ) {
return nil , nil , false , gtserror . SetUnretrievable ( err )
2022-11-29 09:24:55 +00:00
}
2023-05-12 09:15:54 +00:00
// Create and pass-through a new bare-bones model for deref.
2023-10-31 11:12:22 +00:00
return d . enrichStatusSafely ( ctx , requestUser , uri , & gtsmodel . Status {
2024-01-31 13:29:47 +00:00
Local : util . Ptr ( false ) ,
2023-05-12 09:15:54 +00:00
URI : uriStr ,
} , nil )
2022-11-29 09:24:55 +00:00
}
2023-12-15 14:24:39 +00:00
if statusUpToDate ( status , false ) {
2024-01-09 09:42:39 +00:00
// This is an existing status that is up-to-date,
// before returning ensure it is fully populated.
2023-05-31 08:39:54 +00:00
if err := d . state . DB . PopulateStatus ( ctx , status ) ; err != nil {
log . Errorf ( ctx , "error populating existing status: %v" , err )
}
2024-01-09 09:42:39 +00:00
2023-11-04 20:21:20 +00:00
return status , nil , false , nil
2023-05-31 08:39:54 +00:00
}
2024-01-31 13:29:47 +00:00
// Try to deref and update existing status model.
2023-11-04 20:21:20 +00:00
latest , statusable , isNew , err := d . enrichStatusSafely ( ctx ,
2023-05-12 09:15:54 +00:00
requestUser ,
uri ,
status ,
nil ,
)
2023-03-01 17:52:44 +00:00
2024-01-31 13:29:47 +00:00
if err != nil {
// fallback to the
// existing status.
latest = status
statusable = nil
2021-08-10 11:32:39 +00:00
}
2024-01-31 13:29:47 +00:00
return latest , statusable , isNew , err
2023-05-12 09:15:54 +00:00
}
2021-08-10 11:32:39 +00:00
2023-11-04 20:21:20 +00:00
// RefreshStatus is functionally equivalent to GetStatusByURI(), except that it requires a pre
// populated status model (with AT LEAST uri set), and ALL thread dereferencing is asynchronous.
func ( d * Dereferencer ) RefreshStatus (
ctx context . Context ,
requestUser string ,
status * gtsmodel . Status ,
statusable ap . Statusable ,
force bool ,
) ( * gtsmodel . Status , ap . Statusable , error ) {
2024-01-09 09:42:39 +00:00
// If no incoming data is provided,
// check whether status needs update.
if statusable == nil &&
statusUpToDate ( status , force ) {
2023-05-12 09:15:54 +00:00
return status , nil , nil
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "invalid status uri %q: %w" , status . URI , err )
2021-08-10 11:32:39 +00:00
}
2023-11-04 20:21:20 +00:00
// Try to update + dereference the passed status model.
latest , statusable , isNew , err := d . enrichStatusSafely ( ctx ,
2023-05-12 09:15:54 +00:00
requestUser ,
uri ,
status ,
2023-11-04 20:21:20 +00:00
statusable ,
2023-05-12 09:15:54 +00:00
)
2022-05-23 15:40:03 +00:00
if err != nil {
2023-05-12 09:15:54 +00:00
return nil , nil , err
2022-05-23 15:40:03 +00:00
}
2021-08-10 11:32:39 +00:00
2023-11-04 20:21:20 +00:00
if statusable != nil {
// Deref parents + children.
d . dereferenceThread ( ctx ,
requestUser ,
uri ,
status ,
statusable ,
isNew ,
)
}
2021-08-10 11:32:39 +00:00
2023-11-04 20:21:20 +00:00
return latest , statusable , nil
2021-08-10 11:32:39 +00:00
}
2023-11-04 20:21:20 +00:00
// RefreshStatusAsync is functionally equivalent to RefreshStatus(), except that ALL
// dereferencing is queued for asynchronous processing, (both thread AND status).
func ( d * Dereferencer ) RefreshStatusAsync (
ctx context . Context ,
requestUser string ,
status * gtsmodel . Status ,
statusable ap . Statusable ,
force bool ,
) {
2024-01-09 09:42:39 +00:00
// If no incoming data is provided,
// check whether status needs update.
if statusable == nil &&
statusUpToDate ( status , force ) {
2023-05-12 09:15:54 +00:00
return
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
// Parse the URI from status.
uri , err := url . Parse ( status . URI )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
log . Errorf ( ctx , "invalid status uri %q: %v" , status . URI , err )
2023-05-12 09:15:54 +00:00
return
2021-08-10 11:32:39 +00:00
}
2023-11-04 20:21:20 +00:00
// Enqueue a worker function to re-fetch this status entirely async.
2023-05-12 09:15:54 +00:00
d . state . Workers . Federator . MustEnqueueCtx ( ctx , func ( ctx context . Context ) {
2023-11-04 20:21:20 +00:00
latest , statusable , _ , err := d . enrichStatusSafely ( ctx ,
requestUser ,
uri ,
status ,
statusable ,
)
2023-05-12 09:15:54 +00:00
if err != nil {
log . Errorf ( ctx , "error enriching remote status: %v" , err )
return
}
2023-11-04 20:21:20 +00:00
if statusable != nil {
if err := d . DereferenceStatusAncestors ( ctx , requestUser , latest ) ; err != nil {
log . Error ( ctx , err )
}
if err := d . DereferenceStatusDescendants ( ctx , requestUser , uri , statusable ) ; err != nil {
log . Error ( ctx , err )
}
2023-10-31 11:12:22 +00:00
}
2023-05-12 09:15:54 +00:00
} )
2021-08-10 11:32:39 +00:00
}
2023-10-31 11:12:22 +00:00
// enrichStatusSafely wraps enrichStatus() to perform
// it within the State{}.FedLocks mutexmap, which protects
// dereferencing actions with per-URI mutex locks.
func ( d * Dereferencer ) enrichStatusSafely (
ctx context . Context ,
requestUser string ,
uri * url . URL ,
status * gtsmodel . Status ,
apubStatus ap . Statusable ,
2023-11-04 20:21:20 +00:00
) ( * gtsmodel . Status , ap . Statusable , bool , error ) {
2023-10-31 11:12:22 +00:00
uriStr := status . URI
2023-11-30 11:32:45 +00:00
var isNew bool
// Check if this is a new status (to us).
if isNew = ( status . ID == "" ) ; ! isNew {
2023-10-31 11:12:22 +00:00
// This is an existing status, first try to populate it. This
// is required by the checks below for existing tags, media etc.
if err := d . state . DB . PopulateStatus ( ctx , status ) ; err != nil {
log . Errorf ( ctx , "error populating existing status %s: %v" , uriStr , err )
}
}
// Acquire per-URI deref lock, wraping unlock
// to safely defer in case of panic, while still
// performing more granular unlocks when needed.
unlock := d . state . FedLocks . Lock ( uriStr )
unlock = doOnce ( unlock )
defer unlock ( )
// Perform status enrichment with passed vars.
latest , apubStatus , err := d . enrichStatus ( ctx ,
requestUser ,
uri ,
status ,
apubStatus ,
)
2024-01-31 13:29:47 +00:00
if gtserror . StatusCode ( err ) >= 400 {
2024-01-26 13:17:10 +00:00
if isNew {
// This was a new status enrich
// attempt which failed before we
// got to store it, so we can't
// return anything useful.
return nil , nil , isNew , err
}
// We had this status stored already
// before this enrichment attempt.
//
// Update fetched_at to slow re-attempts
// but don't return early. We can still
// return the model we had stored already.
2023-10-31 11:12:22 +00:00
status . FetchedAt = time . Now ( )
2024-01-26 13:17:10 +00:00
if err := d . state . DB . UpdateStatus ( ctx , status , "fetched_at" ) ; err != nil {
2024-01-31 13:29:47 +00:00
log . Error ( ctx , "error updating %s fetched_at: %v" , uriStr , err )
2024-01-26 13:17:10 +00:00
}
2023-10-31 11:12:22 +00:00
}
// Unlock now
// we're done.
unlock ( )
if errors . Is ( err , db . ErrAlreadyExists ) {
// Ensure AP model isn't set,
// otherwise this indicates WE
// enriched the status.
apubStatus = nil
2023-11-30 11:32:45 +00:00
// We leave 'isNew' set so that caller
// still dereferences parents, otherwise
// the version we pass back may not have
// these attached as inReplyTos yet (since
// those happen OUTSIDE federator lock).
//
// TODO: performance-wise, this won't be
// great. should improve this if we can!
2023-10-31 11:12:22 +00:00
// DATA RACE! We likely lost out to another goroutine
// in a call to db.Put(Status). Look again in DB by URI.
latest , err = d . state . DB . GetStatusByURI ( ctx , status . URI )
if err != nil {
err = gtserror . Newf ( "error getting status %s from database after race: %w" , uriStr , err )
}
}
2023-11-04 20:21:20 +00:00
return latest , apubStatus , isNew , err
2023-10-31 11:12:22 +00:00
}
2023-06-24 07:32:10 +00:00
// enrichStatus will enrich the given status, whether a new
// barebones model, or existing model from the database.
// It handles necessary dereferencing, database updates, etc.
2023-10-23 09:58:13 +00:00
func ( d * Dereferencer ) enrichStatus (
2023-06-24 07:32:10 +00:00
ctx context . Context ,
requestUser string ,
uri * url . URL ,
status * gtsmodel . Status ,
apubStatus ap . Statusable ,
) ( * gtsmodel . Status , ap . Statusable , error ) {
2023-05-12 09:15:54 +00:00
// Pre-fetch a transport for requesting username, used by later dereferencing.
tsport , err := d . transportController . NewTransportForUsername ( ctx , requestUser )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "couldn't create transport: %w" , err )
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
// Check whether this account URI is a blocked domain / subdomain.
if blocked , err := d . state . DB . IsDomainBlocked ( ctx , uri . Host ) ; err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error checking blocked domain: %w" , err )
2023-05-12 09:15:54 +00:00
} else if blocked {
2023-06-24 07:32:10 +00:00
err = gtserror . Newf ( "%s is blocked" , uri . Host )
return nil , nil , gtserror . SetUnretrievable ( err )
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
if apubStatus == nil {
// Dereference latest version of the status.
b , err := tsport . Dereference ( ctx , uri )
2021-08-10 11:32:39 +00:00
if err != nil {
2024-01-26 13:17:10 +00:00
err := gtserror . Newf ( "error dereferencing %s: %w" , uri , err )
2023-06-22 19:46:36 +00:00
return nil , nil , gtserror . SetUnretrievable ( err )
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
// Attempt to resolve ActivityPub status from data.
apubStatus , err = ap . ResolveStatusable ( ctx , b )
if err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error resolving statusable from data for account %s: %w" , uri , err )
2023-05-12 09:15:54 +00:00
}
}
2021-08-10 11:32:39 +00:00
2023-06-17 15:49:11 +00:00
// Get the attributed-to account in order to fetch profile.
attributedTo , err := ap . ExtractAttributedToURI ( apubStatus )
2023-05-12 09:15:54 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . New ( "attributedTo was empty" )
2022-09-12 11:03:23 +00:00
}
2021-08-10 11:32:39 +00:00
2023-10-31 11:12:22 +00:00
// Ensure we have the author account of the status dereferenced (+ up-to-date). If this is a new status
// (i.e. status.AccountID == "") then any error here is irrecoverable. AccountID must ALWAYS be set.
if _ , _ , err := d . getAccountByURI ( ctx , requestUser , attributedTo ) ; err != nil && status . AccountID == "" {
return nil , nil , gtserror . Newf ( "failed to dereference status author %s: %w" , uri , err )
2021-08-29 10:03:08 +00:00
}
2023-06-22 19:46:36 +00:00
// ActivityPub model was recently dereferenced, so assume that passed status
// may contain out-of-date information, convert AP model to our GTS model.
2023-09-23 16:44:11 +00:00
latestStatus , err := d . converter . ASStatusToStatus ( ctx , apubStatus )
2023-06-22 19:46:36 +00:00
if err != nil {
return nil , nil , gtserror . Newf ( "error converting statusable to gts model for status %s: %w" , uri , err )
2021-08-29 10:03:08 +00:00
}
2024-01-31 13:29:47 +00:00
// Based on the original provided
// status model, determine whether
// this is a new insert / update.
var isNew bool
if isNew = ( status . ID == "" ) ; isNew {
// Generate new status ID from the provided creation date.
2023-05-12 09:15:54 +00:00
latestStatus . ID , err = id . NewULIDFromTime ( latestStatus . CreatedAt )
if err != nil {
2023-11-08 14:32:17 +00:00
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
latestStatus . ID = id . NewULID ( ) // just use "now"
2021-08-10 11:32:39 +00:00
}
2024-01-26 13:17:10 +00:00
} else {
2024-01-31 13:29:47 +00:00
2024-01-26 13:17:10 +00:00
// Reuse existing status ID.
latestStatus . ID = status . ID
2023-05-12 09:15:54 +00:00
}
2021-08-10 11:32:39 +00:00
2023-05-12 09:15:54 +00:00
// Carry-over values and set fetch time.
2023-11-04 20:21:20 +00:00
latestStatus . UpdatedAt = status . UpdatedAt
2023-05-12 09:15:54 +00:00
latestStatus . FetchedAt = time . Now ( )
latestStatus . Local = status . Local
2023-11-08 14:32:17 +00:00
// Ensure the status' poll remains consistent, else reset the poll.
if err := d . fetchStatusPoll ( ctx , status , latestStatus ) ; err != nil {
return nil , nil , gtserror . Newf ( "error populating poll for status %s: %w" , uri , err )
}
2023-05-12 09:15:54 +00:00
// Ensure the status' mentions are populated, and pass in existing to check for changes.
if err := d . fetchStatusMentions ( ctx , requestUser , status , latestStatus ) ; err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error populating mentions for status %s: %w" , uri , err )
2023-05-12 09:15:54 +00:00
}
2023-10-25 14:04:53 +00:00
// Now that we know who this status replies to (handled by ASStatusToStatus)
// and who it mentions, we can add a ThreadID to it if necessary.
if err := d . threadStatus ( ctx , latestStatus ) ; err != nil {
return nil , nil , gtserror . Newf ( "error checking / creating threadID for status %s: %w" , uri , err )
}
2023-10-04 12:09:42 +00:00
// Ensure the status' tags are populated, (changes are expected / okay).
2023-10-31 11:12:22 +00:00
if err := d . fetchStatusTags ( ctx , status , latestStatus ) ; err != nil {
2023-07-31 13:47:35 +00:00
return nil , nil , gtserror . Newf ( "error populating tags for status %s: %w" , uri , err )
}
2023-05-12 09:15:54 +00:00
// Ensure the status' media attachments are populated, passing in existing to check for changes.
if err := d . fetchStatusAttachments ( ctx , tsport , status , latestStatus ) ; err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error populating attachments for status %s: %w" , uri , err )
2023-05-12 09:15:54 +00:00
}
2023-10-04 12:09:42 +00:00
// Ensure the status' emoji attachments are populated, (changes are expected / okay).
if err := d . fetchStatusEmojis ( ctx , requestUser , latestStatus ) ; err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error populating emojis for status %s: %w" , uri , err )
2023-05-12 09:15:54 +00:00
}
2024-01-26 13:17:10 +00:00
if isNew {
2023-05-12 09:15:54 +00:00
// This is new, put the status in the database.
err := d . state . DB . PutStatus ( ctx , latestStatus )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error putting in database: %w" , err )
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
} else {
// This is an existing status, update the model in the database.
if err := d . state . DB . UpdateStatus ( ctx , latestStatus ) ; err != nil {
2023-05-28 12:08:35 +00:00
return nil , nil , gtserror . Newf ( "error updating database: %w" , err )
2023-05-12 09:15:54 +00:00
}
}
return latestStatus , apubStatus , nil
}
2021-08-10 11:32:39 +00:00
2023-10-31 11:05:17 +00:00
// populateMentionTarget tries to populate the given
// mention with the correct TargetAccount and (if not
// yet set) TargetAccountURI, returning the populated
// mention.
//
// Will check on the existing status if the mention
// is already there and populated; if so, existing
// mention will be returned along with `true`.
//
// Otherwise, this function will try to parse first
// the Href of the mention, and then the namestring,
// to see who it targets, and go fetch that account.
func ( d * Dereferencer ) populateMentionTarget (
ctx context . Context ,
mention * gtsmodel . Mention ,
requestUser string ,
existing , status * gtsmodel . Status ,
) (
* gtsmodel . Mention ,
bool , // True if mention already exists in the DB.
error ,
) {
// Mentions can be created using Name or Href.
// Prefer Href (TargetAccountURI), fall back to Name.
if mention . TargetAccountURI != "" {
// Look for existing mention with this URI.
// If we already have it we can return early.
existingMention , ok := existing . GetMentionByTargetURI ( mention . TargetAccountURI )
if ok && existingMention . ID != "" {
return existingMention , true , nil
2021-08-29 10:03:08 +00:00
}
2023-05-12 09:15:54 +00:00
// Ensure that mention account URI is parseable.
accountURI , err := url . Parse ( mention . TargetAccountURI )
if err != nil {
2023-10-31 11:05:17 +00:00
err = gtserror . Newf ( "invalid account uri %q: %w" , mention . TargetAccountURI , err )
return nil , false , err
2021-08-20 10:26:56 +00:00
}
2021-08-10 11:32:39 +00:00
2023-05-12 09:15:54 +00:00
// Ensure we have the account of the mention target dereferenced.
mention . TargetAccount , _ , err = d . getAccountByURI ( ctx , requestUser , accountURI )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-10-31 11:05:17 +00:00
err = gtserror . Newf ( "failed to dereference account %s: %w" , accountURI , err )
return nil , false , err
}
} else {
// Href wasn't set. Find the target account using namestring.
username , domain , err := util . ExtractNamestringParts ( mention . NameString )
if err != nil {
err = gtserror . Newf ( "failed to parse namestring %s: %w" , mention . NameString , err )
return nil , false , err
}
mention . TargetAccount , _ , err = d . getAccountByUsernameDomain ( ctx , requestUser , username , domain )
if err != nil {
err = gtserror . Newf ( "failed to dereference account %s: %w" , mention . NameString , err )
return nil , false , err
}
// Look for existing mention with this URI.
mention . TargetAccountURI = mention . TargetAccount . URI
existingMention , ok := existing . GetMentionByTargetURI ( mention . TargetAccountURI )
if ok && existingMention . ID != "" {
return existingMention , true , nil
}
}
// At this point, mention.TargetAccountURI
// and mention.TargetAccount must be set.
return mention , false , nil
}
func ( d * Dereferencer ) fetchStatusMentions ( ctx context . Context , requestUser string , existing , status * gtsmodel . Status ) error {
// Allocate new slice to take the yet-to-be created mention IDs.
status . MentionIDs = make ( [ ] string , len ( status . Mentions ) )
for i := range status . Mentions {
var (
mention = status . Mentions [ i ]
alreadyExists bool
err error
)
mention , alreadyExists , err = d . populateMentionTarget (
ctx ,
mention ,
requestUser ,
existing ,
status ,
)
if err != nil {
log . Errorf ( ctx , "failed to derive mention: %v" , err )
continue
}
if alreadyExists {
// This mention was already attached
// to the status, use it and continue.
status . Mentions [ i ] = mention
status . MentionIDs [ i ] = mention . ID
2023-05-12 09:15:54 +00:00
continue
2021-08-20 10:26:56 +00:00
}
2023-10-31 11:05:17 +00:00
// This mention didn't exist yet.
2023-05-12 09:15:54 +00:00
// Generate new ID according to status creation.
2023-10-04 12:09:42 +00:00
// TODO: update this to use "edited_at" when we add
// support for edited status revision history.
2023-05-12 09:15:54 +00:00
mention . ID , err = id . NewULIDFromTime ( status . CreatedAt )
if err != nil {
2023-11-08 14:32:17 +00:00
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
2023-05-12 09:15:54 +00:00
mention . ID = id . NewULID ( ) // just use "now"
2021-08-10 11:32:39 +00:00
}
2023-05-12 09:15:54 +00:00
// Set known further mention details.
mention . CreatedAt = status . CreatedAt
mention . UpdatedAt = status . UpdatedAt
mention . OriginAccount = status . Account
mention . OriginAccountID = status . AccountID
mention . OriginAccountURI = status . AccountURI
mention . TargetAccountID = mention . TargetAccount . ID
mention . TargetAccountURI = mention . TargetAccount . URI
mention . TargetAccountURL = mention . TargetAccount . URL
mention . StatusID = status . ID
mention . Status = status
// Place the new mention into the database.
if err := d . state . DB . PutMention ( ctx , mention ) ; err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "error putting mention in database: %w" , err )
2021-08-10 11:32:39 +00:00
}
2021-08-29 10:03:08 +00:00
2023-05-12 09:15:54 +00:00
// Set the *new* mention and ID.
status . Mentions [ i ] = mention
status . MentionIDs [ i ] = mention . ID
2021-08-10 11:32:39 +00:00
}
2021-08-29 10:03:08 +00:00
2023-06-22 19:46:36 +00:00
for i := 0 ; i < len ( status . MentionIDs ) ; {
2023-05-12 09:15:54 +00:00
if status . MentionIDs [ i ] == "" {
// This is a failed mention population, likely due
// to invalid incoming data / now-deleted accounts.
copy ( status . Mentions [ i : ] , status . Mentions [ i + 1 : ] )
copy ( status . MentionIDs [ i : ] , status . MentionIDs [ i + 1 : ] )
status . Mentions = status . Mentions [ : len ( status . Mentions ) - 1 ]
status . MentionIDs = status . MentionIDs [ : len ( status . MentionIDs ) - 1 ]
2023-06-22 19:46:36 +00:00
continue
2023-05-12 09:15:54 +00:00
}
2023-06-22 19:46:36 +00:00
i ++
2023-05-12 09:15:54 +00:00
}
2021-08-10 11:32:39 +00:00
2021-08-29 10:03:08 +00:00
return nil
}
2023-10-25 14:04:53 +00:00
// threadStatus sets a ThreadID on the given status where appropriate.
// If the status has a parent (InReplyTo) with a thread ID, that ID is
// inherited. Otherwise, if the status mentions at least one local
// account, a new thread is inserted into the database and assigned to
// the status. In all other cases the status is left unthreaded.
func (d *Dereferencer) threadStatus(ctx context.Context, status *gtsmodel.Status) error {
	if parent := status.InReplyTo; parent != nil && parent.ThreadID != "" {
		// Simplest case: the parent status is
		// threaded, so just inherit its threadID.
		status.ThreadID = parent.ThreadID
		return nil
	}

	// targetsLocal reports whether a mention targets a local
	// account. A mention whose TargetAccount couldn't be deref'd
	// (nil) is known not to target a local account.
	targetsLocal := func(m *gtsmodel.Mention) bool {
		account := m.TargetAccount
		return account != nil && account.IsLocal()
	}

	// Parent wasn't threaded. The status only needs threading
	// if it mentions a local account, so that the local account
	// can mute it if they want; otherwise there's nothing to do.
	if !slices.ContainsFunc(status.Mentions, targetsLocal) {
		return nil
	}

	// A local account is mentioned: create a
	// new thread and assign it to the status.
	threadID := id.NewULID()
	thread := &gtsmodel.Thread{ID: threadID}
	if err := d.state.DB.PutThread(ctx, thread); err != nil {
		return gtserror.Newf("error inserting new thread in db: %w", err)
	}

	status.ThreadID = threadID
	return nil
}
2023-10-31 11:12:22 +00:00
func ( d * Dereferencer ) fetchStatusTags ( ctx context . Context , existing , status * gtsmodel . Status ) error {
2023-07-31 13:47:35 +00:00
// Allocate new slice to take the yet-to-be determined tag IDs.
status . TagIDs = make ( [ ] string , len ( status . Tags ) )
for i := range status . Tags {
2023-10-31 11:12:22 +00:00
tag := status . Tags [ i ]
// Look for tag in existing status with name.
existing , ok := existing . GetTagByName ( tag . Name )
if ok && existing . ID != "" {
status . Tags [ i ] = existing
status . TagIDs [ i ] = existing . ID
continue
}
2023-07-31 13:47:35 +00:00
2023-10-31 11:12:22 +00:00
// Look for existing tag with name in the database.
existing , err := d . state . DB . GetTagByName ( ctx , tag . Name )
2023-07-31 13:47:35 +00:00
if err != nil && ! errors . Is ( err , db . ErrNoEntries ) {
2023-10-31 11:12:22 +00:00
return gtserror . Newf ( "db error getting tag %s: %w" , tag . Name , err )
} else if existing != nil {
status . Tags [ i ] = existing
status . TagIDs [ i ] = existing . ID
2023-07-31 13:47:35 +00:00
continue
}
2023-10-31 11:12:22 +00:00
// Create new ID for tag.
tag . ID = id . NewULID ( )
2023-07-31 13:47:35 +00:00
2023-10-31 11:12:22 +00:00
// Insert this tag with new name into the database.
if err := d . state . DB . PutTag ( ctx , tag ) ; err != nil {
log . Errorf ( ctx , "db error putting tag %s: %v" , tag . Name , err )
continue
2023-07-31 13:47:35 +00:00
}
2023-10-31 11:12:22 +00:00
// Set new tag ID in slice.
2023-07-31 13:47:35 +00:00
status . TagIDs [ i ] = tag . ID
}
// Remove any tag we couldn't get or create.
for i := 0 ; i < len ( status . TagIDs ) ; {
if status . TagIDs [ i ] == "" {
// This is a failed tag population, likely due
// to some database peculiarity / race condition.
copy ( status . Tags [ i : ] , status . Tags [ i + 1 : ] )
copy ( status . TagIDs [ i : ] , status . TagIDs [ i + 1 : ] )
status . Tags = status . Tags [ : len ( status . Tags ) - 1 ]
status . TagIDs = status . TagIDs [ : len ( status . TagIDs ) - 1 ]
continue
}
i ++
}
return nil
}
2023-11-08 14:32:17 +00:00
func ( d * Dereferencer ) fetchStatusPoll ( ctx context . Context , existing , status * gtsmodel . Status ) error {
var (
// insertStatusPoll generates ID and inserts the poll attached to status into the database.
insertStatusPoll = func ( ctx context . Context , status * gtsmodel . Status ) error {
var err error
// Generate new ID for poll from the status CreatedAt.
// TODO: update this to use "edited_at" when we add
// support for edited status revision history.
status . Poll . ID , err = id . NewULIDFromTime ( status . CreatedAt )
if err != nil {
log . Errorf ( ctx , "invalid created at date (falling back to 'now'): %v" , err )
status . Poll . ID = id . NewULID ( ) // just use "now"
}
// Update the status<->poll links.
status . PollID = status . Poll . ID
status . Poll . StatusID = status . ID
status . Poll . Status = status
// Insert this latest poll into the database.
err = d . state . DB . PutPoll ( ctx , status . Poll )
if err != nil {
return gtserror . Newf ( "error putting in database: %w" , err )
}
return nil
}
// deleteStatusPoll deletes the poll with ID, and all attached votes, from the database.
deleteStatusPoll = func ( ctx context . Context , pollID string ) error {
if err := d . state . DB . DeletePollByID ( ctx , pollID ) ; err != nil {
return gtserror . Newf ( "error deleting existing poll from database: %w" , err )
}
if err := d . state . DB . DeletePollVotes ( ctx , pollID ) ; err != nil {
return gtserror . Newf ( "error deleting existing votes from database: %w" , err )
}
return nil
}
)
switch {
case existing . Poll == nil && status . Poll == nil :
// no poll before or after, nothing to do.
return nil
case existing . Poll == nil && status . Poll != nil :
// no previous poll, insert new poll!
return insertStatusPoll ( ctx , status )
2023-11-11 10:15:04 +00:00
case status . Poll == nil :
2023-11-08 14:32:17 +00:00
// existing poll has been deleted, remove this.
return deleteStatusPoll ( ctx , existing . PollID )
2023-11-11 10:15:04 +00:00
case pollChanged ( existing . Poll , status . Poll ) :
2023-11-08 14:32:17 +00:00
// poll has changed since original, delete and reinsert new.
if err := deleteStatusPoll ( ctx , existing . PollID ) ; err != nil {
return err
}
return insertStatusPoll ( ctx , status )
2023-11-11 10:15:04 +00:00
case pollUpdated ( existing . Poll , status . Poll ) :
2023-11-08 14:32:17 +00:00
// Since we last saw it, the poll has updated!
// Whether that be stats, or close time.
poll := existing . Poll
2023-11-11 10:15:04 +00:00
poll . Closing = pollJustClosed ( existing . Poll , status . Poll )
2023-11-08 14:32:17 +00:00
poll . ClosedAt = status . Poll . ClosedAt
poll . Voters = status . Poll . Voters
poll . Votes = status . Poll . Votes
// Update poll model in the database (specifically only the possible changed columns).
if err := d . state . DB . UpdatePoll ( ctx , poll , "closed_at" , "voters" , "votes" ) ; err != nil {
return gtserror . Newf ( "error updating poll: %w" , err )
}
// Update poll on status.
status . PollID = poll . ID
status . Poll = poll
return nil
default :
// latest and existing
// polls are up to date.
poll := existing . Poll
status . PollID = poll . ID
status . Poll = poll
return nil
}
}
2023-10-23 09:58:13 +00:00
func ( d * Dereferencer ) fetchStatusAttachments ( ctx context . Context , tsport transport . Transport , existing , status * gtsmodel . Status ) error {
2023-05-12 09:15:54 +00:00
// Allocate new slice to take the yet-to-be fetched attachment IDs.
status . AttachmentIDs = make ( [ ] string , len ( status . Attachments ) )
for i := range status . Attachments {
2023-10-31 11:12:22 +00:00
attachment := status . Attachments [ i ]
2023-05-12 09:15:54 +00:00
2023-11-10 18:29:26 +00:00
// Look for existing media attachment with remote URL first.
2023-10-31 11:12:22 +00:00
existing , ok := existing . GetAttachmentByRemoteURL ( attachment . RemoteURL )
2023-06-22 19:46:36 +00:00
if ok && existing . ID != "" && * existing . Cached {
2023-05-12 09:15:54 +00:00
status . Attachments [ i ] = existing
status . AttachmentIDs [ i ] = existing . ID
continue
}
// Ensure a valid media attachment remote URL.
2023-10-31 11:12:22 +00:00
remoteURL , err := url . Parse ( attachment . RemoteURL )
2023-05-12 09:15:54 +00:00
if err != nil {
2023-10-31 11:12:22 +00:00
log . Errorf ( ctx , "invalid remote media url %q: %v" , attachment . RemoteURL , err )
2023-05-12 09:15:54 +00:00
continue
}
2023-11-10 18:29:26 +00:00
data := func ( ctx context . Context ) ( io . ReadCloser , int64 , error ) {
2023-05-12 09:15:54 +00:00
return tsport . DereferenceMedia ( ctx , remoteURL )
2023-11-10 18:29:26 +00:00
}
ai := & media . AdditionalMediaInfo {
2023-05-12 09:15:54 +00:00
StatusID : & status . ID ,
2023-10-31 11:12:22 +00:00
RemoteURL : & attachment . RemoteURL ,
Description : & attachment . Description ,
Blurhash : & attachment . Blurhash ,
2022-01-08 16:17:01 +00:00
}
2023-11-10 18:29:26 +00:00
// Start pre-processing remote media at remote URL.
processing := d . mediaManager . PreProcessMedia ( data , status . AccountID , ai )
2023-05-12 09:15:54 +00:00
// Force attachment loading *right now*.
2023-10-31 11:12:22 +00:00
attachment , err = processing . LoadAttachment ( ctx )
2022-01-08 16:17:01 +00:00
if err != nil {
2023-11-10 18:29:26 +00:00
if attachment == nil {
// Totally failed to load;
// bail on this attachment.
log . Errorf ( ctx , "error loading attachment: %v" , err )
continue
}
// Partially loaded. Keep as
// placeholder and try again later.
log . Warnf ( ctx , "partially loaded attachment: %v" , err )
2021-08-29 10:03:08 +00:00
}
2023-05-12 09:15:54 +00:00
// Set the *new* attachment and ID.
2023-10-31 11:12:22 +00:00
status . Attachments [ i ] = attachment
status . AttachmentIDs [ i ] = attachment . ID
2021-08-29 10:03:08 +00:00
}
2023-06-22 19:46:36 +00:00
for i := 0 ; i < len ( status . AttachmentIDs ) ; {
2023-05-12 09:15:54 +00:00
if status . AttachmentIDs [ i ] == "" {
2023-11-10 18:29:26 +00:00
// Remove totally failed attachment populations
2023-05-12 09:15:54 +00:00
copy ( status . Attachments [ i : ] , status . Attachments [ i + 1 : ] )
copy ( status . AttachmentIDs [ i : ] , status . AttachmentIDs [ i + 1 : ] )
status . Attachments = status . Attachments [ : len ( status . Attachments ) - 1 ]
status . AttachmentIDs = status . AttachmentIDs [ : len ( status . AttachmentIDs ) - 1 ]
2023-06-22 19:46:36 +00:00
continue
2023-05-12 09:15:54 +00:00
}
2023-06-22 19:46:36 +00:00
i ++
2023-05-12 09:15:54 +00:00
}
2021-08-29 10:03:08 +00:00
return nil
}
2023-10-23 09:58:13 +00:00
func ( d * Dereferencer ) fetchStatusEmojis ( ctx context . Context , requestUser string , status * gtsmodel . Status ) error {
2023-05-12 09:15:54 +00:00
// Fetch the full-fleshed-out emoji objects for our status.
emojis , err := d . populateEmojis ( ctx , status . Emojis , requestUser )
2022-09-26 09:56:01 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "failed to populate emojis: %w" , err )
2022-09-26 09:56:01 +00:00
}
2022-09-12 11:03:23 +00:00
2023-05-12 09:15:54 +00:00
// Iterate over and get their IDs.
2022-09-26 09:56:01 +00:00
emojiIDs := make ( [ ] string , 0 , len ( emojis ) )
for _ , e := range emojis {
emojiIDs = append ( emojiIDs , e . ID )
2022-09-12 11:03:23 +00:00
}
2023-05-12 09:15:54 +00:00
// Set known emoji details.
2022-09-26 09:56:01 +00:00
status . Emojis = emojis
2022-09-12 11:03:23 +00:00
status . EmojiIDs = emojiIDs
2021-08-29 10:03:08 +00:00
2021-08-10 11:32:39 +00:00
return nil
}