2023-03-12 15:00:57 +00:00
// GoToSocial
// Copyright (C) GoToSocial Authors admin@gotosocial.org
// SPDX-License-Identifier: AGPL-3.0-or-later
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
2021-08-10 11:32:39 +00:00
package dereferencing
import (
2021-08-25 13:34:33 +00:00
"context"
2021-08-10 11:32:39 +00:00
"net/url"
2022-07-19 08:47:55 +00:00
"codeberg.org/gruf/go-kv"
2022-09-25 11:09:41 +00:00
"github.com/superseriousbusiness/activity/streams/vocab"
2021-08-10 11:32:39 +00:00
"github.com/superseriousbusiness/gotosocial/internal/ap"
2021-12-07 12:31:39 +00:00
"github.com/superseriousbusiness/gotosocial/internal/config"
2023-05-28 12:08:35 +00:00
"github.com/superseriousbusiness/gotosocial/internal/gtserror"
2022-09-25 11:09:41 +00:00
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
2022-07-19 08:47:55 +00:00
"github.com/superseriousbusiness/gotosocial/internal/log"
2021-12-20 14:19:53 +00:00
"github.com/superseriousbusiness/gotosocial/internal/uris"
2021-08-10 11:32:39 +00:00
)
2022-09-25 11:09:41 +00:00
// maxIter defines how many iterations of descendants or
// ancestors we are willing to follow before returning an error.
const maxIter = 1000
2023-05-12 09:15:54 +00:00
// dereferenceThread will dereference statuses both above and below the given status in a thread, it returns no error and is intended to be called asychronously.
func ( d * deref ) dereferenceThread ( ctx context . Context , username string , statusIRI * url . URL , status * gtsmodel . Status , statusable ap . Statusable ) {
2022-09-25 11:09:41 +00:00
// Ensure that ancestors have been fully dereferenced
if err := d . dereferenceStatusAncestors ( ctx , username , status ) ; err != nil {
2023-05-28 12:08:35 +00:00
log . Error ( ctx , err ) // log entry and error will include caller prefixes
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
// Ensure that descendants have been fully dereferenced
if err := d . dereferenceStatusDescendants ( ctx , username , statusIRI , statusable ) ; err != nil {
2023-05-28 12:08:35 +00:00
log . Error ( ctx , err ) // log entry and error will include caller prefixes
2021-08-10 11:32:39 +00:00
}
}
2022-09-25 11:09:41 +00:00
// dereferenceAncestors has the goal of reaching the oldest ancestor of a given status, and stashing all statuses along the way.
func ( d * deref ) dereferenceStatusAncestors ( ctx context . Context , username string , status * gtsmodel . Status ) error {
// Take ref to original
ogIRI := status . URI
2022-07-19 08:47:55 +00:00
2022-09-25 11:09:41 +00:00
// Start log entry with fields
2023-02-17 11:02:29 +00:00
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
{ "statusIRI" , ogIRI } ,
} ... )
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// Log function start
l . Trace ( "beginning" )
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
for i := 0 ; i < maxIter ; i ++ {
2021-08-10 11:32:39 +00:00
if status . InReplyToURI == "" {
// status doesn't reply to anything
return nil
}
2022-06-11 14:25:41 +00:00
2022-09-25 11:09:41 +00:00
// Parse this status's replied IRI
replyIRI , err := url . Parse ( status . InReplyToURI )
2021-08-10 11:32:39 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "invalid status InReplyToURI %q: %w" , status . InReplyToURI , err )
2021-08-10 11:32:39 +00:00
}
2022-06-11 14:25:41 +00:00
2022-09-25 11:09:41 +00:00
if replyIRI . Host == config . GetHost ( ) {
l . Tracef ( "following local status ancestors: %s" , status . InReplyToURI )
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// This is our status, extract ID from path
_ , id , err := uris . ParseStatusesPath ( replyIRI )
if err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "invalid local status IRI %q: %w" , status . InReplyToURI , err )
2022-09-25 11:09:41 +00:00
}
// Fetch this status from the database
2023-05-12 09:15:54 +00:00
localStatus , err := d . state . DB . GetStatusByID ( ctx , id )
2022-09-25 11:09:41 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "error fetching local status %q: %w" , id , err )
2022-09-25 11:09:41 +00:00
}
// Set the fetched status
status = localStatus
} else {
l . Tracef ( "following remote status ancestors: %s" , status . InReplyToURI )
// Fetch the remote status found at this IRI
2023-05-12 09:15:54 +00:00
remoteStatus , _ , err := d . getStatusByURI ( ctx ,
username ,
replyIRI ,
)
2022-09-25 11:09:41 +00:00
if err != nil {
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "error fetching remote status %q: %w" , status . InReplyToURI , err )
2022-09-25 11:09:41 +00:00
}
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// Set the fetched status
status = remoteStatus
}
2021-08-10 11:32:39 +00:00
}
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "reached %d ancestor iterations for %q" , maxIter , ogIRI )
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
func ( d * deref ) dereferenceStatusDescendants ( ctx context . Context , username string , statusIRI * url . URL , parent ap . Statusable ) error {
// Take ref to original
ogIRI := statusIRI
2022-07-19 08:47:55 +00:00
2022-09-25 11:09:41 +00:00
// Start log entry with fields
2023-02-17 11:02:29 +00:00
l := log . WithContext ( ctx ) .
WithFields ( kv . Fields {
{ "username" , username } ,
{ "statusIRI" , ogIRI } ,
} ... )
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// Log function start
l . Trace ( "beginning" )
// frame represents a single stack frame when iteratively
// dereferencing status descendants. where statusIRI and
// statusable are of the status whose children we are to
// descend, page is the current activity streams collection
// page of entities we are on (as we often push a frame to
// stack mid-paging), and item___ are entity iterators for
// this activity streams collection page.
type frame struct {
statusIRI * url . URL
statusable ap . Statusable
page ap . CollectionPageable
itemIter vocab . ActivityStreamsItemsPropertyIterator
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
var (
// current is the current stack frame
current * frame
// stack is a list of "shelved" descendand iterator
// frames. this is pushed to when a child status frame
// is found that we need to further iterate down, and
// popped from into 'current' when that child's tree
// of further descendants is exhausted.
stack = [ ] * frame {
{
// Starting input is first frame
statusIRI : statusIRI ,
statusable : parent ,
} ,
}
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// popStack will remove and return the top frame
// from the stack, or nil if currently empty.
popStack = func ( ) * frame {
if len ( stack ) == 0 {
return nil
}
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// Get frame index
idx := len ( stack ) - 1
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
// Pop last frame
frame := stack [ idx ]
stack = stack [ : idx ]
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
return frame
}
)
2021-08-10 11:32:39 +00:00
2022-09-25 11:09:41 +00:00
stackLoop :
for i := 0 ; i < maxIter ; i ++ {
// Pop next frame, nil means we are at end
if current = popStack ( ) ; current == nil {
return nil
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
if current . page == nil {
// This is a local status, no looping to do
if current . statusIRI . Host == config . GetHost ( ) {
continue stackLoop
}
l . Tracef ( "following remote status descendants: %s" , current . statusIRI )
// Look for an attached status replies (as collection)
replies := current . statusable . GetActivityStreamsReplies ( )
2022-09-26 08:14:36 +00:00
if replies == nil {
2022-09-25 11:09:41 +00:00
continue stackLoop
}
// Get the status replies collection
collection := replies . GetActivityStreamsCollection ( )
2022-09-26 08:14:36 +00:00
if collection == nil {
continue stackLoop
}
2022-09-25 11:09:41 +00:00
// Get the "first" property of the replies collection
first := collection . GetActivityStreamsFirst ( )
2022-09-26 08:14:36 +00:00
if first == nil {
2022-09-25 11:09:41 +00:00
continue stackLoop
}
// Set the first activity stream collection page
current . page = first . GetActivityStreamsCollectionPage ( )
2022-09-26 08:14:36 +00:00
if current . page == nil {
continue stackLoop
}
2021-08-10 11:32:39 +00:00
}
2022-09-26 08:14:36 +00:00
pageLoop :
for {
2022-09-25 11:09:41 +00:00
if current . itemIter == nil {
2022-09-26 08:14:36 +00:00
// Get the items associated with this page
2022-09-25 11:09:41 +00:00
items := current . page . GetActivityStreamsItems ( )
2022-09-26 08:14:36 +00:00
if items == nil {
continue stackLoop
}
2022-09-25 11:09:41 +00:00
// Start off the item iterator
current . itemIter = items . Begin ( )
2022-09-26 08:50:14 +00:00
if current . itemIter == nil {
continue stackLoop
}
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
itemLoop :
2022-09-26 08:50:14 +00:00
for {
2022-09-25 11:09:41 +00:00
var itemIRI * url . URL
// Get next item iterator object
current . itemIter = current . itemIter . Next ( )
2022-09-26 08:50:14 +00:00
if current . itemIter == nil {
break itemLoop
}
2021-08-10 11:32:39 +00:00
2022-09-26 08:14:36 +00:00
if iri := current . itemIter . GetIRI ( ) ; iri != nil {
// Item is already an IRI type
itemIRI = iri
} else if note := current . itemIter . GetActivityStreamsNote ( ) ; note != nil {
// Item is a note, fetch the note ID IRI
if id := note . GetJSONLDId ( ) ; id != nil {
2022-09-25 11:09:41 +00:00
itemIRI = id . GetIRI ( )
}
}
if itemIRI == nil {
// Unusable iter object
continue itemLoop
}
if itemIRI . Host == config . GetHost ( ) {
// This child is one of ours,
continue itemLoop
}
2023-05-12 09:15:54 +00:00
// Dereference the remote status and store in the database.
_ , statusable , err := d . getStatusByURI ( ctx , username , itemIRI )
2022-09-25 11:09:41 +00:00
if err != nil {
2023-05-12 09:15:54 +00:00
l . Errorf ( "error dereferencing remote status %s: %v" , itemIRI , err )
continue itemLoop
}
if statusable == nil {
// Already up-to-date.
2022-09-25 11:09:41 +00:00
continue itemLoop
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
// Put current and next frame at top of stack
stack = append ( stack , current , & frame {
statusIRI : itemIRI ,
statusable : statusable ,
} )
2022-09-26 07:39:59 +00:00
// Now start at top of loop
continue stackLoop
2021-08-10 11:32:39 +00:00
}
2022-09-25 11:09:41 +00:00
// Get the current page's "next" property
pageNext := current . page . GetActivityStreamsNext ( )
2022-09-26 08:14:36 +00:00
if pageNext == nil {
2022-09-25 11:09:41 +00:00
continue stackLoop
}
// Get the "next" page property IRI
pageNextIRI := pageNext . GetIRI ( )
2022-09-26 08:14:36 +00:00
if pageNextIRI == nil {
continue stackLoop
}
2022-09-25 11:09:41 +00:00
// Dereference this next collection page by its IRI
2023-05-12 09:15:54 +00:00
collectionPage , err := d . dereferenceCollectionPage ( ctx ,
username ,
pageNextIRI ,
)
2022-09-25 11:09:41 +00:00
if err != nil {
l . Errorf ( "error dereferencing remote collection page %q: %s" , pageNextIRI . String ( ) , err )
continue stackLoop
}
// Set the updated collection page
current . page = collectionPage
2022-09-26 08:14:36 +00:00
continue pageLoop
2021-08-10 11:32:39 +00:00
}
}
2023-05-28 12:08:35 +00:00
return gtserror . Newf ( "reached %d descendant iterations for %q" , maxIter , ogIRI . String ( ) )
2021-08-10 11:32:39 +00:00
}