mirror of
https://github.com/superseriousbusiness/gotosocial
synced 2024-11-10 23:14:17 +00:00
[bugfix] fix checks for deref the same status descendants / ascendants (#2181)
This commit is contained in:
parent
9f2199f9a9
commit
916c6d07ba
1 changed files with 147 additions and 102 deletions
|
@ -56,9 +56,20 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
username string,
|
username string,
|
||||||
status *gtsmodel.Status,
|
status *gtsmodel.Status,
|
||||||
) error {
|
) error {
|
||||||
|
// Start log entry with fields
|
||||||
|
l := log.WithContext(ctx).
|
||||||
|
WithFields(kv.Fields{
|
||||||
|
{"username", username},
|
||||||
|
{"original", status.URI},
|
||||||
|
}...)
|
||||||
|
|
||||||
|
// Keep track of already dereferenced statuses
|
||||||
|
// for this ancestor thread to prevent recursion.
|
||||||
|
derefdStatuses := make(map[string]struct{}, 10)
|
||||||
|
|
||||||
// Mark given status as the one
|
// Mark given status as the one
|
||||||
// we're currently working on.
|
// we're currently working on.
|
||||||
var current = status
|
current := status
|
||||||
|
|
||||||
for i := 0; i < maxIter; i++ {
|
for i := 0; i < maxIter; i++ {
|
||||||
if current.InReplyToURI == "" {
|
if current.InReplyToURI == "" {
|
||||||
|
@ -67,14 +78,21 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
l := log.
|
// Add new log fields for this iteration.
|
||||||
WithContext(ctx).
|
l = l.WithFields(kv.Fields{
|
||||||
WithFields(kv.Fields{
|
{"current", current.URI},
|
||||||
{"username", username},
|
{"parent", current.InReplyToURI},
|
||||||
{"originalStatusIRI", status.URI},
|
}...)
|
||||||
{"currentStatusURI", current.URI},
|
l.Trace("following status ancestors")
|
||||||
{"currentInReplyToURI", current.InReplyToURI},
|
|
||||||
}...)
|
// Check whether this parent has already been deref'd.
|
||||||
|
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
|
||||||
|
l.Warn("self referencing status ancestors")
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add this status URI to map of deref'd.
|
||||||
|
derefdStatuses[current.URI] = struct{}{}
|
||||||
|
|
||||||
if current.InReplyToID != "" {
|
if current.InReplyToID != "" {
|
||||||
// We already have an InReplyToID set. This means
|
// We already have an InReplyToID set. This means
|
||||||
|
@ -123,7 +141,7 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
// by another action.
|
// by another action.
|
||||||
//
|
//
|
||||||
// TODO: clean this up in a nightly task.
|
// TODO: clean this up in a nightly task.
|
||||||
l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID)
|
l.Warn("orphaned status (parent no longer exists)")
|
||||||
return nil // Cannot iterate further.
|
return nil // Cannot iterate further.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -134,16 +152,15 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
inReplyToURI, err := url.Parse(current.InReplyToURI)
|
inReplyToURI, err := url.Parse(current.InReplyToURI)
|
||||||
if err != nil || inReplyToURI == nil {
|
if err != nil || inReplyToURI == nil {
|
||||||
// Parent URI is not something we can handle.
|
// Parent URI is not something we can handle.
|
||||||
l.Debug("current status has been orphaned (invalid InReplyToURI)")
|
l.Warn("orphaned status (invalid InReplyToURI)")
|
||||||
return nil //nolint:nilerr
|
return nil //nolint:nilerr
|
||||||
}
|
}
|
||||||
|
|
||||||
// Parent URI is valid, try to get it.
|
// Parent URI is valid, try to get it.
|
||||||
// getStatusByURI guards against the following conditions:
|
// getStatusByURI guards against the following conditions:
|
||||||
//
|
// - refetching recently fetched statuses (recursion!)
|
||||||
// - remote domain is blocked (will return unretrievable)
|
// - remote domain is blocked (will return unretrievable)
|
||||||
// - domain is local (will try to return something, or
|
// - any http type error for a new status returns unretrievable
|
||||||
// return unretrievable).
|
|
||||||
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
|
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
// We successfully fetched the parent.
|
// We successfully fetched the parent.
|
||||||
|
@ -171,7 +188,7 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
case code == http.StatusGone:
|
case code == http.StatusGone:
|
||||||
// 410 means the status has definitely been deleted.
|
// 410 means the status has definitely been deleted.
|
||||||
// Update this status to reflect that, then bail.
|
// Update this status to reflect that, then bail.
|
||||||
l.Debug("current status has been orphaned (call to parent returned code 410 Gone)")
|
l.Debug("orphaned status: parent returned 410 Gone")
|
||||||
|
|
||||||
current.InReplyToURI = ""
|
current.InReplyToURI = ""
|
||||||
if err := d.state.DB.UpdateStatus(
|
if err := d.state.DB.UpdateStatus(
|
||||||
|
@ -180,19 +197,19 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
); err != nil {
|
); err != nil {
|
||||||
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
return gtserror.Newf("db error updating status %s: %w", current.ID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case code != 0:
|
case code != 0:
|
||||||
// We had a code, but not one indicating deletion,
|
// We had a code, but not one indicating deletion, log the code
|
||||||
// log the code but don't return error or update the
|
// but don't return error or update the status; we can try again later.
|
||||||
// status; we can try again later.
|
l.Warnf("orphaned status: http error dereferencing parent: %v)", err)
|
||||||
l.Warnf("cannot dereference parent (%q)", err)
|
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
case gtserror.Unretrievable(err):
|
case gtserror.Unretrievable(err):
|
||||||
// Not retrievable for some other reason, so just
|
// Not retrievable for some other reason, so just
|
||||||
// bail; we can try again later if necessary.
|
// bail for now; we can try again later if necessary.
|
||||||
l.Debugf("parent unretrievable (%q)", err)
|
l.Warnf("orphaned status: parent unretrievable: %v)", err)
|
||||||
return nil
|
return nil
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -205,35 +222,44 @@ func (d *deref) DereferenceStatusAncestors(
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
|
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
|
||||||
// Take ref to original
|
statusIRIStr := statusIRI.String()
|
||||||
ogIRI := statusIRI
|
|
||||||
|
|
||||||
// Start log entry with fields
|
// Start log entry with fields
|
||||||
l := log.WithContext(ctx).
|
l := log.WithContext(ctx).
|
||||||
WithFields(kv.Fields{
|
WithFields(kv.Fields{
|
||||||
{"username", username},
|
{"username", username},
|
||||||
{"statusIRI", ogIRI},
|
{"status", statusIRIStr},
|
||||||
}...)
|
}...)
|
||||||
|
|
||||||
// Log function start
|
// Log function start
|
||||||
l.Trace("beginning")
|
l.Trace("beginning")
|
||||||
|
|
||||||
// frame represents a single stack frame when iteratively
|
// OUR instance hostname.
|
||||||
// dereferencing status descendants. where statusIRI and
|
localhost := config.GetHost()
|
||||||
// statusable are of the status whose children we are to
|
|
||||||
// descend, page is the current activity streams collection
|
// Keep track of already dereferenced collection
|
||||||
// page of entities we are on (as we often push a frame to
|
// pages for this thread to prevent recursion.
|
||||||
// stack mid-paging), and item___ are entity iterators for
|
derefdPages := make(map[string]struct{}, 10)
|
||||||
// this activity streams collection page.
|
|
||||||
|
// frame represents a single stack frame when
|
||||||
|
// iteratively derefencing status descendants.
|
||||||
type frame struct {
|
type frame struct {
|
||||||
statusIRI *url.URL
|
// page is the current activity streams
|
||||||
statusable ap.Statusable
|
// collection page we are on (as we often
|
||||||
page ap.CollectionPageable
|
// push a frame to stack mid-paging).
|
||||||
itemIter vocab.ActivityStreamsItemsPropertyIterator
|
page ap.CollectionPageable
|
||||||
|
|
||||||
|
// pageURI is the URI string of
|
||||||
|
// the frame's collection page
|
||||||
|
// (is useful for logging).
|
||||||
|
pageURI string
|
||||||
|
|
||||||
|
// items is the entity iterator for frame's page.
|
||||||
|
items vocab.ActivityStreamsItemsPropertyIterator
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// current is the current stack frame
|
// current stack frame
|
||||||
current *frame
|
current *frame
|
||||||
|
|
||||||
// stack is a list of "shelved" descendand iterator
|
// stack is a list of "shelved" descendand iterator
|
||||||
|
@ -242,11 +268,14 @@ func (d *deref) DereferenceStatusDescendants(ctx context.Context, username strin
|
||||||
// popped from into 'current' when that child's tree
|
// popped from into 'current' when that child's tree
|
||||||
// of further descendants is exhausted.
|
// of further descendants is exhausted.
|
||||||
stack = []*frame{
|
stack = []*frame{
|
||||||
{
|
func() *frame {
|
||||||
// Starting input is first frame
|
// Start input frame is built from the first input.
|
||||||
statusIRI: statusIRI,
|
page, pageURI := getAttachedStatusCollection(parent)
|
||||||
statusable: parent,
|
if page == nil {
|
||||||
},
|
return nil
|
||||||
|
}
|
||||||
|
return &frame{page: page, pageURI: pageURI}
|
||||||
|
}(),
|
||||||
}
|
}
|
||||||
|
|
||||||
// popStack will remove and return the top frame
|
// popStack will remove and return the top frame
|
||||||
|
@ -274,42 +303,9 @@ stackLoop:
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if current.page == nil {
|
|
||||||
if current.statusIRI.Host == config.GetHost() {
|
|
||||||
// This is a local status, no looping to do
|
|
||||||
continue stackLoop
|
|
||||||
}
|
|
||||||
|
|
||||||
l.Tracef("following remote status descendants: %s", current.statusIRI)
|
|
||||||
|
|
||||||
// Look for an attached status replies (as collection)
|
|
||||||
replies := current.statusable.GetActivityStreamsReplies()
|
|
||||||
if replies == nil {
|
|
||||||
continue stackLoop
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the status replies collection
|
|
||||||
collection := replies.GetActivityStreamsCollection()
|
|
||||||
if collection == nil {
|
|
||||||
continue stackLoop
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the "first" property of the replies collection
|
|
||||||
first := collection.GetActivityStreamsFirst()
|
|
||||||
if first == nil {
|
|
||||||
continue stackLoop
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set the first activity stream collection page
|
|
||||||
current.page = first.GetActivityStreamsCollectionPage()
|
|
||||||
if current.page == nil {
|
|
||||||
continue stackLoop
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pageLoop:
|
pageLoop:
|
||||||
for {
|
for {
|
||||||
if current.itemIter == nil {
|
if current.items == nil {
|
||||||
// Get the items associated with this page
|
// Get the items associated with this page
|
||||||
items := current.page.GetActivityStreamsItems()
|
items := current.page.GetActivityStreamsItems()
|
||||||
if items == nil {
|
if items == nil {
|
||||||
|
@ -317,21 +313,23 @@ stackLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start off the item iterator
|
// Start off the item iterator
|
||||||
current.itemIter = items.Begin()
|
current.items = items.Begin()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
l.Tracef("following collection page: %s", current.pageURI)
|
||||||
|
|
||||||
itemLoop:
|
itemLoop:
|
||||||
for {
|
for {
|
||||||
// Check for remaining iter
|
// Check for remaining iter
|
||||||
if current.itemIter == nil {
|
if current.items == nil {
|
||||||
break itemLoop
|
break itemLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get current item iterator
|
// Get current item iterator
|
||||||
itemIter := current.itemIter
|
itemIter := current.items
|
||||||
|
|
||||||
// Set the next available iterator
|
// Set the next available iterator
|
||||||
current.itemIter = itemIter.Next()
|
current.items = itemIter.Next()
|
||||||
|
|
||||||
// Check for available IRI on item
|
// Check for available IRI on item
|
||||||
itemIRI, _ := pub.ToId(itemIter)
|
itemIRI, _ := pub.ToId(itemIter)
|
||||||
|
@ -339,76 +337,123 @@ stackLoop:
|
||||||
continue itemLoop
|
continue itemLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
if itemIRI.Host == config.GetHost() {
|
if itemIRI.Host == localhost {
|
||||||
// This child is one of ours,
|
// This child is one of ours,
|
||||||
continue itemLoop
|
continue itemLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dereference the remote status and store in the database.
|
// Dereference the remote status and store in the database.
|
||||||
// getStatusByURI guards against the following conditions:
|
// getStatusByURI guards against the following conditions:
|
||||||
//
|
// - refetching recently fetched statuses (recursion!)
|
||||||
// - remote domain is blocked (will return unretrievable)
|
// - remote domain is blocked (will return unretrievable)
|
||||||
// - domain is local (will try to return something, or
|
// - any http type error for a new status returns unretrievable
|
||||||
// return unretrievable).
|
|
||||||
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
|
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if !gtserror.Unretrievable(err) {
|
if !gtserror.Unretrievable(err) {
|
||||||
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
continue itemLoop
|
continue itemLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
if statusable == nil {
|
if statusable == nil {
|
||||||
// Already up-to-date.
|
// A nil statusable return from
|
||||||
|
// getStatusByURI() indicates a
|
||||||
|
// remote status that was already
|
||||||
|
// dereferenced recently (so no
|
||||||
|
// need to go through descendents).
|
||||||
|
continue itemLoop
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract any attached collection + URI from status.
|
||||||
|
page, pageURI := getAttachedStatusCollection(statusable)
|
||||||
|
if page == nil {
|
||||||
continue itemLoop
|
continue itemLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put current and next frame at top of stack
|
// Put current and next frame at top of stack
|
||||||
stack = append(stack, current, &frame{
|
stack = append(stack, current, &frame{
|
||||||
statusIRI: itemIRI,
|
pageURI: pageURI,
|
||||||
statusable: statusable,
|
page: page,
|
||||||
})
|
})
|
||||||
|
|
||||||
// Now start at top of loop
|
// Now start at top of loop
|
||||||
continue stackLoop
|
continue stackLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the current page's "next" property
|
// Get the current page's "next" property.
|
||||||
pageNext := current.page.GetActivityStreamsNext()
|
pageNext := current.page.GetActivityStreamsNext()
|
||||||
if pageNext == nil || !pageNext.IsIRI() {
|
if pageNext == nil || !pageNext.IsIRI() {
|
||||||
continue stackLoop
|
continue stackLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the IRI of the "next" property.
|
// Get the IRI of the "next" property.
|
||||||
pageNextIRI := pageNext.GetIRI()
|
pageNextURI := pageNext.GetIRI()
|
||||||
|
pageNextURIStr := pageNextURI.String()
|
||||||
|
|
||||||
// Ensure this isn't a self-referencing page...
|
// Check whether this page has already been deref'd.
|
||||||
// We don't need to store / check against a map of IRIs
|
if _, ok := derefdPages[pageNextURIStr]; ok {
|
||||||
// as our getStatusByIRI() function above prevents iter'ing
|
l.Warnf("self referencing collection page(s): %s", pageNextURIStr)
|
||||||
// over statuses that have been dereferenced recently, due to
|
|
||||||
// the `fetched_at` field preventing frequent refetches.
|
|
||||||
if id := current.page.GetJSONLDId(); id != nil &&
|
|
||||||
pageNextIRI.String() == id.Get().String() {
|
|
||||||
log.Warnf(ctx, "self referencing collection page: %s", pageNextIRI)
|
|
||||||
continue stackLoop
|
continue stackLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dereference this next collection page by its IRI
|
// Mark this collection page as deref'd.
|
||||||
|
derefdPages[pageNextURIStr] = struct{}{}
|
||||||
|
|
||||||
|
// Dereference this next collection page by its IRI.
|
||||||
collectionPage, err := d.dereferenceCollectionPage(ctx,
|
collectionPage, err := d.dereferenceCollectionPage(ctx,
|
||||||
username,
|
username,
|
||||||
pageNextIRI,
|
pageNextURI,
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err)
|
l.Errorf("error dereferencing collection page %q: %s", pageNextURIStr, err)
|
||||||
continue stackLoop
|
continue stackLoop
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the updated collection page
|
// Set the next collection page.
|
||||||
current.page = collectionPage
|
current.page = collectionPage
|
||||||
|
current.pageURI = pageNextURIStr
|
||||||
continue pageLoop
|
continue pageLoop
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, ogIRI.String())
|
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr)
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAttachedStatusCollection is a small utility function to fetch the first page
|
||||||
|
// of an attached activity streams collection from a provided statusable object .
|
||||||
|
func getAttachedStatusCollection(status ap.Statusable) (page ap.CollectionPageable, uri string) { //nolint:gocritic
|
||||||
|
// Look for an attached status replies (as collection)
|
||||||
|
replies := status.GetActivityStreamsReplies()
|
||||||
|
if replies == nil {
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the status replies collection
|
||||||
|
collection := replies.GetActivityStreamsCollection()
|
||||||
|
if collection == nil {
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the "first" property of the replies collection
|
||||||
|
first := collection.GetActivityStreamsFirst()
|
||||||
|
if first == nil {
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the first activity stream collection page
|
||||||
|
page = first.GetActivityStreamsCollectionPage()
|
||||||
|
if page == nil {
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
|
|
||||||
|
if pageID := page.GetJSONLDId(); pageID != nil {
|
||||||
|
// By default use collection JSONLD ID
|
||||||
|
return page, pageID.Get().String()
|
||||||
|
} else if statusID := status.GetJSONLDId(); statusID != nil {
|
||||||
|
// Else, if possible use status JSONLD ID
|
||||||
|
return page, statusID.Get().String()
|
||||||
|
} else {
|
||||||
|
// MUST have some kind of ID
|
||||||
|
return nil, ""
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue