[bugfix] fix checks for deref the same status descendants / ascendants (#2181)

This commit is contained in:
kim 2023-09-05 11:22:02 +01:00 committed by GitHub
parent 9f2199f9a9
commit 916c6d07ba
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -56,9 +56,20 @@ func (d *deref) DereferenceStatusAncestors(
username string, username string,
status *gtsmodel.Status, status *gtsmodel.Status,
) error { ) error {
// Start log entry with fields
l := log.WithContext(ctx).
WithFields(kv.Fields{
{"username", username},
{"original", status.URI},
}...)
// Keep track of already dereferenced statuses
// for this ancestor thread to prevent recursion.
derefdStatuses := make(map[string]struct{}, 10)
// Mark given status as the one // Mark given status as the one
// we're currently working on. // we're currently working on.
var current = status current := status
for i := 0; i < maxIter; i++ { for i := 0; i < maxIter; i++ {
if current.InReplyToURI == "" { if current.InReplyToURI == "" {
@ -67,14 +78,21 @@ func (d *deref) DereferenceStatusAncestors(
return nil return nil
} }
l := log. // Add new log fields for this iteration.
WithContext(ctx). l = l.WithFields(kv.Fields{
WithFields(kv.Fields{ {"current", current.URI},
{"username", username}, {"parent", current.InReplyToURI},
{"originalStatusIRI", status.URI}, }...)
{"currentStatusURI", current.URI}, l.Trace("following status ancestors")
{"currentInReplyToURI", current.InReplyToURI},
}...) // Check whether this parent has already been deref'd.
if _, ok := derefdStatuses[current.InReplyToURI]; ok {
l.Warn("self referencing status ancestors")
return nil
}
// Add this status URI to map of deref'd.
derefdStatuses[current.URI] = struct{}{}
if current.InReplyToID != "" { if current.InReplyToID != "" {
// We already have an InReplyToID set. This means // We already have an InReplyToID set. This means
@ -123,7 +141,7 @@ func (d *deref) DereferenceStatusAncestors(
// by another action. // by another action.
// //
// TODO: clean this up in a nightly task. // TODO: clean this up in a nightly task.
l.Warnf("current status has been orphaned (parent %s no longer exists in database)", current.InReplyToID) l.Warn("orphaned status (parent no longer exists)")
return nil // Cannot iterate further. return nil // Cannot iterate further.
} }
@ -134,16 +152,15 @@ func (d *deref) DereferenceStatusAncestors(
inReplyToURI, err := url.Parse(current.InReplyToURI) inReplyToURI, err := url.Parse(current.InReplyToURI)
if err != nil || inReplyToURI == nil { if err != nil || inReplyToURI == nil {
// Parent URI is not something we can handle. // Parent URI is not something we can handle.
l.Debug("current status has been orphaned (invalid InReplyToURI)") l.Warn("orphaned status (invalid InReplyToURI)")
return nil //nolint:nilerr return nil //nolint:nilerr
} }
// Parent URI is valid, try to get it. // Parent URI is valid, try to get it.
// getStatusByURI guards against the following conditions: // getStatusByURI guards against the following conditions:
// // - refetching recently fetched statuses (recursion!)
// - remote domain is blocked (will return unretrievable) // - remote domain is blocked (will return unretrievable)
// - domain is local (will try to return something, or // - any http type error for a new status returns unretrievable
// return unretrievable).
parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI) parent, _, err := d.getStatusByURI(ctx, username, inReplyToURI)
if err == nil { if err == nil {
// We successfully fetched the parent. // We successfully fetched the parent.
@ -171,7 +188,7 @@ func (d *deref) DereferenceStatusAncestors(
case code == http.StatusGone: case code == http.StatusGone:
// 410 means the status has definitely been deleted. // 410 means the status has definitely been deleted.
// Update this status to reflect that, then bail. // Update this status to reflect that, then bail.
l.Debug("current status has been orphaned (call to parent returned code 410 Gone)") l.Debug("orphaned status: parent returned 410 Gone")
current.InReplyToURI = "" current.InReplyToURI = ""
if err := d.state.DB.UpdateStatus( if err := d.state.DB.UpdateStatus(
@ -180,19 +197,19 @@ func (d *deref) DereferenceStatusAncestors(
); err != nil { ); err != nil {
return gtserror.Newf("db error updating status %s: %w", current.ID, err) return gtserror.Newf("db error updating status %s: %w", current.ID, err)
} }
return nil return nil
case code != 0: case code != 0:
// We had a code, but not one indicating deletion, // We had a code, but not one indicating deletion, log the code
// log the code but don't return error or update the // but don't return error or update the status; we can try again later.
// status; we can try again later. l.Warnf("orphaned status: http error dereferencing parent: %v)", err)
l.Warnf("cannot dereference parent (%q)", err)
return nil return nil
case gtserror.Unretrievable(err): case gtserror.Unretrievable(err):
// Not retrievable for some other reason, so just // Not retrievable for some other reason, so just
// bail; we can try again later if necessary. // bail for now; we can try again later if necessary.
l.Debugf("parent unretrievable (%q)", err) l.Warnf("orphaned status: parent unretrievable: %v)", err)
return nil return nil
default: default:
@ -205,35 +222,44 @@ func (d *deref) DereferenceStatusAncestors(
} }
func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error { func (d *deref) DereferenceStatusDescendants(ctx context.Context, username string, statusIRI *url.URL, parent ap.Statusable) error {
// Take ref to original statusIRIStr := statusIRI.String()
ogIRI := statusIRI
// Start log entry with fields // Start log entry with fields
l := log.WithContext(ctx). l := log.WithContext(ctx).
WithFields(kv.Fields{ WithFields(kv.Fields{
{"username", username}, {"username", username},
{"statusIRI", ogIRI}, {"status", statusIRIStr},
}...) }...)
// Log function start // Log function start
l.Trace("beginning") l.Trace("beginning")
// frame represents a single stack frame when iteratively // OUR instance hostname.
// dereferencing status descendants. where statusIRI and localhost := config.GetHost()
// statusable are of the status whose children we are to
// descend, page is the current activity streams collection // Keep track of already dereferenced collection
// page of entities we are on (as we often push a frame to // pages for this thread to prevent recursion.
// stack mid-paging), and item___ are entity iterators for derefdPages := make(map[string]struct{}, 10)
// this activity streams collection page.
// frame represents a single stack frame when
// iteratively derefencing status descendants.
type frame struct { type frame struct {
statusIRI *url.URL // page is the current activity streams
statusable ap.Statusable // collection page we are on (as we often
page ap.CollectionPageable // push a frame to stack mid-paging).
itemIter vocab.ActivityStreamsItemsPropertyIterator page ap.CollectionPageable
// pageURI is the URI string of
// the frame's collection page
// (is useful for logging).
pageURI string
// items is the entity iterator for frame's page.
items vocab.ActivityStreamsItemsPropertyIterator
} }
var ( var (
// current is the current stack frame // current stack frame
current *frame current *frame
// stack is a list of "shelved" descendand iterator // stack is a list of "shelved" descendand iterator
@ -242,11 +268,14 @@ func (d *deref) DereferenceStatusDescendants(ctx context.Context, username strin
// popped from into 'current' when that child's tree // popped from into 'current' when that child's tree
// of further descendants is exhausted. // of further descendants is exhausted.
stack = []*frame{ stack = []*frame{
{ func() *frame {
// Starting input is first frame // Start input frame is built from the first input.
statusIRI: statusIRI, page, pageURI := getAttachedStatusCollection(parent)
statusable: parent, if page == nil {
}, return nil
}
return &frame{page: page, pageURI: pageURI}
}(),
} }
// popStack will remove and return the top frame // popStack will remove and return the top frame
@ -274,42 +303,9 @@ stackLoop:
return nil return nil
} }
if current.page == nil {
if current.statusIRI.Host == config.GetHost() {
// This is a local status, no looping to do
continue stackLoop
}
l.Tracef("following remote status descendants: %s", current.statusIRI)
// Look for an attached status replies (as collection)
replies := current.statusable.GetActivityStreamsReplies()
if replies == nil {
continue stackLoop
}
// Get the status replies collection
collection := replies.GetActivityStreamsCollection()
if collection == nil {
continue stackLoop
}
// Get the "first" property of the replies collection
first := collection.GetActivityStreamsFirst()
if first == nil {
continue stackLoop
}
// Set the first activity stream collection page
current.page = first.GetActivityStreamsCollectionPage()
if current.page == nil {
continue stackLoop
}
}
pageLoop: pageLoop:
for { for {
if current.itemIter == nil { if current.items == nil {
// Get the items associated with this page // Get the items associated with this page
items := current.page.GetActivityStreamsItems() items := current.page.GetActivityStreamsItems()
if items == nil { if items == nil {
@ -317,21 +313,23 @@ stackLoop:
} }
// Start off the item iterator // Start off the item iterator
current.itemIter = items.Begin() current.items = items.Begin()
} }
l.Tracef("following collection page: %s", current.pageURI)
itemLoop: itemLoop:
for { for {
// Check for remaining iter // Check for remaining iter
if current.itemIter == nil { if current.items == nil {
break itemLoop break itemLoop
} }
// Get current item iterator // Get current item iterator
itemIter := current.itemIter itemIter := current.items
// Set the next available iterator // Set the next available iterator
current.itemIter = itemIter.Next() current.items = itemIter.Next()
// Check for available IRI on item // Check for available IRI on item
itemIRI, _ := pub.ToId(itemIter) itemIRI, _ := pub.ToId(itemIter)
@ -339,76 +337,123 @@ stackLoop:
continue itemLoop continue itemLoop
} }
if itemIRI.Host == config.GetHost() { if itemIRI.Host == localhost {
// This child is one of ours, // This child is one of ours,
continue itemLoop continue itemLoop
} }
// Dereference the remote status and store in the database. // Dereference the remote status and store in the database.
// getStatusByURI guards against the following conditions: // getStatusByURI guards against the following conditions:
// // - refetching recently fetched statuses (recursion!)
// - remote domain is blocked (will return unretrievable) // - remote domain is blocked (will return unretrievable)
// - domain is local (will try to return something, or // - any http type error for a new status returns unretrievable
// return unretrievable).
_, statusable, err := d.getStatusByURI(ctx, username, itemIRI) _, statusable, err := d.getStatusByURI(ctx, username, itemIRI)
if err != nil { if err != nil {
if !gtserror.Unretrievable(err) { if !gtserror.Unretrievable(err) {
l.Errorf("error dereferencing remote status %s: %v", itemIRI, err) l.Errorf("error dereferencing remote status %s: %v", itemIRI, err)
} }
continue itemLoop continue itemLoop
} }
if statusable == nil { if statusable == nil {
// Already up-to-date. // A nil statusable return from
// getStatusByURI() indicates a
// remote status that was already
// dereferenced recently (so no
// need to go through descendents).
continue itemLoop
}
// Extract any attached collection + URI from status.
page, pageURI := getAttachedStatusCollection(statusable)
if page == nil {
continue itemLoop continue itemLoop
} }
// Put current and next frame at top of stack // Put current and next frame at top of stack
stack = append(stack, current, &frame{ stack = append(stack, current, &frame{
statusIRI: itemIRI, pageURI: pageURI,
statusable: statusable, page: page,
}) })
// Now start at top of loop // Now start at top of loop
continue stackLoop continue stackLoop
} }
// Get the current page's "next" property // Get the current page's "next" property.
pageNext := current.page.GetActivityStreamsNext() pageNext := current.page.GetActivityStreamsNext()
if pageNext == nil || !pageNext.IsIRI() { if pageNext == nil || !pageNext.IsIRI() {
continue stackLoop continue stackLoop
} }
// Get the IRI of the "next" property. // Get the IRI of the "next" property.
pageNextIRI := pageNext.GetIRI() pageNextURI := pageNext.GetIRI()
pageNextURIStr := pageNextURI.String()
// Ensure this isn't a self-referencing page... // Check whether this page has already been deref'd.
// We don't need to store / check against a map of IRIs if _, ok := derefdPages[pageNextURIStr]; ok {
// as our getStatusByIRI() function above prevents iter'ing l.Warnf("self referencing collection page(s): %s", pageNextURIStr)
// over statuses that have been dereferenced recently, due to
// the `fetched_at` field preventing frequent refetches.
if id := current.page.GetJSONLDId(); id != nil &&
pageNextIRI.String() == id.Get().String() {
log.Warnf(ctx, "self referencing collection page: %s", pageNextIRI)
continue stackLoop continue stackLoop
} }
// Dereference this next collection page by its IRI // Mark this collection page as deref'd.
derefdPages[pageNextURIStr] = struct{}{}
// Dereference this next collection page by its IRI.
collectionPage, err := d.dereferenceCollectionPage(ctx, collectionPage, err := d.dereferenceCollectionPage(ctx,
username, username,
pageNextIRI, pageNextURI,
) )
if err != nil { if err != nil {
l.Errorf("error dereferencing remote collection page %q: %s", pageNextIRI.String(), err) l.Errorf("error dereferencing collection page %q: %s", pageNextURIStr, err)
continue stackLoop continue stackLoop
} }
// Set the updated collection page // Set the next collection page.
current.page = collectionPage current.page = collectionPage
current.pageURI = pageNextURIStr
continue pageLoop continue pageLoop
} }
} }
return gtserror.Newf("reached %d descendant iterations for %q", maxIter, ogIRI.String()) return gtserror.Newf("reached %d descendant iterations for %q", maxIter, statusIRIStr)
}
// getAttachedStatusCollection is a small utility function to fetch the first page
// of an attached activity streams collection from a provided statusable object .
func getAttachedStatusCollection(status ap.Statusable) (page ap.CollectionPageable, uri string) { //nolint:gocritic
// Look for an attached status replies (as collection)
replies := status.GetActivityStreamsReplies()
if replies == nil {
return nil, ""
}
// Get the status replies collection
collection := replies.GetActivityStreamsCollection()
if collection == nil {
return nil, ""
}
// Get the "first" property of the replies collection
first := collection.GetActivityStreamsFirst()
if first == nil {
return nil, ""
}
// Return the first activity stream collection page
page = first.GetActivityStreamsCollectionPage()
if page == nil {
return nil, ""
}
if pageID := page.GetJSONLDId(); pageID != nil {
// By default use collection JSONLD ID
return page, pageID.Get().String()
} else if statusID := status.GetJSONLDId(); statusID != nil {
// Else, if possible use status JSONLD ID
return page, statusID.Get().String()
} else {
// MUST have some kind of ID
return nil, ""
}
} }