only use domain part of uri for hash

This commit is contained in:
Nick Sweeting 2024-05-13 07:49:36 -07:00
parent 206e7e74b3
commit 1ba8215072
No known key found for this signature in database

View file

@ -3,6 +3,7 @@ from typing import NamedTuple, Any, Union, Optional
import ulid import ulid
import uuid6 import uuid6
import hashlib import hashlib
from urllib.parse import urlparse
from uuid import UUID from uuid import UUID
from typeid import TypeID # type: ignore[import-untyped] from typeid import TypeID # type: ignore[import-untyped]
def uri_hash(uri: Union[str, bytes]) -> str:
    """
    Return the uppercase hex SHA-256 digest used to identify *uri*.

    For URLs (any value containing ``://``) only the network-location part
    reported by ``urlparse(...).netloc`` is hashed, so every URL on the same
    domain yields the same hash. Values without ``://``, URLs whose netloc is
    empty (e.g. ``file:///tmp/x``), and URLs that ``urlparse`` rejects are
    hashed in full.

    ``bytes`` input is decoded as UTF-8 before processing.

    Example of the output format (64 uppercase hex characters):
    'E4A5CCD9AF4ED2A6E0954DF19FD274E9CDDB4853051F033FD518BFC90AA1AC25'
    """
    uri_str: str = uri.decode('utf-8') if isinstance(uri, bytes) else uri

    # only hash the domain part of URLs
    if '://' in uri_str:
        try:
            # ParseResult exposes .netloc / .hostname; it has no .host attribute
            domain = urlparse(uri_str).netloc
        except ValueError:
            # urlparse raises on malformed netlocs (e.g. an unbalanced IPv6
            # bracket); fall back to hashing the whole string
            domain = ''
        if domain:
            uri_str = domain

    return hashlib.sha256(uri_str.encode('utf-8')).hexdigest().upper()
def abid_part_from_prefix(prefix: Optional[str]) -> str: def abid_part_from_prefix(prefix: Optional[str]) -> str:
""" """