MIsskey-ebooks-bot/roboduck.py

396 lines
13 KiB
Python
Raw Normal View History

2022-01-08 17:25:25 +00:00
import requests
import json
import os
import sys
import regex
2022-01-08 17:25:25 +00:00
import configparser
import markovify
import sqlite3
from pathlib import Path
from datetime import *
from time import sleep
2022-01-08 17:25:25 +00:00
2022-02-20 02:15:27 +00:00
2022-01-08 17:25:25 +00:00
def check_str_to_bool(text) -> bool:
    """Interpret a configuration value as a boolean.

    Only the exact spellings ``"False"``, ``"false"`` and ``"FALSE"`` yield
    ``False``. Every other value, including unrecognised text, yields
    ``True``.
    """
    false_spellings = ("False", "false", "FALSE")
    return text not in false_spellings
2022-02-03 22:57:53 +00:00
def get_user_id(username, instance):
    """Look up the id of a user on a Misskey instance.

    Sends a POST request to ``https://<instance>/api/users/show``.

    Args:
        username: Account name that is sent as the ``username`` field.
        instance: Host name of the instance. It is used both as the request
            host and as the ``host`` field of the query.

    Returns:
        The value of the ``id`` field of the JSON response.

    Raises:
        SystemExit: With exit code 1 when the request fails.
    """
    url = "https://" + instance + "/api/users/show"
    try:
        # The timeout keeps an unresponsive instance from blocking forever.
        req = requests.post(
            url,
            json={"username": username, "host": instance},
            timeout=30,
        )
        req.raise_for_status()
    except requests.exceptions.RequestException as err:
        # RequestException is the base class of HTTPError, so connection
        # failures and timeouts are reported the same way as HTTP errors.
        print("Couldn't get Username! " + str(err))
        sys.exit(1)
    return req.json()["id"]
2022-02-20 02:15:27 +00:00
def get_notes(**kwargs):
    """Download notes of one user and convert them into database rows.

    Notes are requested from ``https://<instance>/api/users/notes`` in pages
    of up to 100. The id of the last note received so far is sent as
    ``untilId`` of the next request. Paging stops when that id no longer
    changes or, in initial mode, once enough notes were collected.

    Keyword Args:
        username: Account name (required).
        instance: Host name of the Misskey instance (required).
        min_notes: Initial mode. Keep paging until at least this many notes
            were read. Takes precedence over ``lastnote``.
        lastnote: Update mode. Note id that is sent as ``sinceId``.

    Returns:
        A list of dicts with the keys ``id``, ``text``, ``timestamp``
        (creation time in milliseconds since the epoch) and ``user_id``.
        An empty list when the API returned no notes at all. ``None`` when
        neither ``min_notes`` nor ``lastnote`` was passed.

    Raises:
        KeyError: When ``username`` or ``instance`` is missing.
        SystemExit: With exit code 1 when a request fails.
    """
    username = kwargs["username"]
    instance = kwargs["instance"]

    print("Reading notes for @" + username + "@" + instance + ".")

    if "min_notes" in kwargs:
        init = True
        min_notes = kwargs["min_notes"]
        since_id = ""
    elif "lastnote" in kwargs:
        init = False
        min_notes = 0
        since_id = kwargs["lastnote"]
    else:
        print("Wrong arguments given!")
        print("Exiting routine!")
        return None

    # Load configuration
    config = configparser.ConfigParser()
    config.read(os.path.join(os.path.dirname(__file__), 'bot.cfg'))

    def read_flag(option, default):
        # A missing section or option raises configparser.Error, which is
        # neither a TypeError nor a ValueError, so it is caught explicitly.
        try:
            return check_str_to_bool(config.get("markov", option))
        except (configparser.Error, TypeError, ValueError):
            return default

    user_id = get_user_id(username, instance)

    # Read & sanitize inputs from the config file
    include_replies = read_flag("includeReplies", True)
    include_my_renotes = read_flag("includeMyRenotes", False)
    exclude_nsfw = read_flag("excludeNsfw", True)

    notes = []
    # NOTE(review): "k" is the untilId of the first request; presumably it
    # compares greater than every real note id - TODO confirm.
    until_id = "k"
    previous_until_id = ""

    while True:
        enough_notes = init and len(notes) >= min_notes
        if enough_notes or previous_until_id == until_id:
            break

        try:
            # The timeout keeps an unresponsive instance from blocking forever.
            req = requests.post(
                "https://" + instance + "/api/users/notes",
                json={
                    "userId": user_id,
                    "includeReplies": include_replies,
                    "limit": 100,
                    "includeMyRenotes": include_my_renotes,
                    "withFiles": False,
                    "excludeNsfw": exclude_nsfw,
                    "untilId": until_id,
                    "sinceId": since_id,
                },
                timeout=30,
            )
            req.raise_for_status()
        except requests.exceptions.RequestException as err:
            # Also covers connection errors and timeouts, not only HTTP
            # error status codes.
            print("Couldn't get Posts! " + str(err))
            sys.exit(1)

        notes.extend(req.json())
        if not notes:
            print("No new notes to load!")
            return []

        # When a page adds nothing the last id stays the same, which ends
        # the loop at the top of the next iteration.
        previous_until_id = until_id
        until_id = notes[-1]["id"]

    print(str(len(notes)) + " Notes read.")
    print("Processing notes...")

    prepared = []
    for element in notes:
        content = element["text"]
        if content is None:
            # Notes without any text cannot contribute to the model.
            continue

        created_at = datetime.strptime(element["createdAt"], '%Y-%m-%dT%H:%M:%S.%f%z')
        timestamp = int(created_at.timestamp() * 1000)

        # Remove "@user" and "@user@host" mentions
        content = regex.sub(r"(?>@(?>[\w\-])+)(?>@(?>[\w\-\.])+)?", '', content)
        content = content.replace("::", ": :")  # Break long emoji chains
        # Put a zero-width space (U+200B) behind every remaining "@"
        content = content.replace("@", "@" + chr(8203))

        prepared.append({
            "id": element["id"],
            "text": content,
            "timestamp": timestamp,
            "user_id": user_id,
        })

    return prepared
2022-02-20 02:15:27 +00:00
def calculate_markov_chain():
    """Rebuild the Markov model from the stored notes and save it.

    Reads the text of the newest ``max_notes`` notes (section ``markov`` of
    ``bot.cfg``, default 10000) from ``roboduck.db``, builds a compiled
    ``markovify.Text`` model from them and writes it to ``markov.json``.
    All three files are located next to this module.

    Raises:
        SystemExit: With exit code 0 when the database file is missing or
            empty.
    """
    base_dir = Path(__file__).parent

    # Load configuration
    config = configparser.ConfigParser()
    config.read(base_dir.joinpath('bot.cfg'))
    try:
        max_notes = int(config.get("markov", "max_notes"))
    except (configparser.Error, TypeError, ValueError):
        # configparser.Error covers a missing section or option; ValueError
        # covers a value that is not a number.
        max_notes = 10000

    databasepath = base_dir.joinpath('roboduck.db')
    if not (os.path.exists(databasepath) and os.stat(databasepath).st_size != 0):
        print("Roboduck database not already created!")
        print("Exit initialization!")
        sys.exit(0)

    database = sqlite3.connect(databasepath)
    try:
        # The limit is bound as a parameter instead of being concatenated
        # into the statement.
        rows = database.execute(
            "SELECT text FROM notes ORDER BY timestamp DESC LIMIT ?;",
            (max_notes,),
        ).fetchall()
    finally:
        database.close()

    text = "".join(row[0] + "\n" for row in rows)

    markovchain = markovify.Text(text)
    markovchain.compile(inplace=True)

    # The result of to_json() is passed through json.dump unchanged in form:
    # create_sentence() reads the file with json.load and hands the loaded
    # value to markovify.Text.from_json, so the file layout must not change.
    with open(base_dir.joinpath('markov.json'), "w", encoding="utf-8") as markov:
        json.dump(markovchain.to_json(), markov)
2022-01-08 17:25:25 +00:00
2022-02-20 02:15:27 +00:00
def clean_database():
    """Delete old notes so every configured user keeps only the newest ones.

    For each entry of ``users`` (section ``misskey`` of ``bot.cfg``,
    separated by ``;``) all notes of that user except the ``max_notes``
    newest ones (section ``markov``, default 10000) are deleted from
    ``roboduck.db``.

    Raises:
        SystemExit: With exit code 0 when the database file is missing or
            empty.
    """
    base_dir = Path(__file__).parent
    databasepath = base_dir.joinpath('roboduck.db')
    if not (os.path.exists(databasepath) and os.stat(databasepath).st_size != 0):
        print("No database found!")
        print("Please run Bot first!")
        sys.exit(0)

    # Reading config file bot.cfg with config parser
    config = configparser.ConfigParser()
    config.read(base_dir.joinpath('bot.cfg'))
    try:
        max_notes = int(config.get("markov", "max_notes"))
    except (configparser.Error, TypeError, ValueError):
        # configparser.Error covers a missing section or option; ValueError
        # covers a value that is not a number.
        max_notes = 10000

    database = sqlite3.connect(databasepath)
    try:
        for user in config.get("misskey", "users").split(";"):
            # Entries are split at "@"; parts 1 and 2 are used as user name
            # and instance.
            parts = user.split("@")
            userid = get_user_id(parts[1], parts[2])
            database.execute(
                "DELETE FROM notes WHERE user_id=:user_id AND id NOT IN (SELECT id FROM notes WHERE user_id=:user_id ORDER BY timestamp DESC LIMIT :max );",
                {"user_id": userid, "max": max_notes},
            )
        database.commit()
    finally:
        # Close the connection even when a lookup or statement fails.
        database.close()
2022-02-20 02:15:27 +00:00
def create_sentence():
    """Generate one sentence from the Markov model stored in ``markov.json``.

    The generation parameters are read from section ``markov`` of
    ``bot.cfg``. When ``test_output`` is disabled the remaining options are
    not read and fixed values are used instead.

    Returns:
        The generated sentence, or an error message text when
        ``make_sentence`` returned ``None``.
    """
    base_dir = Path(__file__).parent

    with open(os.path.join(base_dir, 'markov.json'), "r", encoding="utf-8") as markov:
        markov_json = json.load(markov)

    text_model = markovify.Text.from_json(markov_json)

    # Reading config file bot.cfg with config parser
    config = configparser.ConfigParser()
    config.read(base_dir.joinpath('bot.cfg'))

    def read_option(option, convert, default):
        # configparser.Error covers a missing section or option; TypeError
        # and ValueError cover values that cannot be converted.
        try:
            return convert(config.get("markov", option))
        except (configparser.Error, TypeError, ValueError):
            return default

    # Read & sanitize inputs
    test_output = read_option("test_output", check_str_to_bool, True)

    if test_output:
        tries = read_option("tries", int, 250)
        max_overlap_ratio = read_option("max_overlap_ratio", float, 0.7)
        max_overlap_total = read_option("max_overlap_total", int, 10)
        max_words = read_option("max_words", int, None)
        min_words = read_option("min_words", int, None)

        # Swap the limits when they were configured the wrong way round.
        if max_words is not None and min_words is not None and min_words >= max_words:
            min_words, max_words = max_words, min_words
    else:
        tries = 250
        max_overlap_ratio = 0.7
        max_overlap_total = 15
        max_words = None
        min_words = None

    # Applying inputs
    note = text_model.make_sentence(
        test_output=test_output,
        tries=tries,
        max_overlap_ratio=max_overlap_ratio,
        max_overlap_total=max_overlap_total,
        max_words=max_words,
        min_words=min_words,
    )
    if note is not None:
        return note
    return "Error in markov chain sentence creation: Couldn't calculate sentence!\n\n☹ Please try again! "
2022-02-03 22:57:53 +00:00
2022-02-20 02:15:27 +00:00
def update():
    """Fetch new notes of all configured users and refresh the Markov model.

    For every entry of ``users`` (section ``misskey`` of ``bot.cfg``) the id
    of the newest stored note is looked up and all notes since then are
    requested. A user without any stored note is imported like during
    initialization, using ``min_notes`` (section ``markov``, default 1000).
    When nothing new was found the function returns without touching the
    database or the model. Otherwise the notes are inserted, the database is
    trimmed and the Markov chain is recalculated.

    Raises:
        SystemExit: With exit code 0 when the database file is missing or
            empty.
    """
    base_dir = Path(__file__).parent
    databasepath = base_dir.joinpath('roboduck.db')
    if not (os.path.exists(databasepath) and os.stat(databasepath).st_size != 0):
        print("No database found!")
        print("Please run Bot first!")
        sys.exit(0)

    config = configparser.ConfigParser()
    config.read(base_dir.joinpath('bot.cfg'))
    try:
        initnotes = int(config.get("markov", "min_notes"))
    except (configparser.Error, TypeError, ValueError):
        initnotes = 1000

    notesList = []
    database = sqlite3.connect(databasepath)
    try:
        print("Connected to roboduck.db succesfull...")

        for user in config.get("misskey", "users").split(";"):
            # Entries are split at "@"; parts 1 and 2 are used as user name
            # and instance.
            parts = user.split("@")
            username, instance = parts[1], parts[2]
            userid = get_user_id(username, instance)

            row = database.execute(
                "SELECT id FROM notes WHERE timestamp = (SELECT MAX(timestamp) FROM notes WHERE user_id=:user_id) AND user_id=:user_id;",
                {"user_id": userid},
            ).fetchone()

            if row is None:
                # Nothing stored for this user yet, so there is no note id
                # to continue from; do an initial import instead.
                new_notes = get_notes(min_notes=initnotes, username=username, instance=instance)
            else:
                new_notes = get_notes(lastnote=row[0], username=username, instance=instance)

            # get_notes can return None; treat that like "no notes".
            notesList.extend(new_notes or [])

        if not notesList:
            return

        print("Insert new notes to database...")
        database.executemany(
            "INSERT OR IGNORE INTO notes (id, text, timestamp, user_id) VALUES(?, ?, ?, ?)",
            [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notesList],
        )
        database.commit()
        print("Notes updated!")
    finally:
        # Runs on every exit path, including the early return above.
        database.close()

    print("Cleaning database...")
    clean_database()
    print("Database cleaned!")

    print("Short sleep to prevent file collison...")
    sleep(10)

    print("Calculating new Markov Chain...")
    calculate_markov_chain()
    print("Markov Chain saved!")

    print("\nUpdate done!")
2022-02-20 02:15:27 +00:00
def init_bot():
    """Create the note database, fill it and build the first Markov model.

    Creates ``roboduck.db`` next to this module with a ``notes`` table,
    downloads at least ``min_notes`` notes (section ``markov`` of
    ``bot.cfg``, default 1000) for every entry of ``users`` (section
    ``misskey``), stores them and calculates the Markov chain.

    Raises:
        SystemExit: With exit code 0 when a non-empty database file already
            exists.
    """
    base_dir = Path(__file__).parent
    databasepath = base_dir.joinpath('roboduck.db')
    if os.path.exists(databasepath) and os.stat(databasepath).st_size != 0:
        print("Roboduck database already created!")
        print("Exit initialization!")
        sys.exit(0)

    print("Creating database...")

    # sqlite3.connect creates the file itself when it does not exist yet.
    database = sqlite3.connect(databasepath)
    try:
        print("Connected to roboduck.db succesfull...")

        print("Creating Table...")
        database.execute("CREATE TABLE notes (id CHAR(10) PRIMARY KEY, text CHAR(5000), timestamp INT, user_id CHAR(10));")
        print("Table NOTES created...")

        # Load configuration
        config = configparser.ConfigParser()
        config.read(base_dir.joinpath('bot.cfg'))
        try:
            initnotes = int(config.get("markov", "min_notes"))
        except (configparser.Error, TypeError, ValueError):
            # configparser.Error covers a missing section or option;
            # ValueError covers a value that is not a number.
            initnotes = 1000

        for user in config.get("misskey", "users").split(";"):
            print("Try reading first " + str(initnotes) + " notes for " + user + ".")

            # Entries are split at "@"; parts 1 and 2 are used as user name
            # and instance.
            parts = user.split("@")
            # get_notes can return None; treat that like "no notes".
            notesList = get_notes(min_notes=initnotes, username=parts[1], instance=parts[2]) or []

            print("Writing notes into database...")
            # OR IGNORE, as in update(): a duplicate note id must not abort
            # the whole initialization.
            database.executemany(
                "INSERT OR IGNORE INTO notes (id, text, timestamp, user_id) VALUES(?, ?, ?, ?)",
                [(note["id"], note["text"], note["timestamp"], note["user_id"]) for note in notesList],
            )

        database.commit()
    finally:
        database.close()

    print("Notes written...")
    print("Creating Markov Chain")
    calculate_markov_chain()

    print("Markov Chain calculated & saved.\n")
    print("Finished initialization!\n")
    print("The bot will now be started!")