diff --git a/.dockerignore b/.dockerignore old mode 100644 new mode 100755 diff --git a/.github/dependabot.yml b/.github/dependabot.yml old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md old mode 100644 new mode 100755 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 index 24c540e..7c29157 --- a/Dockerfile +++ b/Dockerfile @@ -1,22 +1,26 @@ FROM python:3-slim VOLUME /config + +# Install Requirements +COPY requirements.txt / +RUN apt-get update && \ + apt-get upgrade -y --no-install-recommends && \ + apt-get install -y tzdata --no-install-recommends && \ + pip3 install --no-cache-dir --upgrade --requirement /requirements.txt && \ + apt-get autoremove -y && \ + apt-get clean && \ + rm -rf \ + /requirements.txt \ + /tmp/* \ + /var/tmp/* \ + /var/lib/apt/lists/* + +# Download stopwords for keyword support +RUN python3 -c "import nltk; nltk.download('stopwords')" + +# Copy now that requirements are all cached COPY . 
/ -RUN \ - echo "**** install system packages ****" && \ - apt-get update && \ - apt-get upgrade -y --no-install-recommends && \ - apt-get install -y tzdata --no-install-recommends && \ - echo "**** install python packages ****" && \ - pip3 install --no-cache-dir --upgrade --requirement /requirements.txt && \ - echo "**** install Plex-Auto-Collections ****" && \ - chmod +x /app/plex_auto_collections.py && \ - echo "**** cleanup ****" && \ - apt-get autoremove -y && \ - apt-get clean && \ - rm -rf \ - /requirements.txt \ - /tmp/* \ - /var/tmp/* \ - /var/lib/apt/lists/* +RUN chmod +x /app/plex_auto_collections.py WORKDIR /app + ENTRYPOINT ["python3", "plex_auto_collections.py"] \ No newline at end of file diff --git a/README.md b/README.md old mode 100644 new mode 100755 index 2946565..12dd615 --- a/README.md +++ b/README.md @@ -113,11 +113,13 @@ A simple `Dockerfile` is available in this repo if you'd like to build it yourse The docker implementation today is limited but will improve over time. To use, try the following: ```shell -docker run --rm -v '/mnt/user/plex-auto-collections/':'/config':'rw' 'mza921/plex-auto-collections' -u +docker run --rm -v '/mnt/user/plex-auto-collections/':'/config':'rw' --net host 'mza921/plex-auto-collections' -u ``` The `-v '/mnt/user/plex-auto-collections/':'/config'` mounts a persistent volume to store your config file. Today, the docker image defaults to running the config named `config.yml` in your persistent volume (eventually, the docker will support an environment variable to change the config path). +`--net host` allows network access to localhost instead of putting the container inside docker networking. + Lastly, you may need to run the docker with `-it` and without `-u` in order to interact with the script. For example, if you'd like to use Trakt lists, you need to go through the OAuth flow and interact with the script at first-run. After that, you should be able to run it without the `-it` flag. 
# Configuration @@ -153,6 +155,36 @@ Each collection is defined by the mapping name which becomes the name of the Ple - [Background (optional)](#background-collection-attribute) - [Name Mapping (optional)](#name-mapping-collection-attribute) +### Collection Name +If the collection name is the wildcard `*`, the title of each video is split into keywords and the video is added to the collection named after each keyword. Here are some examples: + +```yaml +# Go through all videos and add them to any collections by keywords in the title +collections: + "*": + all: true +``` + + +| Title | Collections | +| ----------------------------------------------- | ------------------------------------------------ | +| The Land Before Time | Land, Time | +| Toy Story 3 | Toy, Story | +| Toy Story 2 | Toy, Story | +| Toy Story 1 | Toy, Story | +| Harry Potter and the Deathly Hallows: Part 1 | Deathly, Harry, Hallows, Potter | +| Harry Potter and the Philosopher's Stone | Stone, Harry, Philosopher, Potter | +| Harry Potter and the Order of the Phoenix | Order, Phoenix, Harry, Potter | +| Alice in Wonderland | Alice, Wonderland | +| Star Wars: Episode I - The Phantom Menace | Phantom, Episode, Wars, Menace, Star | +| Jurassic Park | Jurassic, Park | +| The Land Before Time | Land, Time | +| Pirates of the Caribbean: On Stranger Tides | Tides, Pirates, Caribbean, Stranger | +| Pirates of the Caribbean: Dead Man's Chest | Man, Dead, Pirates, Caribbean, Chest | +| Pirates of the Caribbean: At World's End | World, Pirates, Caribbean, End | + + + ### List Type (Collection Attribute) The only required attribute for each collection is the list type. There are many different list types to choose from: @@ -195,6 +227,7 @@ You can create a collection based on the Plex search feature using the `plex_sea | Search Option | Description | Movie
Libraries | Show
Libraries | | :-- | :-- | :--: | :--: | +| `title` | Gets every movie with the search term in the title | :heavy_check_mark: | :x: | | `actor` | Gets every movie with the specified actor | :heavy_check_mark: | :x: | | `tmdb_actor` | Gets every movie with the specified actor as well as the added TMDb [metadata](#tmdb-people-list-type) | :heavy_check_mark: | :x: | | `country` | Gets every movie with the specified country | :heavy_check_mark: | :x: | @@ -216,6 +249,12 @@ collections: genre: Documentary ``` ```yaml +collections: + Hunger Games: + plex_search: + title: Hunger Games +``` +```yaml collections: Dave Chappelle Comedy: plex_search: diff --git a/app/config_tools.py b/app/config_tools.py old mode 100644 new mode 100755 diff --git a/app/imdb_tools.py b/app/imdb_tools.py old mode 100644 new mode 100755 diff --git a/app/plex_auto_collections.py b/app/plex_auto_collections.py old mode 100644 new mode 100755 index aa8b60b..9affdcb --- a/app/plex_auto_collections.py +++ b/app/plex_auto_collections.py @@ -193,6 +193,7 @@ def update_from_config(config_path, plex, headless=False, no_meta=False, no_imag "studio", #"studio.not", "year", #"year.not", "writer", #"writer.not", + "title", "tmdb_actor", "tmdb_director", "tmdb_writer" ] show_only_lists = [ @@ -553,6 +554,7 @@ def update_from_config(config_path, plex, headless=False, no_meta=False, no_imag else: print("| Config Error: {} plex search attribute not supported".format(search_attr)) methods.append((method_name, [search])) + print("METHOS: {}, search: {}".format(methods, search)) elif method_name == "tmdb_discover": new_dictionary = {"limit": 100} for attr in collections[c][m]: diff --git a/app/plex_tools.py b/app/plex_tools.py old mode 100644 new mode 100755 index a1b685f..640b7f7 --- a/app/plex_tools.py +++ b/app/plex_tools.py @@ -22,6 +22,20 @@ import tempfile import glob import shutil from contextlib import closing +from title_keywords import keywords + +# Probably a better way to do this +use_collection_cache = True
+collection_cache = {} + +def get_collection_cache(name): + global collection_cache + if name in collection_cache: + return collection_cache[name] + +def set_collection_cache(name, value): + global collection_cache + collection_cache[name] = value def adjust_space(old_length, display_title): @@ -239,7 +253,11 @@ def add_to_collection(config_path, plex, method, value, c, plex_map=None, map=No ors = ors + (" OR " if len(ors) > 0 else attr_pair[0] + "(") + str(param) output = output + ("\n|\t\t AND " if len(output) > 0 else "| Processing Plex Search: ") + ors + ")" print(output) - items = plex.Library.search(**search_terms) + title = None + if final_method == "title": + title = search_terms[final_method][0] + del search_terms[final_method] + items = plex.Library.search(title, **search_terms) elif method == "tvdb_show" and plex.library_type == "show": items, missing = imdb_tools.tvdb_get_shows(config_path, plex, plex_map, value) elif "imdb" in method or "tmdb" in method: @@ -285,74 +303,95 @@ def add_to_collection(config_path, plex, method, value, c, plex_map=None, map=No } if items: - # Check if already in collection - cols = plex.Library.search(title=c, libtype="collection") - try: - fs = cols[0].children - except IndexError: - fs = [] + start = datetime.now() item_count = 0 item_max = len(items) max_str_len = len(str(item_max)) current_length = 0 for rk in items: + elapsed = (datetime.now() - start).total_seconds() + if elapsed > 0.: + time_each = float(item_count) / elapsed + if time_each > 0.: + remaining = timedelta(seconds=float(item_max - item_count) / time_each) + print("Processing item {}/{} remaining: {} ({}s each)".format(item_count, item_max, remaining, time_each)) current_item = get_item(plex, rk) item_count += 1 - match = True - if filters: - display_count = (" " * (max_str_len - len(str(item_count)))) + str(item_count) - print_display = "| Filtering {}/{} {}".format(display_count, item_max, current_item.title) - print(adjust_space(current_length, 
print_display), end = "\r") - current_length = len(print_display) - for f in filters: - modifier = f[0][-4:] - method = filter_alias[f[0][:-4]] if modifier in [".not", ".lte", ".gte"] else filter_alias[f[0]] - if method == "max_age": - threshold_date = datetime.now() - timedelta(days=f[1]) - attr = getattr(current_item, "originallyAvailableAt") - if attr is None or attr < threshold_date: - match = False - break - elif modifier in [".gte", ".lte"]: - if method == "originallyAvailableAt": - threshold_date = datetime.strptime(f[1], "%m/%d/%y") - attr = getattr(current_item, "originallyAvailableAt") - if (modifier == ".lte" and attr > threshold_date) or (modifier == ".gte" and attr < threshold_date): - match = False - break - elif method in ["year", "rating"]: - attr = getattr(current_item, method) - if (modifier == ".lte" and attr > f[1]) or (modifier == ".gte" and attr < f[1]): - match = False - break - else: - terms = f[1] if isinstance(f[1], list) else str(f[1]).split(", ") - if method in ["video_resolution", "audio_language", "subtitle_language"]: - for media in current_item.media: - if method == "video_resolution": - attrs = [media.videoResolution] - for part in media.parts: - if method == "audio_language": - attrs = ([audio_stream.language for audio_stream in part.audioStreams()]) - if method == "subtitle_language": - attrs = ([subtitle_stream.language for subtitle_stream in part.subtitleStreams()]) - elif method in ["contentRating", "studio", "year", "rating", "originallyAvailableAt"]: # Otherwise, it's a string. Make it a list. 
- attrs = [str(getattr(current_item, method))] - elif method in ["actors", "countries", "directors", "genres", "writers", "collections"]: - attrs = [getattr(x, 'tag') for x in getattr(current_item, method)] - # Get the intersection of the user's terms and item's terms - # If it's empty and modifier is not .not, it's not a match - # If it's not empty and modifier is .not, it's not a match - if (not list(set(terms) & set(attrs)) and modifier != ".not") or (list(set(terms) & set(attrs)) and modifier == ".not"): - match = False - break - if match: - if current_item in fs: - map[current_item.ratingKey] = None + cs = [c] + # Wildcard. Add to all keywords in title + if c.strip() == "*": + cs = keywords(current_item.title) + print("KEYWORDS FOR: '{}' = {}".format(current_item.title, cs)) + + for c2 in cs: + # Check if already in collection + cached = get_collection_cache(c2) + if cached: + fs = cached + print("collection was cached: {}".format(repr(cols))) else: - current_item.addCollection(c) - print(adjust_space(current_length, "| {} Collection | {} | {}".format(c, "=" if current_item in fs else "+", current_item.title))) + cols = plex.Library.search(title=c2, libtype="collection") + try: + fs = cols[0].children + except IndexError: + fs = [] + set_collection_cache(c2, fs) + match = True + if filters: + display_count = (" " * (max_str_len - len(str(item_count)))) + str(item_count) + print_display = "| Filtering {}/{} {}".format(display_count, item_max, current_item.title) + print(adjust_space(current_length, print_display), end = "\r") + current_length = len(print_display) + for f in filters: + modifier = f[0][-4:] + method = filter_alias[f[0][:-4]] if modifier in [".not", ".lte", ".gte"] else filter_alias[f[0]] + if method == "max_age": + threshold_date = datetime.now() - timedelta(days=f[1]) + attr = getattr(current_item, "originallyAvailableAt") + if attr is None or attr < threshold_date: + match = False + break + elif modifier in [".gte", ".lte"]: + if method == 
"originallyAvailableAt": + threshold_date = datetime.strptime(f[1], "%m/%d/%y") + attr = getattr(current_item, "originallyAvailableAt") + if (modifier == ".lte" and attr > threshold_date) or (modifier == ".gte" and attr < threshold_date): + match = False + break + elif method in ["year", "rating"]: + attr = getattr(current_item, method) + if (modifier == ".lte" and attr > f[1]) or (modifier == ".gte" and attr < f[1]): + match = False + break + else: + terms = f[1] if isinstance(f[1], list) else str(f[1]).split(", ") + if method in ["video_resolution", "audio_language", "subtitle_language"]: + for media in current_item.media: + if method == "video_resolution": + attrs = [media.videoResolution] + for part in media.parts: + if method == "audio_language": + attrs = ([audio_stream.language for audio_stream in part.audioStreams()]) + if method == "subtitle_language": + attrs = ([subtitle_stream.language for subtitle_stream in part.subtitleStreams()]) + elif method in ["contentRating", "studio", "year", "rating", "originallyAvailableAt"]: # Otherwise, it's a string. Make it a list. 
+ attrs = [str(getattr(current_item, method))] + elif method in ["actors", "countries", "directors", "genres", "writers", "collections"]: + attrs = [getattr(x, 'tag') for x in getattr(current_item, method)] + + # Get the intersection of the user's terms and item's terms + # If it's empty and modifier is not .not, it's not a match + # If it's not empty and modifier is .not, it's not a match + if (not list(set(terms) & set(attrs)) and modifier != ".not") or (list(set(terms) & set(attrs)) and modifier == ".not"): + match = False + break + if match: + if current_item in fs: + map[current_item.ratingKey] = None + else: + current_item.addCollection(c2) + print(adjust_space(current_length, "| {} Collection | {} | {}".format(c2, "=" if current_item in fs else "+", current_item.title))) print(adjust_space(current_length, "| Processed {} {}".format(item_max, "Movies" if plex.library_type == "movie" else "Shows"))) else: print("| No {} Found".format("Movies" if plex.library_type == "movie" else "Shows")) diff --git a/app/radarr_tools.py b/app/radarr_tools.py old mode 100644 new mode 100755 diff --git a/app/title_keywords.py b/app/title_keywords.py new file mode 100755 index 0000000..2360176 --- /dev/null +++ b/app/title_keywords.py @@ -0,0 +1,45 @@ +import re +import nltk +nltk.download('stopwords') +from nltk.corpus import stopwords +blacklist = stopwords.words('english') +blacklist.extend(["the", "part"]) +blacklist = set(blacklist) +whitelist = set(["vr"]) + +def extract_kw(kw): + kw = kw.strip().lower() + if not kw: + return None + if kw not in whitelist: + if kw in blacklist or len(kw) > 18 or len(kw) < 3 or kw.isnumeric(): + return None + return kw.capitalize() + + +"""Extract any meaningful words from string""" +def keywords(title): + keywords = re.split("_|\.|\W|\-", title) + keywords = (extract_kw(kw) for kw in keywords) + keywords = set(filter(bool, keywords)) + return keywords + +if __name__ == "__main__": + names = u""" +Toy Story 3 +Toy Story 2 +Toy Story 1 
+Harry Potter and the Deathly Hallows: Part 1 +Harry Potter and the Philosopher's Stone +Harry Potter and the Order of the Phoenix +Alice in Wonderland +Star Wars: Episode I - The Phantom Menace +Jurassic Park +The Land Before Time +Pirates of the Caribbean: On Stranger Tides +Pirates of the Caribbean: Dead Man's Chest +Pirates of the Caribbean: At World's End +""" + lines = [line.strip() for line in names.split("\n") if line.strip()] + for line in lines: + print("{0: <60} -> {1}".format(line, ", ".join(keywords(line)))) \ No newline at end of file diff --git a/app/trakt_helpers.py b/app/trakt_helpers.py old mode 100644 new mode 100755 diff --git a/app/trakt_tools.py b/app/trakt_tools.py old mode 100644 new mode 100755 diff --git a/config/config.yml.template b/config/config.yml.template old mode 100644 new mode 100755 diff --git a/config/images/Dave Chappelle.png b/config/images/Dave Chappelle.png old mode 100644 new mode 100755 diff --git a/config/images/Jurassic Park.png b/config/images/Jurassic Park.png old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 index e96b380..4c66ef8 --- a/requirements.txt +++ b/requirements.txt @@ -9,3 +9,4 @@ bs4 lxml requests>=2.4.2 ruamel.yaml +nltk \ No newline at end of file