diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..45b2ea6
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,30 @@
+# Caching proxy for game downloads, built on the supervisord-based generic cache image.
+FROM vibroaxe/generic:supervisord
+#FROM 1and1internet/ubuntu-18-nginx:latest
+LABEL maintainer="SteamCache.Net Team"
+
+# Runtime defaults; each one can be overridden with `docker run -e NAME=value`.
+ENV GENERICCACHE_VERSION=2 \
+    WEBUSER=nginx \
+    CACHE_MEM_SIZE=500m \
+    CACHE_DISK_SIZE=500000m \
+    CACHE_MAX_AGE=3560d \
+    UPSTREAM_DNS="8.8.8.8 8.8.4.4" \
+    BEAT_TIME=1h \
+    LOGFILE_RETENTION=3560 \
+    CACHE_DOMAIN_REPO=https://github.com/uklans/cache-domains.git
+
+COPY overlay/ /
+
+# `&&` stops the build at the first failing step, and the apt package lists are
+# removed in the same layer so they never end up in the image.
+RUN mkdir -m 755 -p /data/cachedomains /tmp/nginx \
+ && apt-get update \
+ && apt-get install -y git jq \
+ && rm -rf /var/lib/apt/lists/*
+
+VOLUME ["/data/logs", "/data/cache", "/data/cachedomains", "/var/www"]
+
+EXPOSE 80
+
+WORKDIR /scripts
diff --git a/ISSUE_TEMPLATE.md b/ISSUE_TEMPLATE.md
new file mode 100644
index 0000000..0388102
--- /dev/null
+++ b/ISSUE_TEMPLATE.md
@@ -0,0 +1,16 @@
+### Describe the issue you are having
+
+
+### How are you running the container(s)?
+```
+
+```
+
+### DNS Configuration
+```
+
+```
+### Output of container(s)
+```
+
+```
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..f8a1e9a
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2016 Michael Smith, Robin Lewis, Brian Wojtczak, Jason Rivers
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md index 089c7ac..8ae5590 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,170 @@ -# monolithic -A monolithic cache based on steamcache/generic +# Game Download Cache Docker Container + +## Introduction + +This docker container provides a caching proxy server for game download content. For any network with more than one PC gamer connected this will drastically reduce internet bandwidth consumption. + +The primary use case is gaming events, such as LAN parties, which need to be able to cope with hundreds or thousands of computers receiving an unannounced patch - without spending a fortune on internet connectivity. Other uses include smaller networks, such as Internet Cafes and home networks, where the new games are regularly installed on multiple computers; or multiple independent operating systems on the same computer. + +This container is designed to support any game that uses HTTP and also supports HTTP range requests (used by Origin). This should make it suitable for: + + - Steam (Valve) + - Origin (EA Games) + - Riot Games (League of Legends) + - Battle.net (Hearthstone, Starcraft 2, Overwatch) + - Frontier Launchpad (Elite Dangerous, Planet Coaster) + - Uplay (Ubisoft) + - Windows Updates + +This is the best container to use for all game caching and should be used for Steam in preference to the steamcache/steamcache container. + +## Usage + +You need to be able to redirect HTTP traffic to this container. 
The easiest way to do this is to replace the DNS entries for the various game services with your cache server. + +You can use the [steamcache-dns](https://hub.docker.com/r/steamcache/steamcache-dns/) docker image to do this, or you can use a DNS service already on your network; see the [steamcache-dns github page](https://github.com/steamcache/steamcache-dns) for more information. + +For the cache files to persist you will need to mount a directory on the host machine into the container. You can do this using `-v <path on host>:/data/cache`. You can do the same with a logs directory if you want logs to be persistent as well. + +Run the container using the following to allow TCP port 80 (HTTP) and to mount `/cache/steam/data` directory into the container. + +``` +docker run \ + --restart unless-stopped \ + --name cache-steam \ + -v /cache/steam/data:/data/cache \ + -v /cache/steam/logs:/data/logs \ + -p 192.168.1.10:80:80 \ + steamcache/generic:latest +``` + +## Caching Multiple Services + +If you want to cache multiple game services then you should run multiple instances of the cache and use different IP addresses on the host machine. The first thing is to add an extra IP to your network interface. + +You should then create a second data directory on the host and then run the container for the service you want to cache: + +``` +docker run \ + --restart unless-stopped \ + --name cache-blizzard \ + -v /cache/blizzard/data:/data/cache \ + -v /cache/blizzard/logs:/data/logs \ + -p 192.168.1.11:80:80 \ + steamcache/generic:latest +``` + +Repeat this for as many services as you want to cache. It is best practice to keep the caches separate for each service to prevent the possibility of overwriting the same data. + +## Origin and SSL + +Some publishers, including Origin, use the same hostnames we're replacing for HTTPS content as well as HTTP content. 
We can't cache HTTPS traffic, so if you're intercepting DNS, you will need to run an SNI Proxy container on port 443 to forward on any HTTPS traffic. + +``` +docker run \ + --restart unless-stopped \ + --name sniproxy \ + -p 443:443 \ + steamcache/sniproxy:latest +``` + +Please read the [steamcache/sniproxy](https://github.com/steamcache/sniproxy) project for more information. + +## DNS Entries + +You can find a list of domains you will want to use for each service over on [uklans/cache-domains](https://github.com/uklans/cache-domains). The aim is for this to be a definitive list of all domains you might want to cache. + +## Suggested Hardware + +Regular commodity hardware (a single 2TB WD Black on an HP Microserver) can achieve peak throughputs of 30MB/s+ using this setup (depending on the specific content being served). + +## Changing Upstream DNS + +If you need to change the upstream DNS server the cache uses, these are defined by the `UPSTREAM_DNS` environment variable. The defaults are Google DNS (8.8.8.8 and 8.8.4.4). + +``` + UPSTREAM_DNS 8.8.8.8 8.8.4.4 +``` + +You can override these using the `-e` argument to docker run and specifying your upstream DNS servers. Multiple upstream dns servers are allowed, separated by whitespace. + +``` +-e UPSTREAM_DNS="1.1.1.1 1.0.0.1" +``` + +## Tweaking Cache sizes + +Two environment variables are available to manage both the memory and disk cache for a particular container, and are set to the following defaults. +``` +CACHE_MEM_SIZE 500m +CACHE_DISK_SIZE 500000m +``` + +In addition, there is an environment variable to control the max cache age + +``` +CACHE_MAX_AGE 3560d +``` + +You can override these at run time by adding the following to your docker run command. They accept the standard nginx notation for sizes (k/m/g/t) and durations (m/h/d) + +``` +-e CACHE_MEM_SIZE=4000m -e CACHE_DISK_SIZE=1000g +``` + +## Monitoring + +Access logs are written to /data/logs. 
If you don't particularly care about keeping them, you don't need to mount an external volume into the container. + +You can tail them using: + +``` +docker exec -it cache-steam tail -f /data/logs/access.log +``` + +If you have mounted the volume externally then you can tail it on the host instead. + +## Advice to Publishers + +If you're a games publisher and you'd like LAN parties, gaming centers and other places to be able to easily cache your game updates, we recommend the following: + + - If your content downloads are on HTTPS, you can do what Riot have done - try and resolve a specific hostname. If it resolves to a RFC1918 private address, switch your downloads to use HTTP instead. + - Try to use hostnames specific for your HTTP download traffic. + - Tell us the hostnames that you're using for your game traffic. We're maintaining a list at [uklans/cache-domains](https://github.com/uklans/cache-domains) and we'll accept pull requests! + - Have your client verify the files and ensure the file they've downloaded matches the file they **should** have downloaded. This cache server acts as a man-in-the-middle so it would be good to ensure the files are correct. + + If you need any further advice, please contact us and we'll be glad to help! + +## Frequently Asked Questions + +If you have any questions, please check [our FAQs](faq.md). If this doesn't answer your question, please raise an issue in GitHub. + +## Thanks + + - Based on original configs from [ansible-lanparty](https://github.com/ti-mo/ansible-lanparty). + - Everyone on [/r/lanparty](https://reddit.com/r/lanparty) who has provided feedback and helped people with this. + - UK LAN Techs for all the support. 
+ +## License + +The MIT License (MIT) + +Copyright (c) 2016 Jessica Smith, Robin Lewis, Brian Wojtczak, Jason Rivers + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/faq.md b/faq.md new file mode 100644 index 0000000..3eba0eb --- /dev/null +++ b/faq.md @@ -0,0 +1,75 @@ +# Frequently Asked Questions + +## What is a LAN Cache or Steam Cache? + +A Steam Cache or LAN Cache is a way of speeding up Steam or other content downloads on a local area network for multiple users. + +## How does it work? + +The Cache works as a simple caching proxy server. Most game and content updates use unsecured HTTP for downloading the content. We are able to intercept these download requests and save them, so that if another request is made for the same file, we don't need to download it again from the Internet. + +## This doesn't sound very new or original, we've been using caches for years + +You're correct. 
HTTP caches are not a new thing; many businesses and organisations such as universities and schools have been running them for a while. However, they are normally intended for caching traditional web traffic with short caching lifetimes and small files. + +## Why not use Squid or another server? + +We chose nginx as it offers the flexibility we need to overwrite cache keys and optimise the cache performance to cope with gaming update traffic. We need to be able to cache files for a long period of time, rewrite upstream HTTP headers and to be able to exclude the hostname of the server from the cache key, to allow us to cache content served from Content Delivery Networks (CDNs) + +## Should I use this then? + +It depends on your situation. If you just have a single PC and you download games to it once, you probably won't see any advantage or benefit from using the cache. It won't speed up your downloads of **new** content. It would speed up downloads if you removed the game and then re-installed it. + +If you have multiple computers sharing one Internet connection, you would see a benefit to it. If user A downloads Team Fortress 2 on Steam, it will download at the speed of your Internet Connection and be stored on the cache server. Once it's been downloaded, if user B downloads Team Fortress 2, it will be served up from your cache server at the maximum speed it can read the files from storage. + +## So can I really get 10gbps downloads for my games? + +Yes, but it relies on a few things: + + 1. You have already downloaded the content and it is on your cache server. + 2. Your network switch, network card in the cache server and PC network are 10gbps. + 3. You can read the files from disk at 10gbps. + +Realistic performance you can expect from the cache depends on all of these things. + +## You mentioned storage, how fast does my storage need to be? + +As fast as possible. 
Linux is great in that it will cache in memory any files accessed from disk, so if someone accesses the same files multiple times, and they haven't changed, those files will be served from memory, which is very quick. + +If the files aren't cached in memory, the OS will need to read it from the disk. If you have mechanical drives, this can be slow, and if you're reading from different areas of the disk, this will slow things down further. SSDs are quicker and if you use RAID, you can improve things even further. + +If you can't afford a large SSD, you can also use lvmcache and set up the SSD to act as a read cache for the rotational disk. This will give you three levels of caching: + + 1. RAM + 2. SSD based lvmcache + 3. Rotational Hard Disk + +If you can afford large SSDs, then run your entire cache using SSDs! + +## Why do you mention you support Windows Update, can't we use WSUS? + +This cache is intended for a LAN party environment where you have no control over the computers on the network. To force them to use a WSUS server, you'd need to either join them to a domain and push the config out through group policy or run a script to get them to use the WSUS server. Each of which is not really possible for a LAN party. + +This solution is designed to support anything from a couple of people at home, to thousands of people at gaming events. This is the easiest way to achieve the caching of services like Windows Update, without modifying people's computers. + +## But what about HTTPS, can I cache HTTPS? + +No. HTTPS traffic is encrypted. Some of the games, Origin for instance, also serve HTTPS content on the same hostnames we're intercepting - for this you can use SNI Proxy. It listens on port 443 and just passes through any HTTPS traffic. It is unable to inspect the traffic, or cache it. + +You can find more at the [steamcache/sniproxy](https://github.com/steamcache/sniproxy) project page. + +## Can I cache Fortnite/Epic Games Launcher? 
+ +Sadly no, it uses HTTPS and can't be cached. + +## Can I cache *some other service*? + +Yes, almost any HTTP content can be cached. We're maintaining a list of hostnames for various update services on the [uklans/cache-domains](https://github.com/uklans/cache-domains) project. There's a lot of services you can cache! + +## How do you intercept the HTTP traffic? + +We prefer to use DNS-based interception. This is because it is easier to deploy than other methods. We have a [steamcache-dns](https://github.com/steamcache/steamcache-dns) project that is a self-contained DNS server with options for setting your cache IP addresses. + +If you already run a DNS server and are comfortable configuring new override DNS zones, you can also just do this from the list on [uklans/cache-domains](https://github.com/uklans/cache-domains). pfSense's DNS forwarder can easily be configured, and the project has scripts to generate config for unbound. + +Other options also involve transparently intercepting HTTP traffic at a network level either using WCCP on Cisco switches or on your router if it supports it. diff --git a/hardware.md b/hardware.md new file mode 100644 index 0000000..efa4b29 --- /dev/null +++ b/hardware.md @@ -0,0 +1,33 @@ +# Hardware examples and throughput + +## Introduction + +This project is heavily reliant on the hardware that you run it on. The most +common complaint is "The cache is slow" and in nearly every case this can be +attributed to the hardware that the cache is being run on. + + +The following hardware specifications are real-world examples of hardware that +has been used to run this project, so you can get an idea of the type of +performance that you can expect from varying hardware setups. 
+
+
+
+## High Capacity
+
+24 CPU core, 140GB memory, 8 x Samsung 850 Pro SSD in RAID6
+Served ~2,800 uniques, peaking at 10Gbps
+
+
+## Medium capacity
+
+24 CPU core, 180GB memory, 5 x 900GB 15K SAS drives, plus 180GB SSD (lvmcache)
+Served ~250 uniques, peaking at 1.8Gbps
+
+## Low capacity
+
+
+
+
+
+
diff --git a/hooks/supervisor-pre.d/10_generate_maps.sh b/hooks/supervisor-pre.d/10_generate_maps.sh
new file mode 100644
index 0000000..8a5c6f5
--- /dev/null
+++ b/hooks/supervisor-pre.d/10_generate_maps.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Regenerate the nginx map that assigns each cached hostname to a cache
+# identifier, using the domain lists in the CACHE_DOMAIN_REPO git repository.
+
+IFS=' '
+
+# Stop if the checkout directory is missing so the git commands below never
+# run against some other directory.
+cd /data/cachedomains || exit 1
+export GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
+if [[ ! -d .git ]]; then
+    git clone "${CACHE_DOMAIN_REPO}" .
+fi
+git fetch origin
+git reset --hard origin/master
+
+path=$(mktemp -d)
+outputfile=${path}/outfile.conf
+
+echo "map \$http_host \$cacheidentifier {" >> "$outputfile"
+echo " hostnames;" >> "$outputfile"
+echo " default \$http_host;" >> "$outputfile"
+
+# One pass per service listed in cache_domains.json.
+jq -r '.cache_domains | to_entries[] | .key' cache_domains.json | while read -r entry; do
+    key=$(jq -r ".cache_domains[$entry].name" cache_domains.json)
+    jq -r ".cache_domains[$entry].domain_files[]" cache_domains.json | while read -r filename; do
+        echo "" >> "$outputfile"
+        # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
+        while read -r fileentry || [[ -n "$fileentry" ]]; do
+            # Ignore blank lines and comments
+            case "$fileentry" in
+                ''|\#*) continue ;;
+            esac
+            # Skip hostnames already written; -F matches the entry literally
+            # and the surrounding spaces make it a whole-token match.
+            if grep -qF -- " ${fileentry} " "$outputfile"; then
+                continue
+            fi
+            echo " ${fileentry} ${key};" >> "$outputfile"
+        done < "$filename"
+    done
+done
+echo "}" >> "$outputfile"
+cat "$outputfile"
+cp "$outputfile" /etc/nginx/maps.conf
+rm -rf "$path"
diff --git a/overlay/etc/nginx/conf.d/10_log_format.conf b/overlay/etc/nginx/conf.d/10_log_format.conf
new file mode 100644
index 0000000..60c5b11
--- /dev/null
+++ b/overlay/etc/nginx/conf.d/10_log_format.conf
@@ -0,0 +1,2 @@
+ log_format cachelog '[$cacheidentifier] $remote_addr 
/ $http_x_forwarded_for - $remote_user [$time_local] "$request" $status $body_bytes_sent "$http_referer" "$http_user_agent" "$upstream_cache_status" "$host" "$http_range"';
+
diff --git a/overlay/etc/nginx/conf.d/20_maps.conf b/overlay/etc/nginx/conf.d/20_maps.conf
new file mode 100644
index 0000000..884957d
--- /dev/null
+++ b/overlay/etc/nginx/conf.d/20_maps.conf
@@ -0,0 +1,4 @@
+map $http_host $cacheidentifier {
+ hostnames;
+ default $http_host;
+}
diff --git a/overlay/etc/nginx/sites-available/generic.conf.d/root/30_cache_key.conf b/overlay/etc/nginx/sites-available/generic.conf.d/root/30_cache_key.conf
new file mode 100644
index 0000000..e2d5580
--- /dev/null
+++ b/overlay/etc/nginx/sites-available/generic.conf.d/root/30_cache_key.conf
@@ -0,0 +1 @@
+ proxy_cache_key $cacheidentifier$uri$slice_range;
diff --git a/overlay/hooks/entrypoint-pre.d/15_generate_maps.sh b/overlay/hooks/entrypoint-pre.d/15_generate_maps.sh
new file mode 100644
index 0000000..3936b9e
--- /dev/null
+++ b/overlay/hooks/entrypoint-pre.d/15_generate_maps.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# Regenerate the nginx map that assigns each cached hostname to a cache
+# identifier, using the domain lists in the CACHE_DOMAIN_REPO git repository.
+
+IFS=' '
+
+# Stop if the checkout directory is missing so the git commands below never
+# run against some other directory.
+cd /data/cachedomains || exit 1
+export GIT_SSH_COMMAND="ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no"
+if [[ ! -d .git ]]; then
+    git clone "${CACHE_DOMAIN_REPO}" .
+fi
+git fetch origin
+git reset --hard origin/master
+
+path=$(mktemp -d)
+outputfile=${path}/outfile.conf
+
+echo "map \$http_host \$cacheidentifier {" >> "$outputfile"
+echo " hostnames;" >> "$outputfile"
+echo " default \$http_host;" >> "$outputfile"
+
+# One pass per service listed in cache_domains.json.
+jq -r '.cache_domains | to_entries[] | .key' cache_domains.json | while read -r entry; do
+    key=$(jq -r ".cache_domains[$entry].name" cache_domains.json)
+    jq -r ".cache_domains[$entry].domain_files[]" cache_domains.json | while read -r filename; do
+        echo "" >> "$outputfile"
+        # `|| [[ -n ... ]]` keeps a final line that lacks a trailing newline.
+        while read -r fileentry || [[ -n "$fileentry" ]]; do
+            # Ignore blank lines and comments
+            case "$fileentry" in
+                ''|\#*) continue ;;
+            esac
+            # Skip hostnames already written; -F matches the entry literally
+            # and the surrounding spaces make it a whole-token match.
+            if grep -qF -- " ${fileentry} " "$outputfile"; then
+                continue
+            fi
+            echo " ${fileentry} ${key};" >> "$outputfile"
+        done < "$filename"
+    done
+done
+echo "}" >> "$outputfile"
+cat "$outputfile"
+cp "$outputfile" /etc/nginx/conf.d/20_maps.conf
+rm -rf "$path"