mirror of
https://github.com/itzg/docker-minecraft-server
synced 2025-01-22 08:45:29 +00:00
Add initial version of forge download scraper
This commit is contained in:
parent
2e8704a311
commit
14a1e5fb2a
1 changed files with 68 additions and 0 deletions
68
minecraft-server/minecraftforge_spider.py
Normal file
68
minecraft-server/minecraftforge_spider.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import scrapy
|
||||||
|
from scrapy.contrib.spiders import CrawlSpider, Rule
|
||||||
|
from scrapy.contrib.linkextractors import LinkExtractor
|
||||||
|
from scrapy.selector import Selector
|
||||||
|
|
||||||
|
import re
|
||||||
|
|
||||||
|
class Forge(scrapy.Item):
|
||||||
|
versions = scrapy.Field()
|
||||||
|
latest = scrapy.Field()
|
||||||
|
|
||||||
|
class ForgeVersions(scrapy.Item):
|
||||||
|
id = scrapy.Field()
|
||||||
|
minecraft = scrapy.Field()
|
||||||
|
type = scrapy.Field()
|
||||||
|
time = scrapy.Field()
|
||||||
|
url = scrapy.Field()
|
||||||
|
|
||||||
|
class ForgeLatest(scrapy.Item):
|
||||||
|
forge_latest = scrapy.Field()
|
||||||
|
forge_recommended = scrapy.Field()
|
||||||
|
|
||||||
|
class ForgeSpider(CrawlSpider):
|
||||||
|
name = "ForgeSpider"
|
||||||
|
allowed_domains = ["minecraftforge.net"]
|
||||||
|
start_urls = ['http://files.minecraftforge.net']
|
||||||
|
|
||||||
|
def parse(self, response):
|
||||||
|
forge = Forge()
|
||||||
|
forge['versions'] = []
|
||||||
|
forge['latest'] = ForgeLatest()
|
||||||
|
|
||||||
|
selector = Selector(response)
|
||||||
|
rows = selector.xpath('//table[@id="promotions_table"]//tr')
|
||||||
|
header = rows.pop(0)
|
||||||
|
for row in rows:
|
||||||
|
cells = row.xpath('td')
|
||||||
|
|
||||||
|
id = cells[1].xpath('text()').extract()
|
||||||
|
minecraft = cells[2].xpath('text()').extract()
|
||||||
|
type = cells[0].xpath('text()')
|
||||||
|
time = cells[3].xpath('text()')
|
||||||
|
url = cells[4].xpath('a[text()="Installer"]/@href')
|
||||||
|
|
||||||
|
#if has version
|
||||||
|
has_version = re.match('(.+)\-.+', ''.join(type.extract()))
|
||||||
|
if has_version:
|
||||||
|
download = ForgeVersions()
|
||||||
|
download['id'] = id
|
||||||
|
download['minecraft'] = minecraft
|
||||||
|
download['type'] = 'forge_' + ''.join(type.re('([a-zA-Z]+)')).lower()
|
||||||
|
download['time'] = time.extract()
|
||||||
|
download['url'] = url.re('http://adf.ly/\d+/(.+)')
|
||||||
|
|
||||||
|
forge['versions'].append(download)
|
||||||
|
else:
|
||||||
|
is_recommended = re.match('Recommended', ''.join(type.extract()))
|
||||||
|
if is_recommended:
|
||||||
|
download = ForgeLatest()
|
||||||
|
forge['latest']['forge_recommended'] = id
|
||||||
|
else:
|
||||||
|
download = ForgeLatest()
|
||||||
|
forge['latest']['forge_latest'] = id
|
||||||
|
|
||||||
|
|
||||||
|
return forge
|
||||||
|
|
Loading…
Reference in a new issue