mirror of
https://github.com/itzg/docker-minecraft-server
synced 2024-12-13 05:42:28 +00:00
Add initial version of forge download scraper
This commit is contained in:
parent
2e8704a311
commit
14a1e5fb2a
1 changed files with 68 additions and 0 deletions
68
minecraft-server/minecraftforge_spider.py
Normal file
68
minecraft-server/minecraftforge_spider.py
Normal file
|
@ -0,0 +1,68 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import scrapy
|
||||
from scrapy.contrib.spiders import CrawlSpider, Rule
|
||||
from scrapy.contrib.linkextractors import LinkExtractor
|
||||
from scrapy.selector import Selector
|
||||
|
||||
import re
|
||||
|
||||
class Forge(scrapy.Item):
|
||||
versions = scrapy.Field()
|
||||
latest = scrapy.Field()
|
||||
|
||||
class ForgeVersions(scrapy.Item):
|
||||
id = scrapy.Field()
|
||||
minecraft = scrapy.Field()
|
||||
type = scrapy.Field()
|
||||
time = scrapy.Field()
|
||||
url = scrapy.Field()
|
||||
|
||||
class ForgeLatest(scrapy.Item):
|
||||
forge_latest = scrapy.Field()
|
||||
forge_recommended = scrapy.Field()
|
||||
|
||||
class ForgeSpider(CrawlSpider):
|
||||
name = "ForgeSpider"
|
||||
allowed_domains = ["minecraftforge.net"]
|
||||
start_urls = ['http://files.minecraftforge.net']
|
||||
|
||||
def parse(self, response):
|
||||
forge = Forge()
|
||||
forge['versions'] = []
|
||||
forge['latest'] = ForgeLatest()
|
||||
|
||||
selector = Selector(response)
|
||||
rows = selector.xpath('//table[@id="promotions_table"]//tr')
|
||||
header = rows.pop(0)
|
||||
for row in rows:
|
||||
cells = row.xpath('td')
|
||||
|
||||
id = cells[1].xpath('text()').extract()
|
||||
minecraft = cells[2].xpath('text()').extract()
|
||||
type = cells[0].xpath('text()')
|
||||
time = cells[3].xpath('text()')
|
||||
url = cells[4].xpath('a[text()="Installer"]/@href')
|
||||
|
||||
#if has version
|
||||
has_version = re.match('(.+)\-.+', ''.join(type.extract()))
|
||||
if has_version:
|
||||
download = ForgeVersions()
|
||||
download['id'] = id
|
||||
download['minecraft'] = minecraft
|
||||
download['type'] = 'forge_' + ''.join(type.re('([a-zA-Z]+)')).lower()
|
||||
download['time'] = time.extract()
|
||||
download['url'] = url.re('http://adf.ly/\d+/(.+)')
|
||||
|
||||
forge['versions'].append(download)
|
||||
else:
|
||||
is_recommended = re.match('Recommended', ''.join(type.extract()))
|
||||
if is_recommended:
|
||||
download = ForgeLatest()
|
||||
forge['latest']['forge_recommended'] = id
|
||||
else:
|
||||
download = ForgeLatest()
|
||||
forge['latest']['forge_latest'] = id
|
||||
|
||||
|
||||
return forge
|
||||
|
Loading…
Reference in a new issue