inspec/lib/fetchers/url.rb

# encoding: utf-8
# author: Dominik Richter
# author: Christoph Hartmann

require 'uri'
require 'tempfile'
require 'open-uri'

module Fetchers
  # Fetcher for http(s) URLs that point at a `tar.gz` or `zip` archive.
  # Browser-style GitHub URLs are first rewritten to the matching archive
  # download URL; the archive is then downloaded into a Tempfile and passed
  # on via `resolve_next`.
  class Url < Inspec.fetcher(1)
    name 'url'
    priority 200

    attr_reader :files

    # URI schemes this fetcher accepts. This must be a word array: a plain
    # `%{...}` literal is a String, and `String#include?` would accept any
    # substring of it (e.g. the scheme "tt").
    SCHEMES = %w{http https}.freeze

    # Maps the Content-Type of a download to the file suffix of the archive.
    MIME_TYPES = {
      'application/x-zip-compressed' => '.zip',
      'application/zip' => '.zip',
      'application/x-gzip' => '.tar.gz',
      'application/gzip' => '.tar.gz',
    }.freeze

    # Tries to resolve `target` with this fetcher.
    #
    # target - candidate URL
    # opts   - options hash; the 'user' and 'password' keys are used for
    #          HTTP basic authentication during the download
    #
    # Returns nil when target is not a parseable http(s) URL or does not end
    # in 'tar.gz' / 'zip' (after the GitHub transformation); otherwise
    # returns the result of resolve_url.
    def self.resolve(target, opts = {})
      uri = URI.parse(target)
      return nil if uri.nil? || uri.scheme.nil?
      return nil unless SCHEMES.include?(uri.scheme)
      target = transform(target)
      # TODO: for now, this can be much less strict now vv
      return nil unless target.end_with?('tar.gz', 'zip')
      resolve_url(target, opts)
    rescue URI::Error => _e
      nil
    end

    # Transforms a browser github url to github tar url
    # We distinguish between three different Github URL types:
    #  - Master URL
    #  - Branch URL
    #  - Commit URL
    #
    # master url:
    # https://github.com/nathenharvey/tmp_compliance_profile/ is transformed to
    # https://github.com/nathenharvey/tmp_compliance_profile/archive/master.tar.gz
    #
    # github branch:
    # https://github.com/hardening-io/tests-os-hardening/tree/2.0 is transformed to
    # https://github.com/hardening-io/tests-os-hardening/archive/2.0.tar.gz
    #
    # github commit:
    # https://github.com/hardening-io/tests-os-hardening/tree/48bd4388ddffde68badd83aefa654e7af3231876
    # is transformed to
    # https://github.com/hardening-io/tests-os-hardening/archive/48bd4388ddffde68badd83aefa654e7af3231876.tar.gz
    #
    # The patterns are anchored with \A and \z so the whole string has to be
    # the GitHub URL; ^ and $ would also match a single line inside a
    # multi-line string.
    #
    # Returns the archive URL, or target unchanged when no pattern matches.
    def self.transform(target)
      # support for default github url
      m = %r{\Ahttps?://(www\.)?github\.com/(?<user>[\w-]+)/(?<repo>[\w-]+)(\.git)?(/)?\z}.match(target)
      return "https://github.com/#{m[:user]}/#{m[:repo]}/archive/master.tar.gz" if m

      # support for branch and commit urls
      m = %r{\Ahttps?://(www\.)?github\.com/(?<user>[\w-]+)/(?<repo>[\w-]+)/tree/(?<commit>[\w\.]+)(/)?\z}.match(target)
      return "https://github.com/#{m[:user]}/#{m[:repo]}/archive/#{m[:commit]}.tar.gz" if m

      # if we could not find a match, return the original value
      target
    end

    # Downloads url into a temporary archive file.
    #
    # url  - archive URL to download
    # opts - options hash; 'user' and 'password' are sent as HTTP basic
    #        authentication credentials (empty strings when absent)
    #
    # The file suffix is derived from the Content-Type response header and
    # falls back to '.tar.gz' (with a warning) for unknown types.
    #
    # Returns the closed Tempfile holding the downloaded content.
    def self.download_archive(url, opts)
      remote = open_remote(
        url,
        http_basic_authentication: [opts['user'] || '', opts['password'] || ''],
      )

      content_type = remote.meta['content-type']
      # Ignore header parameters (e.g. "; charset=...") and case for the lookup.
      mime = content_type.to_s.split(';').first.to_s.strip.downcase
      file_type = MIME_TYPES[mime]

      # fall back to tar
      if file_type.nil?
        warn "Could not determine file type for content type #{content_type}. " \
             'Defaulting to .tar.gz'
        file_type = '.tar.gz'
      end

      # download content
      archive = Tempfile.new(['inspec-dl-', file_type])
      archive.binmode
      archive.write(remote.read)
      archive.close
      archive
    ensure
      # Release the handle returned by open-uri once the content is copied.
      remote.close if remote.respond_to?(:close)
    end

    # Downloads the archive behind url and hands it to the next resolver.
    def self.resolve_url(url, opts)
      archive = download_archive(url, opts)
      # TODO: At the moment we hand over the Tempfile object. This is necessary,
      # since otherwise Ruby will delete the file on disk(!!) as soon as
      # this call is finished (due to garbage-collection of Tempfile).
      # Resolve this by handing over the intermediate resolver to the next.
      resolve_next(archive)
    end

    # Opens url through open-uri. URI.open is preferred because Kernel#open
    # no longer handles URLs on Ruby 3.0+; older Rubies that lack a public
    # URI.open fall back to the open-uri patched Kernel#open.
    def self.open_remote(url, options)
      if URI.respond_to?(:open)
        URI.open(url, options)
      else
        open(url, options)
      end
    end
    private_class_method :open_remote
  end
end
add url fetcher 2016-02-21 03:00:52 +00:00			`# encoding: utf-8`
			`# author: Dominik Richter`
			`# author: Christoph Hartmann`

			`require 'uri'`
			`require 'tempfile'`
			`require 'open-uri'`

			`module Fetchers`
			`class Url < Inspec.fetcher(1)`
			`name 'url'`
			`priority 200`

			`attr_reader :files`

			`def self.resolve(target, opts = {})`
			`uri = URI.parse(target)`
			`return nil if uri.nil? or uri.scheme.nil?`
			`return nil unless %{ http https }.include? uri.scheme`
			`target = transform(target)`
			`# TODO: for now, this can be much less strict now vv`
			`return nil unless target.end_with?('tar.gz', 'zip')`
			`resolve_url(target, opts)`
			`rescue URI::Error => _e`
			`nil`
			`end`

			`# Transforms a browser github url to github tar url`
			`# We distinguish between three different Github URL types:`
			`# - Master URL`
			`# - Branch URL`
			`# - Commit URL`
			`#`
			`# master url:`
			`# https://github.com/nathenharvey/tmp_compliance_profile/ is transformed to`
			`# https://github.com/nathenharvey/tmp_compliance_profile/archive/master.tar.gz`
			`#`
			`# github branch:`
			`# https://github.com/hardening-io/tests-os-hardening/tree/2.0 is transformed to`
			`# https://github.com/hardening-io/tests-os-hardening/archive/2.0.tar.gz`
			`#`
			`# github commit:`
			`# https://github.com/hardening-io/tests-os-hardening/tree/48bd4388ddffde68badd83aefa654e7af3231876`
			`# is transformed to`
			`# https://github.com/hardening-io/tests-os-hardening/archive/48bd4388ddffde68badd83aefa654e7af3231876.tar.gz`
			`def self.transform(target)`
			`# support for default github url`
			`m = %r{^https?://(www\.)?github\.com/(?<user>[\w-]+)/(?<repo>[\w-]+)(\.git)?(/)?$}.match(target)`
			`return "https://github.com/#{m[:user]}/#{m[:repo]}/archive/master.tar.gz" if m`

			`# support for branch and commit urls`
			`m = %r{^https?://(www\.)?github\.com/(?<user>[\w-]+)/(?<repo>[\w-]+)/tree/(?<commit>[\w\.]+)(/)?$}.match(target)`
			`return "https://github.com/#{m[:user]}/#{m[:repo]}/archive/#{m[:commit]}.tar.gz" if m`

			`# if we could not find a match, return the original value`
			`target`
			`end`

			`MIME_TYPES = {`
			`'application/x-zip-compressed' => '.zip',`
			`'application/zip' => '.zip',`
			`'application/x-gzip' => '.tar.gz',`
			`'application/gzip' => '.tar.gz',`
			`}.freeze`

			`# download url into archive using opts,`
			`# returns File object and content-type from HTTP headers`
			`def self.download_archive(url, opts)`
			`remote = open(`
			`url,`
			`http_basic_authentication: [opts['user'] \|\| '', opts['password'] \|\| ''],`
			`)`

			`content_type = remote.meta['content-type']`
			`file_type = MIME_TYPES[content_type]`

			`# fall back to tar`
			`if file_type.nil?`
			`warn "Could not determine file type for content type #{content_type}."\`
			`'Defaulting to .tar.gz'`
			`file_type = 'tar.gz'`
			`end`

			`# download content`
			`archive = Tempfile.new(['inspec-dl-', file_type])`
			`archive.binmode`
			`archive.write(remote.read)`
			`archive.rewind`
			`archive.close`
			`archive`
			`end`

			`def self.resolve_url(url, opts)`
			`archive = download_archive(url, opts)`
			`# TODO: At the moment we hand over the Tempfile object. This is necessary,`
			`# since otherwise Ruby will delete the file on disk(!!) as soon as`
			`# this call is finished (due to garbage-collection of Tempfile).`
			`# Resolve this by handling over the intermediate resolver to the next.`
			`resolve_next(archive)`
			`end`
			`end`
			`end`