Avoid spurious downloads during dependency management

Before, a URL based source might be downloaded multiple times during the
dependency fetching and lockfile creation. This commit tries to avoid
this by:

1) Memoizing data about the archive to avoid re-fetching the archive

2) Adding a CachedFetcher wrapper around the fetcher class to help
ensure that callers always consult the cache before fetching.

Signed-off-by: Steven Danna <steve@chef.io>
This commit is contained in:
Steven Danna 2016-09-22 11:23:32 +01:00 committed by Christoph Hartmann
parent 6190e4f828
commit 50b27c4b32
5 changed files with 111 additions and 77 deletions

View file

@ -85,21 +85,12 @@ module Fetchers
@archive_path ||= download_archive(path)
end
def sha256
c = if @archive_path
File.read(@archive_path)
else
content
end
Digest::SHA256.hexdigest c
end
def resolved_source
@resolved_source ||= { url: @target, sha256: sha256 }
end
def cache_key
sha256
@archive_shasum ||= sha256
end
def to_s
@ -108,16 +99,9 @@ module Fetchers
private
def open_target
Inspec::Log.debug("Fetching URL: #{@target}")
http_opts = {}
http_opts['ssl_verify_mode'.to_sym] = OpenSSL::SSL::VERIFY_NONE if @insecure
http_opts['Authorization'] = "Bearer #{@token}" if @token
open(@target, http_opts)
end
def content
open_target.read
def sha256
file = @archive_path || temp_archive_path
Digest::SHA256.hexdigest File.read(file)
end
def file_type_from_remote(remote)
@ -132,20 +116,34 @@ module Fetchers
file_type
end
# download url into archive using opts,
# returns File object and content-type from HTTP headers
def download_archive(path)
remote = open_target
file_type = file_type_from_remote(remote)
final_path = "#{path}#{file_type}"
# download content
archive = Tempfile.new(['inspec-dl-', file_type])
def temp_archive_path
@temp_archive_path ||= download_archive_to_temp
end
# Downloads archive to temporary file with side effect :( of setting @archive_type
def download_archive_to_temp
return @temp_archive_path if ! @temp_archive_path.nil?
Inspec::Log.debug("Fetching URL: #{@target}")
http_opts = {}
http_opts['ssl_verify_mode'.to_sym] = OpenSSL::SSL::VERIFY_NONE if @insecure
http_opts['Authorization'] = "Bearer #{@token}" if @token
remote = open(@target, http_opts)
@archive_type = file_type_from_remote(remote) # side effect :(
archive = Tempfile.new(['inspec-dl-', @archive_type])
archive.binmode
archive.write(remote.read)
archive.rewind
archive.close
FileUtils.mv(archive.path, final_path)
Inspec::Log.debug("Archive stored at temporary location: #{archive.path}")
@temp_archive_path = archive.path
end
def download_archive(path)
download_archive_to_temp
final_path = "#{path}#{@archive_type}"
FileUtils.mv(temp_archive_path, final_path)
Inspec::Log.debug("Fetched archive moved to: #{final_path}")
@temp_archive_path = nil
final_path
end
end

View file

@ -0,0 +1,67 @@
# encoding: utf-8
require 'inspec/fetcher'
require 'forwardable'
module Inspec
class CachedFetcher
extend Forwardable
attr_reader :cache, :target, :fetcher
def initialize(target, cache)
@target = target
@fetcher = Inspec::Fetcher.resolve(target)
if @fetcher.nil?
fail("Could not fetch inspec profile in #{target.inspect}.")
end
@cache = cache
end
def resolved_source
fetch
@fetcher.resolved_source
end
def cache_key
k = if target.is_a?(Hash)
target[:sha256] || target[:ref]
end
if k.nil?
fetcher.cache_key
else
k
end
end
def fetch
if cache.exists?(cache_key)
Inspec::Log.debug "Using cached dependency for #{target}"
[cache.prefered_entry_for(cache_key), false]
else
Inspec::Log.debug "Dependency does not exist in the cache #{target}"
fetcher.fetch(cache.base_path_for(fetcher.cache_key))
assert_cache_sanity!
[fetcher.archive_path, fetcher.writable?]
end
end
def assert_cache_sanity!
if target.respond_to?(:key?) && target.key?(:sha256)
if fetcher.resolved_source[:sha256] != target[:sha256]
fail <<EOF
The remote source #{fetcher} no longer has the requested content:
Request Content Hash: #{target[:sha256]}
Actual Content Hash: #{fetcher.resolved_source[:sha256]}
For URL, supermarket, compliance, and other sources that do not
provide versioned artifacts, this likely means that the remote source
has changed since your lockfile was generated.
EOF
end
end
end
end
end

View file

@ -1,5 +1,5 @@
# encoding: utf-8
require 'inspec/fetcher'
require 'inspec/cached_fetcher'
require 'inspec/dependencies/dependency_set'
require 'digest'
@ -82,9 +82,7 @@ module Inspec
end
def fetcher
@fetcher ||= Inspec::Fetcher.resolve(opts)
fail "No fetcher for #{name} (options: #{opts})" if @fetcher.nil?
@fetcher
@fetcher ||= Inspec::CachedFetcher.new(opts, @cache)
end
def dependencies
@ -94,17 +92,18 @@ module Inspec
end
def to_s
"#{name ? name : '<unfetched>'} (#{resolved_source})"
name
end
def profile
return @profile if ! @profile.nil?
opts = @opts.dup
opts[:cache] = @cache
opts[:backend] = @backend
if !@dependencies.nil?
opts[:dependencies] = Inspec::DependencySet.from_array(@dependencies, @cwd, @cache, @backend)
end
@profile ||= Inspec::Profile.for_target(opts, opts)
@profile = Inspec::Profile.for_fetcher(fetcher, opts)
end
end
end

View file

@ -24,6 +24,8 @@ module Inspec
Inspec::Fetcher
end
attr_accessor :target
def writable?
false
end

View file

@ -5,7 +5,7 @@
require 'forwardable'
require 'inspec/polyfill'
require 'inspec/fetcher'
require 'inspec/cached_fetcher'
require 'inspec/file_provider'
require 'inspec/source_reader'
require 'inspec/metadata'
@ -21,45 +21,8 @@ module Inspec
class Profile # rubocop:disable Metrics/ClassLength
extend Forwardable
#
# TODO: This function is getting pretty gross.
#
def self.resolve_target(target, cache = nil)
cache ||= Cache.new
fetcher = Inspec::Fetcher.resolve(target)
if fetcher.nil?
fail("Could not fetch inspec profile in #{target.inspect}.")
end
cache_key = if target.is_a?(Hash)
target[:sha256] || target[:ref] || fetcher.cache_key
else
fetcher.cache_key
end
if cache.exists?(cache_key)
Inspec::Log.debug "Using cached dependency for #{target}"
[cache.prefered_entry_for(cache_key), false]
else
fetcher.fetch(cache.base_path_for(fetcher.cache_key))
if target.respond_to?(:key?) && target.key?(:sha256)
if fetcher.resolved_source[:sha256] != target[:sha256]
fail <<EOF
The remote source #{fetcher} no longer has the requested content:
Request Content Hash: #{target[:sha256]}
Actual Content Hash: #{fetcher.resolved_source[:sha256]}
For URL, supermarket, compliance, and other sources that do not
provide versioned artifacts, this likely means that the remote source
has changed since your lockfile was generated.
EOF
end
end
[fetcher.archive_path, fetcher.writable?]
end
Inspec::CachedFetcher.new(target, cache || Cache.new)
end
def self.for_path(path, opts)
@ -72,9 +35,14 @@ EOF
new(reader, opts)
end
def self.for_fetcher(fetcher, opts)
path, writable = fetcher.fetch
for_path(path, opts.merge(target: fetcher.target, writable: writable))
end
def self.for_target(target, opts = {})
path, writable = resolve_target(target, opts[:cache])
for_path(path, opts.merge(target: target, writable: writable))
fetcher = resolve_target(target, opts[:cache])
for_fetcher(fetcher, opts)
end
attr_reader :source_reader, :backend, :runner_context