Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Use Retrievers for fetching remote / local data #104

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Introduce Retrievers
They are fluffy and can fetch stuff to include in ZIPs
  • Loading branch information
julik committed Mar 20, 2024
commit 778dfd55840f279c45fe37439d21871df100227e
8 changes: 4 additions & 4 deletions lib/zipline.rb
Original file line number Diff line number Diff line change
@@ -1,9 +1,5 @@
require "zip_kit"

require_relative "zipline/version"
require_relative "zipline/zip_handler"
require_relative "zipline/retrievers"

# class MyController < ApplicationController
# include Zipline
# def index
Expand All @@ -13,6 +9,10 @@
# end
# end
module Zipline
require_relative "zipline/version"
require_relative "zipline/zip_handler"
require_relative "zipline/retrievers"

def self.included(into_controller)
into_controller.include(ZipKit::RailsStreaming)
super
Expand Down
126 changes: 126 additions & 0 deletions lib/zipline/retrievers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Retriever for generic IO-like objects. Streams the wrapped object's
# contents to the caller through successive +read+ calls.
class Zipline::IORetriever
  # Wraps +item+ in a retriever when it responds to both +read+ and
  # +read_nonblock+.
  #
  # @param item [Object] candidate item
  # @return [Zipline::IORetriever, nil] nil when +item+ is not IO-like
  def self.build_for(item)
    io_like = item.respond_to?(:read) && item.respond_to?(:read_nonblock)
    new(item) if io_like
  end

  # @param io [#read] the object the bytes are read from
  def initialize(io)
    @io = io
  end

  # Yields the result of each +read(1024)+ call on the wrapped object,
  # stopping as soon as +read+ returns a falsy value.
  #
  # @yieldparam [String] whatever +read+ returned for that call
  # @return [nil]
  def each_chunk
    bytes_per_read = 1024
    piece = @io.read(bytes_per_read)
    while piece
      yield(piece)
      piece = @io.read(bytes_per_read)
    end
  end
end

# Retriever for File objects. Reads the file from its beginning and closes
# it once reading is over.
class Zipline::FileRetriever < Zipline::IORetriever
  # Only accepts instances of File; everything else yields nil. For a File,
  # the decision is delegated to the parent class's +build_for+.
  #
  # @param item [Object] candidate item
  # @return [Zipline::FileRetriever, nil]
  def self.build_for(item)
    return unless item.is_a?(File)

    super(item)
  end

  # Rewinds the file, then streams it using the parent implementation.
  # The file is closed afterwards, also when rewinding or reading raises.
  def each_chunk(&on_chunk)
    begin
      @io.rewind
      super(&on_chunk)
    ensure
      @io.close
    end
  end
end

# Retriever that streams the response body of an HTTP(S) URL.
class Zipline::HTTPRetriever
  # Returns a retriever when +url+ is a String holding an absolute
  # http:// or https:// URL, and nil for anything else so that the next
  # retriever (for instance the one for plain strings) can be tried.
  #
  # @param url [Object] candidate item
  # @return [Zipline::HTTPRetriever, nil]
  def self.build_for(url)
    # Cheap prefix test first: arbitrary (possibly large) string payloads
    # never reach the URI parser.
    return unless url.is_a?(String) && url.start_with?("http://", "https://")

    parsed = begin
      URI.parse(url)
    rescue URI::InvalidURIError
      # Text that merely starts like a URL is not a URL.
      nil
    end
    # URI::HTTPS is a subclass of URI::HTTP, so a single check covers both.
    return unless parsed.is_a?(URI::HTTP) && parsed.host && !parsed.host.empty?

    new(url)
  end

  # @param url [String] the URL to download from
  def initialize(url)
    @uri = URI(url)
  end

  # Performs a GET request and yields the response body piece by piece as
  # it is read from the socket.
  # NOTE(review): the response status is not inspected here, so the body of
  # an error or redirect response is yielded as-is - confirm this is intended.
  def each_chunk(&block)
    Net::HTTP.get_response(@uri) do |response|
      response.read_body(&block)
    end
  end

  # Tells whether the download may be attempted again after error +e+.
  # Currently always false.
  def may_restart_after?(e)
    # Server error, IO error etc
    false
  end
end

# Retriever that serves the bytes of an in-memory String.
class Zipline::StringRetriever
  # @param item [Object] candidate item
  # @return [Zipline::StringRetriever, nil] nil unless +item+ is a String
  def self.build_for(item)
    return unless item.is_a?(String)
    new(item)
  end

  # @param string [String] the content to serve
  def initialize(string)
    @string = string
  end

  # Yields the string in slices of at most 1024 bytes. Nothing is yielded
  # for an empty string.
  #
  # @yieldparam [String] a non-empty slice of the string
  def each_chunk
    chunk_size = 1024
    offset = 0
    total_bytes = @string.bytesize
    # Bounding the loop by the byte size (instead of waiting for byteslice
    # to return nil) avoids the empty slice byteslice returns when the
    # offset is exactly at the end of the string.
    while offset < total_bytes
      yield(@string.byteslice(offset, chunk_size))
      offset += chunk_size
    end
  end

  # Tells whether retrieval may be attempted again after error +e+.
  # Always false.
  def may_restart_after?(e)
    false
  end
end

# Picks a retriever for CarrierWave objects. It never instantiates itself;
# it hands out an HTTP retriever for Fog-stored files and a file retriever
# for locally stored ones.
class Zipline::CarrierwaveRetriever
  # @param item [Object] candidate item
  # @return [Object, nil] a retriever, or nil when CarrierWave is not loaded
  #   or +item+ is not a supported CarrierWave object
  def self.build_for(item)
    # An uploader only wraps the stored file; what has to be fetched is the
    # file it holds (this is what the previous ZipHandler#normalize did).
    if defined?(CarrierWave::Uploader::Base) && item.is_a?(CarrierWave::Uploader::Base)
      item = item.file
    end

    if defined?(CarrierWave::Storage::Fog::File) && item.is_a?(CarrierWave::Storage::Fog::File)
      Zipline::HTTPRetriever.new(item.url)
    elsif defined?(CarrierWave::SanitizedFile) && item.is_a?(CarrierWave::SanitizedFile)
      # Opened in binary mode; the file retriever closes it after reading.
      Zipline::FileRetriever.build_for(File.open(item.path, "rb"))
    end
  end
end

# Retriever for ActiveStorage blobs, and for the attachment objects that
# expose one through +blob+.
class Zipline::ActiveStorageRetriever
  # @param item [Object] candidate item
  # @return [Zipline::ActiveStorageRetriever, nil] nil when ActiveStorage is
  #   not defined or +item+ is none of the recognized ActiveStorage types
  def self.build_for(item)
    return unless defined?(ActiveStorage)

    if is_active_storage_attachment?(item) || is_active_storage_one?(item)
      # Attachments are unwrapped to the blob they point at.
      new(item.blob)
    elsif is_active_storage_blob?(item)
      new(item)
    end
  end

  # @return [Boolean, nil] nil when ActiveStorage::Attachment is not defined
  def self.is_active_storage_attachment?(item)
    return unless defined?(ActiveStorage::Attachment)

    item.is_a?(ActiveStorage::Attachment)
  end

  # @return [Boolean, nil] nil when ActiveStorage::Attached::One is not defined
  def self.is_active_storage_one?(item)
    return unless defined?(ActiveStorage::Attached::One)

    item.is_a?(ActiveStorage::Attached::One)
  end

  # @return [Boolean, nil] nil when ActiveStorage::Blob is not defined
  def self.is_active_storage_blob?(item)
    return unless defined?(ActiveStorage::Blob)

    item.is_a?(ActiveStorage::Blob)
  end

  # @param blob [#download] the blob whose content gets retrieved
  def initialize(blob)
    @blob = blob
  end

  # Forwards the given block to the blob's +download+ method.
  def each_chunk(&on_chunk)
    @blob.download(&on_chunk)
  end
end

# Picks a retriever for Paperclip attachments. It never instantiates itself;
# it delegates to the file retriever for filesystem storage and to the HTTP
# retriever for every other storage.
class Zipline::PaperclipRetriever
  # @param item [Object] candidate item
  # @return [Object, nil] whatever the delegate's +build_for+ returns, or nil
  #   when Paperclip is not loaded or +item+ is not a Paperclip::Attachment
  def self.build_for(item)
    return unless defined?(Paperclip) && item.is_a?(Paperclip::Attachment)

    if item.options[:storage] == :filesystem
      # Opened in binary mode; the file retriever closes it after reading.
      Zipline::FileRetriever.build_for(File.open(item.path, "rb"))
    else
      # The URL comes from the attachment itself (+item+).
      Zipline::HTTPRetriever.build_for(item.expiring_url)
    end
  end
end
134 changes: 38 additions & 96 deletions lib/zipline/zip_handler.rb
Original file line number Diff line number Diff line change
@@ -1,102 +1,44 @@
module Zipline
class ZipHandler
# Stores the collaborators used while handling files.
#
# @param streamer [Object] receiver of the +write_file+ calls
# @param logger [Object] receiver of +error+ messages
def initialize(streamer, logger)
  @streamer, @logger = streamer, logger
end

# Normalizes +file+ and writes it out under +name+.
# Any StandardError is logged (when a logger is set) and then re-raised.
#
# @param file [Object] the item to write
# @param name [String] entry name, also mentioned in the logged message
# @param options [Hash] passed through to +write_file+
def handle_file(file, name, options)
  write_file(normalize(file), name, options)
rescue => error
  # Since most APM packages do not trace errors occurring within streaming
  # Rack bodies, it can be helpful to print the error to the Rails log at least
  message = "zipline: an exception (#{error.inspect}) was raised when serving the ZIP body." \
    " The error occurred when handling file #{name.inspect}"
  @logger&.error(message)
  raise
end

# Turns the provided object into a description of where its bytes come from.
# CarrierWave uploaders are unwrapped to the file they hold first; the
# checks below are then tried in order and the first match wins.
# Returns a hash of the form {url: aUrl}, {file: anIoObject} or {blob: aBlob}.
# Raises ArgumentError when nothing matches.
def normalize(file)
  file = file.file if defined?(CarrierWave::Uploader::Base) && file.is_a?(CarrierWave::Uploader::Base)

  if defined?(Paperclip) && file.is_a?(Paperclip::Attachment)
    return {file: File.open(file.path)} if file.options[:storage] == :filesystem

    return {url: file.expiring_url}
  end

  return {url: file.url} if defined?(CarrierWave::Storage::Fog::File) && file.is_a?(CarrierWave::Storage::Fog::File)
  return {file: File.open(file.path)} if defined?(CarrierWave::SanitizedFile) && file.is_a?(CarrierWave::SanitizedFile)
  return {file: file} if is_io?(file)
  return {blob: file} if defined?(ActiveStorage::Blob) && file.is_a?(ActiveStorage::Blob)
  return {blob: file.blob} if is_active_storage_attachment?(file) || is_active_storage_one?(file)

  # Duck-typed fallbacks for objects of unknown classes.
  return {url: file.url} if file.respond_to?(:url)
  return {file: File.open(file.path)} if file.respond_to?(:path)
  return {file: File.open(file.file)} if file.respond_to?(:file)
  return {url: file} if is_url?(file)

  raise(ArgumentError, "Bad File/Stream")
end

def write_file(file, name, options)
@streamer.write_file(name, **options.slice(:modification_time)) do |writer_for_file|
if file[:url]
the_remote_uri = URI(file[:url])
class Zipline::ZipHandler
# Stores the collaborators used while handling files.
#
# @param streamer [Object] receiver of the +write_file+ calls
# @param logger [Object] receiver of +error+ messages
def initialize(streamer, logger)
  @streamer, @logger = streamer, logger
end

# Writes +file+ out under +name+ via +write_item+.
# Any StandardError is logged (when a logger is set) and then re-raised.
#
# @param file [Object] the item to write
# @param name [String] entry name, also mentioned in the logged message
# @param options [Hash] passed through to +write_item+
def handle_file(file, name, options)
  write_item(file, name, options)
rescue => error
  # Since most APM packages do not trace errors occurring within streaming
  # Rack bodies, it can be helpful to print the error to the Rails log at least
  message = "zipline: an exception (#{error.inspect}) was raised when serving the ZIP body." \
    " The error occurred when handling file #{name.inspect}"
  @logger&.error(message)
  raise
end

Net::HTTP.get_response(the_remote_uri) do |response|
response.read_body do |chunk|
writer_for_file << chunk
end
end
elsif file[:file]
IO.copy_stream(file[:file], writer_for_file)
file[:file].close
elsif file[:blob]
file[:blob].download { |chunk| writer_for_file << chunk }
else
raise(ArgumentError, "Bad File/Stream")
end
# Writes +item+ to the streamer as an entry called +name+, copying its
# content over chunk by chunk.
#
# @param item [Object] anything +pick_retriever_for+ finds a retriever for
# @param name [String] name of the entry
# @param options [Hash] only the :modification_time key is forwarded
def write_item(item, name, options)
  source = pick_retriever_for(item)
  entry_options = options.slice(:modification_time)
  @streamer.write_file(name, **entry_options) do |sink|
    source.each_chunk { |chunk| sink << chunk }
  end
end

private

# Tells whether the object can be read from, i.e. responds to +read+.
def is_io?(io_ish)
  io_ish.respond_to?(:read)
end

# Tells whether +file+ is an ActiveStorage::Attachment.
# Evaluates to nil while that class is not defined.
def is_active_storage_attachment?(file)
  return unless defined?(ActiveStorage::Attachment)

  file.is_a?(ActiveStorage::Attachment)
end

# Tells whether +file+ is an ActiveStorage::Attached::One.
# Evaluates to nil while that class is not defined.
def is_active_storage_one?(file)
  return unless defined?(ActiveStorage::Attached::One)

  file.is_a?(ActiveStorage::Attached::One)
end

# Tells whether +url+ parses as an HTTP or HTTPS URI.
# Anything the URI parser raises on counts as "not a URL".
def is_url?(url)
  parsed = URI.parse(url)
  parsed.is_a?(URI::HTTP) || parsed.is_a?(URI::HTTPS)
rescue
  false
end
# Finds a retriever able to fetch the content of +item+.
#
# Each retriever class is asked in turn via +build_for+; the first non-nil
# answer wins. The order matters: specific types come first, and URL strings
# are tried before plain strings.
#
# @param item [Object] the object whose content should end up in the ZIP
# @return [Object] a retriever responding to +each_chunk+
# @raise [ArgumentError] when no retriever class accepts +item+ (the same
#   error class the previous implementation used for unsupported input)
def pick_retriever_for(item)
  retriever_classes = [
    Zipline::CarrierwaveRetriever,
    Zipline::ActiveStorageRetriever,
    Zipline::PaperclipRetriever,
    Zipline::FileRetriever,
    Zipline::IORetriever,
    Zipline::HTTPRetriever,
    Zipline::StringRetriever
  ]
  retriever_classes.each do |retriever_class|
    maybe_retriever = retriever_class.build_for(item)
    return maybe_retriever if maybe_retriever
  end

  # Only reached when every class declined the item.
  raise ArgumentError, "Don't know how to handle a file in the shape of #{item.inspect}"
end
end