Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ft/docx rendering, RE: #304 #347

Open
wants to merge 7 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Encode download url, fix tests and style updates.
  • Loading branch information
cslzchen authored and NyanHelsing committed Aug 3, 2018
commit 32bf62ce2d9a9e2beabf3f9d78aac06a9935a726
5 changes: 2 additions & 3 deletions mfr/core/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,9 +147,8 @@ def __init__(self, message, *args, metadata_url: str='', response: str='', **kwa


class QueryParameterError(ProviderError):
"""The MFR related errors raised from a :class:`mfr.core.provider` and relating to query parameters
should inherit from MetadataError
This error is thrown when a query parameter is used missused
"""The MFR related errors raised from a :class:`mfr.core.provider`and relating to query
parameters. This error is thrown when the query has an invalid value.
"""

__TYPE = 'query_parameter'
Expand Down
16 changes: 9 additions & 7 deletions mfr/core/provider.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import abc
import markupsafe

from aiohttp import HttpBadRequest
import furl
import markupsafe

from mfr.core import exceptions
from mfr.server import settings
from mfr.core import exceptions as core_exceptions
from mfr.core.metrics import MetricsRecord
from mfr.server import settings as server_settings


class BaseProvider(metaclass=abc.ABCMeta):
Expand All @@ -17,12 +18,13 @@ class BaseProvider(metaclass=abc.ABCMeta):
def __init__(self, request, url, action=None):
self.request = request
url_netloc = furl.furl(url).netloc
if url_netloc not in settings.ALLOWED_PROVIDER_NETLOCS:
raise exceptions.ProviderError(
if url_netloc not in server_settings.ALLOWED_PROVIDER_NETLOCS:
raise core_exceptions.ProviderError(
message="{} is not a permitted provider domain.".format(
markupsafe.escape(url_netloc)
),
code=400
# TODO: using HTTPStatus.BAD_REQUEST fails tests, not sure why and I will take a another look later
code=HttpBadRequest.code
)
self.url = url
self.action = action
Expand Down Expand Up @@ -63,9 +65,9 @@ def serialize(self):
return {
'name': self.name,
'ext': self.ext,
'is_public': self.is_public,
'content_type': self.content_type,
'unique_key': str(self.unique_key),
'download_url': str(self.download_url),
'stable_id': None if self.stable_id is None else str(self.stable_id),
'is_public': self.is_public,
}
8 changes: 5 additions & 3 deletions mfr/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,25 +80,27 @@ def make_renderer(name, metadata, file_path, url, assets_url, export_url):
normalized_name = (name and name.lower()) or 'none'
if metadata.is_public:
try:
# Use the public renderer if exist
return driver.DriverManager(
namespace='mfr.public_renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except:
# Check for a public renderer, if one doesn't exist, use a regular one
# Real exceptions handled by main driver.DriverManager
# If public render does not exist, use default renderer by MFR
# If public render exists but exceptions occurs, delay the exception handling
pass

try:
# Use the default MFR handler
return driver.DriverManager(
namespace='mfr.renderers',
name=normalized_name,
invoke_on_load=True,
invoke_args=(metadata, file_path, url, assets_url, export_url),
).driver
except RuntimeError:
except:
raise exceptions.MakeRendererError(
namespace='mfr.renderers',
name=normalized_name,
Expand Down
26 changes: 15 additions & 11 deletions mfr/extensions/office365/render.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
import os
from urllib import parse

import furl
from mako.lookup import TemplateLookup

from mfr.core import extension
from mako.lookup import TemplateLookup
from mfr.extensions.office365 import settings
from mfr.extensions.office365 import settings as office365_settings


class Office365Renderer(extension.BaseRenderer):
"""A renderer for use with public .docx files.
"""A renderer for .docx files that are publicly available.

Office online can render `.docx` files to `.pdf` for us. This renderer will only be made
if a query param with `public_file=true` is present. It then generates and embeds an
office online URL into an `iframe` and returns the template. The file it is trying to
render MUST be public.

Office online can render .docx files to pdf for us.
This renderer will only ever be made if a query param with `public_file=1` is sent.
It then generates and embeds an office online url into an
iframe and returns the template. The file it is trying to render MUST
be available publically online. This renderer will not work if testing locally.
Note: this renderer DOES NOT work locally.

"""

Expand All @@ -23,9 +26,10 @@ class Office365Renderer(extension.BaseRenderer):
]).get_template('viewer.mako')

def render(self):
download_url = furl.furl(self.metadata.download_url).set(query='')
url = settings.OFFICE_BASE_URL + download_url.url
return self.TEMPLATE.render(base=self.assets_url, url=url)
download_url = furl.furl(self.metadata.download_url).set(query='').url
encoded_download_url = parse.quote(download_url)
office_render_url = office365_settings.OFFICE_BASE_URL + encoded_download_url
return self.TEMPLATE.render(base=self.assets_url, url=office_render_url)

@property
def file_required(self):
Expand Down
55 changes: 26 additions & 29 deletions mfr/providers/osf/provider.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,21 @@
import os
import json
import hashlib
from http import HTTPStatus
import logging
from urllib.parse import urlparse
import mimetypes
import os
from urllib.parse import urlparse

import furl
import aiohttp
from aiohttp.errors import ContentEncodingError

from waterbutler.core import streams

from mfr.core import exceptions
from mfr.core import provider
from mfr.core.utils import sizeof_fmt
from mfr.providers.osf import settings
from mfr.settings import MAX_FILE_SIZE_TO_RENDER
from mfr.core.exceptions import TooBigToRenderError
from mfr import settings as mfr_settings
from mfr.core import exceptions as core_exceptions
from mfr.core import provider, utils
from mfr.providers.osf import settings as provider_settings

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -79,14 +78,14 @@ async def metadata(self):
response_reason = metadata_response.reason
response_headers = metadata_response.headers
await metadata_response.release()
if response_code != 200:
raise exceptions.MetadataError(
if response_code != HTTPStatus.OK:
raise core_exceptions.MetadataError(
'Failed to fetch file metadata from WaterButler. Received response: ',
'code {} {}'.format(str(response_code), str(response_reason)),
metadata_url=download_url,
response=response_reason,
provider=self.NAME,
code=400
code=HTTPStatus.BAD_REQUEST
)

try:
Expand All @@ -111,12 +110,12 @@ async def metadata(self):
name, ext = os.path.splitext(metadata['data']['name'])
size = metadata['data']['size']

max_file_size = MAX_FILE_SIZE_TO_RENDER.get(ext)
max_file_size = mfr_settings.MAX_FILE_SIZE_TO_RENDER.get(ext)
if max_file_size and size and int(size) > max_file_size:
raise TooBigToRenderError(
raise core_exceptions.TooBigToRenderError(
"This file with extension '{ext}' exceeds the size limit of {max_size} and will not "
"be rendered. To view this file download it and view it "
"offline.".format(ext=ext, max_size=sizeof_fmt(max_file_size)),
"offline.".format(ext=ext, max_size=utils.sizeof_fmt(max_file_size)),
requested_size=int(size), maximum_size=max_file_size,
)

Expand All @@ -131,18 +130,16 @@ async def metadata(self):
stable_id = hashlib.sha256(stable_str.encode('utf-8')).hexdigest()
logger.debug('stable_identifier: str({}) hash({})'.format(stable_str, stable_id))
is_public = False

if 'public_file' in cleaned_url.args:
if cleaned_url.args['public_file'] not in ['0', '1']:
raise exceptions.QueryParameterError(
'The `public_file` query paramter should either `0`, `1`, or unused. Instead '
'got {}'.format(cleaned_url.args['public_file']),
public_file = cleaned_url.args.get('public_file', None)
if public_file:
if public_file not in ['True', 'False']:
raise core_exceptions.QueryParameterError(
'Invalid value for query parameter `public_file`: {}'.format(cleaned_url.args['public_file']),
url=download_url,
provider=self.NAME,
code=400,
code=HTTPStatus.BAD_REQUEST,
)

is_public = cleaned_url.args['public_file'] == '1'
is_public = public_file == 'True'

return provider.ProviderMetadata(
name,
Expand All @@ -157,21 +154,21 @@ async def metadata(self):
async def download(self):
"""Download file from WaterButler, returning stream."""
download_url = await self._fetch_download_url()
headers = {settings.MFR_IDENTIFYING_HEADER: '1'}
headers = {provider_settings.MFR_IDENTIFYING_HEADER: '1'}
response = await self._make_request('GET', download_url, allow_redirects=False, headers=headers)

if response.status >= 400:
if response.status >= HTTPStatus.BAD_REQUEST:
resp_text = await response.text()
logger.error('Unable to download file: ({}) {}'.format(response.status, resp_text))
raise exceptions.DownloadError(
raise core_exceptions.DownloadError(
'Unable to download the requested file, please try again later.',
download_url=download_url,
response=resp_text,
provider=self.NAME,
)

self.metrics.add('download.saw_redirect', False)
if response.status in (302, 301):
if response.status in (HTTPStatus.MOVED_PERMANENTLY, HTTPStatus.FOUND):
await response.release()
response = await aiohttp.request('GET', response.headers['location'])
self.metrics.add('download.saw_redirect', True)
Expand Down Expand Up @@ -204,8 +201,8 @@ async def _fetch_download_url(self):
await request.release()

logger.debug('osf-download-resolver: request.status::{}'.format(request.status))
if request.status != 302:
raise exceptions.MetadataError(
if request.status != HTTPStatus.FOUND:
raise core_exceptions.MetadataError(
request.reason,
metadata_url=self.url,
provider=self.NAME,
Expand Down
21 changes: 14 additions & 7 deletions tests/extensions/office365/test_renderer.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,23 @@
from urllib import parse

import furl
import pytest

from mfr.extensions.office365 import settings
from mfr.core.provider import ProviderMetadata
from mfr.extensions.office365 import Office365Renderer
from mfr.extensions.office365 import settings as office365_settings


@pytest.fixture
def metadata():
return ProviderMetadata('test', '.pdf', 'text/plain', '1234',
return ProviderMetadata(
'test',
'.pdf',
'text/plain',
'1234',
'http://wb.osf.io/file/test.pdf?token=1234&public_file=1',
is_public=True)
is_public=True
)


@pytest.fixture
Expand All @@ -20,7 +27,7 @@ def file_path():

@pytest.fixture
def url():
return 'http://osf.io/file/test.pdf'
return parse.quote('http://osf.io/file/test.pdf')


@pytest.fixture
Expand All @@ -40,8 +47,8 @@ def renderer(metadata, file_path, url, assets_url, export_url):

class TestOffice365Renderer:

def test_render_pdf(self, renderer, metadata, assets_url):
def test_render_pdf(self, renderer, metadata):
download_url = furl.furl(metadata.download_url).set(query='')
body_url = settings.OFFICE_BASE_URL + download_url.url
office_render_url = office365_settings.OFFICE_BASE_URL + parse.quote(download_url.url)
body = renderer.render()
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(body_url) in body
assert '<iframe src={} frameborder=\'0\'></iframe>'.format(office_render_url) in body
2 changes: 1 addition & 1 deletion tests/server/handlers/test_query_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from tests import utils


class TestRenderHandler(utils.HandlerTestCase):
class TestQueryParamsHandler(utils.HandlerTestCase):

@testing.gen_test
def test_format_url(self):
Expand Down