
Commit

move everything out of legacy folder
pirate committed Apr 27, 2019
1 parent 553f312 commit 1b8abc0
Showing 74 changed files with 3,155 additions and 2,622 deletions.
3 changes: 3 additions & 0 deletions archivebox/__init__.py
@@ -1,3 +1,6 @@
 __package__ = 'archivebox'
 
+from . import core
+from . import cli
+
 from .main import *
9 changes: 7 additions & 2 deletions archivebox/__main__.py
@@ -2,9 +2,14 @@

 __package__ = 'archivebox'
 
-from .cli.archivebox import main
+import sys
+from .cli import archivebox
 
 
+def main():
+    archivebox.main(args=sys.argv[1:], stdin=sys.stdin)
+
+
 if __name__ == '__main__':
-    main()
+    archivebox.main(args=sys.argv[1:], stdin=sys.stdin)
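
Not part of the commit, just a rough sketch of what the rewired __main__ entrypoint above amounts to when invoked programmatically; the 'version' subcommand is only an illustrative choice:

import sys
from archivebox.cli import archivebox

# Roughly what `python -m archivebox version` now does: forward argv and stdin
# to the CLI dispatcher instead of importing a legacy main() directly.
archivebox.main(args=['version'], stdin=sys.stdin)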

15 changes: 11 additions & 4 deletions archivebox/cli/__init__.py
@@ -2,13 +2,17 @@

 import os
 
-from typing import Dict
+from typing import Dict, List, Optional, IO
 from importlib import import_module
 
 CLI_DIR = os.path.dirname(os.path.abspath(__file__))
 
 # these common commands will appear sorted before any others for ease-of-use
-display_first = ('help', 'version', 'init', 'info', 'config', 'list', 'update', 'add', 'remove')
+meta_cmds = ('help', 'version')
+main_cmds = ('init', 'info', 'config')
+archive_cmds = ('add', 'remove', 'update', 'list')
+
+display_first = (*meta_cmds, *main_cmds, *archive_cmds)
 
 # every imported command module must have these properties in order to be valid
 required_attrs = ('__package__', '__command__', 'main')
@@ -42,11 +46,14 @@ def list_subcommands() -> Dict[str, str]:
     return dict(sorted(COMMANDS, key=display_order))
 
 
-def run_subcommand(subcommand: str, args=None) -> None:
+def run_subcommand(subcommand: str,
+                   subcommand_args: List[str]=None,
+                   stdin: Optional[IO]=None,
+                   pwd: Optional[str]=None) -> None:
     """run a given ArchiveBox subcommand with the given list of args"""
 
     module = import_module('.archivebox_{}'.format(subcommand), __package__)
-    module.main(args) # type: ignore
+    module.main(args=subcommand_args, stdin=stdin, pwd=pwd) # type: ignore
 
 
 SUBCOMMANDS = list_subcommands()
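
A rough usage sketch (not from the commit) of the new run_subcommand() signature above, assuming a 'version' subcommand module archivebox_version exists as implied by meta_cmds; the data-directory path is a hypothetical placeholder:

from archivebox.cli import run_subcommand

# Dynamically imports archivebox.cli.archivebox_version and calls its main(),
# forwarding the leftover args, the stdin handle, and the working directory.
run_subcommand(
    'version',
    subcommand_args=[],
    stdin=None,
    pwd='/path/to/archive',  # hypothetical data directory
)
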
60 changes: 14 additions & 46 deletions archivebox/cli/archivebox.py
@@ -5,19 +5,17 @@
 __command__ = 'archivebox'
 __description__ = 'ArchiveBox: The self-hosted internet archive.'
 
-import os
 import sys
 import argparse
 
-from . import list_subcommands, run_subcommand
-from ..legacy.config import OUTPUT_DIR
+from typing import Optional, List, IO
 
+from . import list_subcommands, run_subcommand
+from ..config import OUTPUT_DIR
 
-def parse_args(args=None):
-    args = sys.argv[1:] if args is None else args
-
+def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     subcommands = list_subcommands()
 
     parser = argparse.ArgumentParser(
         prog=__command__,
         description=__description__,
@@ -43,54 +41,24 @@ def parse_args(args=None):
         default=None,
     )
     parser.add_argument(
-        "args",
+        "subcommand_args",
         help="Arguments for the subcommand",
         nargs=argparse.REMAINDER,
     )
 
-    command = parser.parse_args(args)
+    command = parser.parse_args(args or ())
 
-    if command.help:
+    if command.help or command.subcommand is None:
         command.subcommand = 'help'
     if command.version:
         command.subcommand = 'version'
 
-    # print('--------------------------------------------')
-    # print('Command: ', sys.argv[0])
-    # print('Subcommand: ', command.subcommand)
-    # print('Args to pass:', args[1:])
-    # print('--------------------------------------------')
-
-    return command.subcommand, command.args
-
-
-def print_import_tutorial():
-    print('Welcome to ArchiveBox!')
-    print()
-    print('To import an existing archive (from a previous version of ArchiveBox):')
-    print(' 1. cd into your data dir OUTPUT_DIR (usually ArchiveBox/output) and run:')
-    print(' 2. archivebox init')
-    print()
-    print('To start a new archive:')
-    print(' 1. Create an emptry directory, then cd into it and run:')
-    print(' 2. archivebox init')
-    print()
-    print('For more information, see the migration docs here:')
-    print(' https://github.com/pirate/ArchiveBox/wiki/Migration')
-
-def main(args=None):
-    subcommand, subcommand_args = parse_args(args)
-    existing_index = os.path.exists(os.path.join(OUTPUT_DIR, 'index.json'))
-
-    if subcommand is None:
-        if existing_index:
-            run_subcommand('help', subcommand_args)
-        else:
-            print_import_tutorial()
-        raise SystemExit(0)
+    run_subcommand(
+        subcommand=command.subcommand,
+        subcommand_args=command.subcommand_args,
+        stdin=stdin,
+        pwd=pwd or OUTPUT_DIR,
+    )
 
-    run_subcommand(subcommand, subcommand_args)
-
 
 if __name__ == '__main__':
-    main()
+    main(args=sys.argv[1:], stdin=sys.stdin)
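
The dispatcher above relies on nargs=argparse.REMAINDER to leave everything after the subcommand untouched so it can be forwarded to the subcommand's own parser. A minimal standalone sketch of that behavior (an illustration, not code from the commit):

import argparse

parser = argparse.ArgumentParser(prog='archivebox')
parser.add_argument('subcommand', nargs='?', default=None)
parser.add_argument('subcommand_args', nargs=argparse.REMAINDER)

# The first positional is taken as the subcommand; the rest is passed through verbatim.
ns = parser.parse_args(['add', '--index-only', 'https://example.com'])
print(ns.subcommand)       # -> 'add'
print(ns.subcommand_args)  # -> ['--index-only', 'https://example.com']
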
107 changes: 46 additions & 61 deletions archivebox/cli/archivebox_add.py
@@ -7,90 +7,75 @@
 import sys
 import argparse
 
-from typing import List, Optional
+from typing import List, Optional, IO
 
-from ..legacy.config import stderr, check_dependencies, check_data_folder
-from ..legacy.util import (
-    handle_stdin_import,
-    handle_file_import,
-)
-from ..legacy.main import update_archive_data
+from ..main import add
+from ..util import SmartFormatter, accept_stdin
+from ..config import OUTPUT_DIR, ONLY_NEW
 
 
-def main(args: List[str]=None, stdin: Optional[str]=None) -> None:
-    check_data_folder()
-
-    args = sys.argv[1:] if args is None else args
-
+def main(args: Optional[List[str]]=None, stdin: Optional[IO]=None, pwd: Optional[str]=None) -> None:
     parser = argparse.ArgumentParser(
         prog=__command__,
         description=__description__,
         add_help=True,
+        formatter_class=SmartFormatter,
     )
-    # parser.add_argument(
-    #     '--depth', #'-d',
-    #     type=int,
-    #     help='Recursively archive all linked pages up to this many hops away',
-    #     default=0,
-    # )
     parser.add_argument(
-        '--only-new', #'-n',
+        '--update-all', #'-n',
         action='store_true',
-        help="Don't attempt to retry previously skipped/failed links when updating",
+        default=not ONLY_NEW,
+        help="Also retry previously skipped/failed links when adding new links",
     )
+    parser.add_argument(
+        '--index-only', #'-o',
+        action='store_true',
+        help="Add the links to the main index without archiving them",
+    )
-    # parser.add_argument(
-    #     '--mirror', #'-m',
-    #     action='store_true',
-    #     help='Archive an entire site (finding all linked pages below it on the same domain)',
-    # )
-    # parser.add_argument(
-    #     '--crawler', #'-r',
-    #     choices=('depth_first', 'breadth_first'),
-    #     help='Controls which crawler to use in order to find outlinks in a given page',
-    #     default=None,
-    # )
     parser.add_argument(
-        'url',
+        'import_path',
         nargs='?',
         type=str,
        default=None,
-        help='URL of page to archive (or path to local file)'
+        help=(
+            'URL or path to local file containing a list of links to import. e.g.:\n'
+            ' https://getpocket.com/users/USERNAME/feed/all\n'
+            ' https://example.com/some/rss/feed.xml\n'
+            ' ~/Downloads/firefox_bookmarks_export.html\n'
+            ' ~/Desktop/sites_list.csv\n'
+        )
     )
-    command = parser.parse_args(args)
-
-    check_dependencies()
-
-    ### Handle ingesting urls piped in through stdin
-    # (.e.g if user does cat example_urls.txt | archivebox add)
-    import_path = None
-    if stdin or not sys.stdin.isatty():
-        stdin_raw_text = stdin or sys.stdin.read()
-        if stdin_raw_text and command.url:
-            stderr(
-                '[X] You should pass either a path as an argument, '
-                'or pass a list of links via stdin, but not both.\n'
-            )
-            raise SystemExit(1)
-
-        import_path = handle_stdin_import(stdin_raw_text)
-
-    ### Handle ingesting url from a remote file/feed
-    # (e.g. if an RSS feed URL is used as the import path)
-    elif command.url:
-        import_path = handle_file_import(command.url)
-
-    update_archive_data(
-        import_path=import_path,
-        resume=None,
-        only_new=command.only_new,
+    command = parser.parse_args(args or ())
+    import_str = accept_stdin(stdin)
+    add(
+        import_str=import_str,
+        import_path=command.import_path,
+        update_all=command.update_all,
+        index_only=command.index_only,
+        out_dir=pwd or OUTPUT_DIR,
     )
 
 
 if __name__ == '__main__':
-    main()
+    main(args=sys.argv[1:], stdin=sys.stdin)
+
+
+# TODO: Implement these
+#
+# parser.add_argument(
+#     '--depth', #'-d',
+#     type=int,
+#     help='Recursively archive all linked pages up to this many hops away',
+#     default=0,
+# )
+# parser.add_argument(
+#     '--mirror', #'-m',
+#     action='store_true',
+#     help='Archive an entire site (finding all linked pages below it on the same domain)',
+# )
+# parser.add_argument(
+#     '--crawler', #'-r',
+#     choices=('depth_first', 'breadth_first'),
+#     help='Controls which crawler to use in order to find outlinks in a given page',
+#     default=None,
+# )
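
A rough usage sketch (not part of the commit) of the two ways links reach the reworked add command above, via stdin or via import_path; the archive directory path is a hypothetical placeholder and the exact behavior of accept_stdin() for these inputs is assumed from its call site:

import io
from archivebox.cli.archivebox_add import main

# Roughly what `echo 'https://example.com' | archivebox add --index-only` does:
main(
    args=['--index-only'],
    stdin=io.StringIO('https://example.com\n'),
    pwd='/path/to/archive',  # hypothetical data directory
)

# Roughly what `archivebox add ~/Desktop/sites_list.csv` does:
main(
    args=['~/Desktop/sites_list.csv'],
    stdin=None,
    pwd='/path/to/archive',
)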