Merge pull request #319 from amberzw/flask

Paging, filtering, and sorting methods
amberzw · Sep 6, 2017 · bd28b76 · bd28b76
2 parents 26c650d + a89ab76
commit bd28b76
Show file tree

Hide file tree

Showing 6 changed files with 438 additions and 53 deletions.
diff --git a/pvacseq/server/config/swagger.yaml b/pvacseq/server/config/swagger.yaml
@@ -37,13 +37,51 @@ paths:
         items:
           type: "string"
         default: "default"
+      - name: "filters"
+        in: "query"
+        description: "Array of filters to be applied. ex: size>200"
+        required: false
+        type: "array"
+        items:
+          type: "string"
+        collectionFormat: 'csv'
+        default: "none"
+      - name: "sorting"
+        in: "query"
+        description: "Ordered array of ways to sort data, + before property name\
+        indicating ascending, - indicating descending. ex: +size,-fileID"
+        required: false
+        type: "array"
+        items:
+          type: "string"
+        collectionFormat: 'csv'
+        default: "none"
+      - name: "page"
+        in: "query"
+        description: "Page of results to return"
+        required: false
+        type: "integer"
+        default: 1
+      - name: "count"
+        in: "query"
+        description: "Number of results to return per page"
+        required: false
+        type: "integer"
+        default: 10
       responses:
         200:
-          description: "An array of results"
+          description: "An object containing metadata and an array of results"
           schema:
             type: "array"
             items:
-              $ref: "#/definitions/Result"
+              type: "object"
+              properties:
+                _meta:
+                  $ref: "#/definitions/MetaData"
+                results:
+                  type: "array"
+                  items:
+                    $ref: "#/definitions/Result"
         default:
           description: "Unexpected error"
           schema:
@@ -60,7 +98,7 @@ paths:
       operationId: "pvacseq.server.controllers.files.list_input"
       responses:
         200:
-          description: "An array of input files"
+          description: "An object containing metadata and an array of input files"
           schema:
             type: "array"
             items:
@@ -559,14 +597,53 @@ paths:
       summary: "Get list of running processes"
       description: "This endpoint returns a list of active pvacseq runs\n"
       operationId: "pvacseq.server.controllers.processes.processes"
+      parameters:
+      - name: "filters"
+        in: "query"
+        description: "Array of filters to be applied. ex: size>200"
+        required: false
+        type: "array"
+        items:
+          type: "string"
+        collectionFormat: 'csv'
+        default: "none"
+      - name: "sorting"
+        in: "query"
+        description: "Ordered array of ways to sort data, + before property name\
+        indicating ascending, - indicating descending. ex: +size,-fileID"
+        required: false
+        type: "array"
+        items:
+          type: "string"
+        collectionFormat: 'csv'
+        default: "none"
+      - name: "page"
+        in: "query"
+        description: "Page of information to return"
+        required: false
+        type: "integer"
+        default: 1
+      - name: "count"
+        in: "query"
+        description: "Number of objects to return per page"
+        required: false
+        type: "integer"
+        default: 10
       responses:
         200:
-          description: "A list of active processes"
+          description: "An object containing metadata and a list of active processes"
           schema:
             type: "array"
-            items: {
-              $ref: "#/definitions/ProcessSummary"
-            }
+            items:
+              type: "object"
+              properties:
+                _meta:
+                  $ref: "#/definitions/MetaData"
+                results:
+                  type: "array"
+                  items: {
+                    $ref: "#/definitions/ProcessSummary"
+                  }
         default:
           description: "Unexpected error"
           schema:
@@ -678,11 +755,6 @@ paths:
           description: "Returns object mapping each algorithm to an array of valid alleles for that algorithm"
           schema:
             type: "object"
-            properties:
-              default:
-                $ref: "#/definitions/MapItem"
-            additionalProperties:
-              $ref: "#/definitions/MapItem"
         default:
           description: "Unexpected error"
           schema:
@@ -745,7 +817,7 @@ paths:
       operationId: "pvacseq.server.controllers.files.list_dropbox"
       responses:
         200:
-          description: "Returns a list of files in the dropbox"
+          description: "An object containing metadata and a list of files in the dropbox"
           schema:
             type: "array"
             items:
@@ -1093,16 +1165,21 @@ definitions:
       description:
         type: "string"
         description: "A description of the file's contents in the context of pVAC-Seq"
-  MapItem:
+  MetaData:
     type: "object"
     properties:
-      algorithm:
-        type: "string"
-      alleles:
-        type: "array"
-        items: {
-          type: "string"
-        }
+      current_page:
+        type: "integer"
+        description: "The page of information being returned"
+      count:
+        type: "integer"
+        description: "The number of items to return per page"
+      total_pages:
+        type: "integer"
+        description: "The total number of pages of data"
+      total_count:
+        type: "integer"
+        description: "The total number of items"
   Error:
     type: "object"
     properties:

diff --git a/pvacseq/server/controllers/database.py b/pvacseq/server/controllers/database.py
@@ -17,7 +17,6 @@
 NA_pattern = re.compile(r'^NA$')
 queryfilters = re.compile(r'(.+)(<=?|>=?|!=|==)(.+)')
 
-
 def init_column_mapping(row, schema):
     """Generate initial estimates of column data types"""
     defs = {column_filter(col): 'text' for col in row}

diff --git a/pvacseq/server/controllers/files.py b/pvacseq/server/controllers/files.py
@@ -5,12 +5,12 @@
 import subprocess
 from .processes import fetch_process, is_running
 from .database import filterfile
-from .utils import descriptions, column_filter
+from .utils import descriptions, column_filter, filterdata, sort, fullresponse
 
 # details for each file to be appended to the output of results_get
 def resultfile(id, process, fileID):
     return({
-        'fileID':fileID,
+        'fileID':int(fileID),
         'description':process[0]['files'][fileID]['description'],
         'display_name':process[0]['files'][fileID]['display_name'],
         'url':'/api/v1/processes/%d/results/%s'%(id, fileID),
@@ -22,7 +22,7 @@ def resultfile(id, process, fileID):
         ]).decode().split()[0])-1,
     })
 
-def results_get(id, type):
+def results_get(id, type, filters, sorting, page, count):
     """Get the list of result files from a specific pVAC-Seq run"""
     data = current_app.config['storage']['loader']()
     if id == -1:
@@ -49,7 +49,7 @@ def results_get(id, type):
             for fileID in process[0]['files']:
                 if (re.search('%s.tsv'%filter, process[0]['files'][fileID]['display_name'])):
                     output.append(resultfile(id,process,fileID))
-    return output
+    return filterdata(output, filters, sorting, page, count)
 
 
 def list_input(path = None):

diff --git a/pvacseq/server/controllers/processes.py b/pvacseq/server/controllers/processes.py
@@ -6,7 +6,7 @@
 import sys
 import subprocess
 from shlex import split
-from .utils import descriptions
+from .utils import descriptions, filterdata
 from shutil import move as movetree
 
 spinner = re.compile(r'[\\\b\-/|]{2,}')
@@ -49,13 +49,13 @@ def fixpath(src, root, *keys):
         )
     return src
 
-def processes():
+def processes(filters, sorting, page, count):
     """Returns a list of processes, and whether or not each process is running"""
     data = current_app.config['storage']['loader']()
     #Python comprehensions are great!
     # data['process-%d'%id]['returncode'] = process[1].returncode
     # data['process-%d'%id]['status'] = 1 if process[1].returncode == 0 else -1
-    return [
+    return filterdata([
          {
              'id':proc[0],
              'running':is_running(proc[1]),
@@ -108,7 +108,7 @@ def processes():
                 )),
                 range(data['processid']+1)
             ) if 'process-%d'%(proc[0]) in data
-    ]
+    ], filters, sorting, page, count)
 
 
 def process_info(id):

diff --git a/pvacseq/server/controllers/utils.py b/pvacseq/server/controllers/utils.py
@@ -15,6 +15,8 @@
 import webbrowser
 import threading
 from postgresql.exceptions import UndefinedTableError
+from math import ceil
+import operator
 
 class dataObj(dict):
     def __init__(self, datafiles, sync):
@@ -156,7 +158,7 @@ def initialize(current_app, args):
         print(
             "pid's of old pVAC-Seq runs with id's",
             data['processid'],
-            "and lower may be innacurate"
+            "and lower may be inaccurate"
         )
     current_app.config['storage']['children']={}
     current_app.config['storage']['manifest']={}
@@ -173,7 +175,7 @@ def initialize(current_app, args):
             quote(visapp_path)
         ),
         shell=True,
-        stdout=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL
     )
     print(
         "Visualization server started on PID",
@@ -578,3 +580,106 @@ def cleanup_frontend():
         # threading.Timer(2.5, lambda :webbrowser.open('http://localhost:8000')).start()
 
     print("Initialization complete.  Booting API")
+
+
+### filtering, sorting, and paging functions shared by multiple files ###
+queryfilters = re.compile(r'(.+)(<=?|>=?|!=|==)(.+)')
+
+ops = {
+    '<': operator.lt,
+    '<=': operator.le,
+    '==': operator.eq,
+    '!=': operator.ne,
+    '>=': operator.ge,
+    '>': operator.gt
+}
+
+# see if string is a number
+def is_number(string):
+    try:
+        float(string)
+        return True
+    except ValueError:
+        return False
+
+def cmp(arg1, op, arg2):
+    operation = ops.get(op)
+    return operation(arg1,arg2)
+
+def fullresponse(data, page, count):
+    if count == -1:
+        count = len(data)
+    return ({
+        "_meta": {
+            "current_page":page,
+            "per_page":count,
+            "total_pages":ceil(len(data)/count),
+            "total_count":len(data)
+        },
+        "result": data[(count*(page-1)):((count*page)) if (count*page)<len(data) else len(data)]
+    })
+
+def sort(data, sorting, page, count, columns):
+    if not len(sorting) or sorting[0]=="none":
+        return fullresponse(data, page, count)
+    i = len(sorting)-1
+    while i > -1:
+        col = sorting[i]
+        if not col.startswith('-') and not col.startswith('+'):
+            return ({
+                "code": 400,
+                "message": "Please indicate which direction you'd like to sort by by putting a + or - in front of the column name",
+                "fields": "sorting"
+            }, 400)
+        if col[1:] not in columns:
+            return ({
+                "code": 400,
+                "message": "Unknown column name %s" % col[1:],
+                "fields": "sorting"
+            }, 400)
+        data = sorted(data, key=operator.itemgetter(col[1:]), reverse=True if col.startswith('-') else False)
+        i-=1
+    return fullresponse(data, page, count)
+
+def filterdata(data, filters, sorting, page, count):
+    if not len(data):
+        return fullresponse(data, page, count)
+    columns = [name for name in data[0]]
+    if not len(filters) or filters[0]=="none":
+        return sort(data, sorting, page, count, columns)
+    filteredlist = []
+    for i in range(len(data)):
+        comparisons = []
+        for j in range(len(filters)):
+            f = filters[j].strip()
+            if not len(f):
+                continue
+            result = queryfilters.match(f)
+            if not result:
+                return ({
+                    "code":400,
+                    "message": "Encountered an invalid filter (%s)" % f,
+                    "fields": "filtering"
+                }, 400)
+            colname = result.group(1)
+            if colname not in columns:
+                return ({
+                    "code": 400,
+                    "message": "Unknown column name %s" % result.group(1),
+                    "fields": "filtering"
+                }, 400)
+            op = result.group(2)
+            val = result.group(3)
+            comp = data[i][colname]
+            if type(comp) == int:
+                val = int(val)
+            # see if string is actually a number for accurate number comparisons,
+            # avoiding string comparisons of numbers in cmp() function
+            elif is_number(comp):
+                data[i]
+                val = float(val)
+            if not cmp(comp, op, val):
+                break
+            if j == len(filters)-1:
+                filteredlist.append(data[i])
+    return sort(filteredlist, sorting, page, count, columns)