Skip to content

Commit

Permalink
- added function to remove attributes according to a blacklist but keep attributes according to a whitelist
Browse files Browse the repository at this point in the history
  • Loading branch information
Egon2k committed Aug 19, 2018
1 parent fe0221d commit 747f293
Showing 1 changed file with 28 additions and 5 deletions.
33 changes: 28 additions & 5 deletions PolarionParser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import xml.etree.ElementTree as ET
import re
import os
import os.path
from bs4 import BeautifulSoup

# globals
Expand All @@ -10,6 +9,9 @@

# Blacklist: attribute names that removeDefinedAttributes() drops from tags.
REMOVE_ATTRIBUTES = [
    "style",
    "font",
    "size",
    "color",
]

# Whitelist: tag names that removeDefinedAttributes() leaves untouched,
# i.e. these tags keep all of their attributes even if blacklisted above.
EXCLUDE_FROM_ATTRIBUTE_REMOVAL = [
    "table",
    "tbody",
    "tr",
    "th",
    "td",
]


def analyseFolderStruct():
Expand Down Expand Up @@ -80,8 +82,9 @@ def getDescriptionFromWorkitem(id):
def removeDefinedAttributes(soup):
    """Strip blacklisted attributes from the tags below *soup*, in place.

    Every descendant tag has the attributes named in REMOVE_ATTRIBUTES
    removed, except tags whose name is listed in
    EXCLUDE_FROM_ATTRIBUTE_REMOVAL, which keep all of their attributes.

    Args:
        soup: A BeautifulSoup object (or tag) whose descendants are cleaned.

    Returns:
        The same *soup* object, modified in place, to allow call chaining.
    """
    ## https://stackoverflow.com/a/39976027
    for tag in soup.recursiveChildGenerator():
        # Text nodes are yielded too; they carry no attribute dict.
        if not hasattr(tag, 'attrs'):
            continue
        # Whitelisted tags are skipped entirely so their attributes survive.
        if tag.name in EXCLUDE_FROM_ATTRIBUTE_REMOVAL:
            continue
        # .items() instead of .iteritems(): works on Python 2 and Python 3.
        tag.attrs = {
            key: value
            for key, value in tag.attrs.items()
            if key not in REMOVE_ATTRIBUTES
        }
    return soup

########################################################################################
Expand Down Expand Up @@ -147,11 +150,31 @@ def removeDefinedAttributes(soup):
description = getDescriptionFromWorkitem(id)

subsoup = BeautifulSoup(description, 'html.parser')
removeDefinedAttributes(subsoup)
#removeDefinedAttributes(subsoup)

#for subtag in subsoup:
# if subtag.name == "span":
# print "span found"
# if hasattr(subtag, 'attrs'):
# for subtag.attrs in subtag.attrs:
# if subtag.attrs == "class":
# print type(subtag.attrs)
# print subtag.attrs
# exit()

#print subsoup.find_all('span')
# Replace every work-item link placeholder (<span data-item-id="...">) with a
# blue <font> element showing "<id> - <title>".
for span in subsoup.find_all('span'):
    item_id = span.get('data-item-id')
    if not item_id:
        # Ordinary span without a work-item reference: leave it alone.
        continue
    # https://www.crummy.com/software/BeautifulSoup/bs4/doc/#replace-with
    # The tag name and its attributes must be passed separately; putting
    # 'color="blue"' into the name yields a malformed closing tag.
    new_tag = subsoup.new_tag("font", color="blue")
    new_tag.string = item_id + " - " + getTitleFromWorkitem(item_id)
    span.replace_with(new_tag)


## https://stackoverflow.com/questions/17136127/calling-a-function-on-captured-group-in-re-sub
#description = re.sub(r'<span class="polarion-rte-link" data-type="workItem" id="fake" data-item-id="([a-zA-Z]*-\d{1,6})" data-option-id="long"></span>', r'\1' + " - WI Title" , description)
description = re.sub(r'<span class="polarion-rte-link" data-type="workItem" id="fake" data-item-id="([a-zA-Z]*-\d{1,6})" data-option-id="long"></span>', getIdAndTitleFromRegex , description)
#description = re.sub(r'<span class="polarion-rte-link" data-type="workItem" id="fake" data-item-id="([a-zA-Z]*-\d{1,6})" data-option-id="long"></span>', getIdAndTitleFromRegex , subsoup.encode('utf-8'))

f.write("<b>")
f.write(id + " " + getTitleFromWorkitem(id))
Expand Down

0 comments on commit 747f293

Please sign in to comment.