Skip to content

Commit

Permalink
TODOs added
Browse files Browse the repository at this point in the history
  • Loading branch information
lakshmanaram committed Dec 30, 2016
1 parent 1ce58c8 commit b458776
Show file tree
Hide file tree
Showing 4 changed files with 170 additions and 68 deletions.
53 changes: 51 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# cvscan
<!-- # cvscan
Your not-so-typical resume parser
Instructions
========
Expand Down Expand Up @@ -61,4 +61,53 @@ cvscan add -q "B.S,B.Tech,B.Arch"
### remove
```
cvscan remove --qual "B.Arch"
```
```
## Extra Information
### add
```
cvscan add -e "machine learning,artificial intelligence"
```
### remove
```
cvscan remove --extra "machine learning,artificial intelligence"
```
File Descriptions
============
## class Cvscan
```
cvscan = Cvscan(name,path)
```
#### Extract
Converts the input file to raw_text and then calls the parse method
```
cvscan.extract()
```
#### Display extracted text
```
cvscan.show()
```
-->
### Attributes
| Attribute | Description |
|---------------------|-----------|
|path | Stores the path of the resume |
|raw_text | Stores the resume as raw text |
|URLs | Stores all the URLs from the resume |
|name | Applicant's name |
|emails | Applicant's email addresses |
|Phone number | Applicant's contact number |
|address | Applicant's address |
|experience | Applicant's experience in years |
|cleaned_resume | raw_text after removing English stopwords |
|skills | Applicant's skillset |
|qualifications | Applicant's qualifications |
|degree_info | Information about the applicant's qualifications |

## configurations.py
Contains the regular expressions used throughout the project
## converter.py
Contains methods to convert resume from input format to raw text
#### pdf_to_text
Uses the pdfminer library to fetch raw text from the resume. Special characters and bullets in the resume are replaced with a newline character.
This formatted text from the resume is returned.
4 changes: 2 additions & 2 deletions cvscan/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def extract(self):
if self.raw_text is not '':
self.parse()
else:
raise ValueError("Error parsing resume.")
raise ValueError("Error extracting resume text.")

def parse(self):
self.URLs = annotations_parser.fetch_pdf_urls(self.path)
Expand Down Expand Up @@ -70,4 +70,4 @@ def show(self):
"qualifications" : self.qualifications,
"qualifications_info" : self.degree_info,
"extra_info" : self.extra_info
}
}
68 changes: 34 additions & 34 deletions cvscan/details_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"""
def fetch_email(resume_text):
try:
regular_expression = re.compile(regex.email,re.IGNORECASE)
regular_expression = re.compile(regex.email, re.IGNORECASE)
emails = []
result = re.search(regular_expression, resume_text)
while result:
Expand All @@ -51,7 +51,7 @@ def fetch_email(resume_text):
"""
def fetch_phone(resume_text):
try:
regular_expression = re.compile(regex.get_phone(3,3,10),re.IGNORECASE)
regular_expression = re.compile(regex.get_phone(3, 3, 10), re.IGNORECASE)
result = re.search(regular_expression, resume_text)
phone = ''
if result:
Expand All @@ -60,9 +60,9 @@ def fetch_phone(resume_text):
if part:
phone += part
if phone is '':
for i in range(1,10):
for j in range(1,10-i):
regular_expression =re.compile(regex.get_phone(i,j,10),re.IGNORECASE)
for i in range(1, 10):
for j in range(1, 10-i):
regular_expression =re.compile(regex.get_phone(i, j, 10), re.IGNORECASE)
result = re.search(regular_expression, resume_text)
if result:
result = result.groups()
Expand All @@ -82,7 +82,7 @@ def fetch_phone(resume_text):
Utility function that fetches address in the resume.
Params: resume_text type: string
returns: address type:dictionary keys:district,state,pincode
returns: address type:dictionary keys:district, state, pincode
"""
def fetch_address(resume_text):
Expand All @@ -99,11 +99,11 @@ def fetch_address(resume_text):

with open(pincode_input_path, 'rb') as fp:
pincodes = pickle.load(fp)
with open(address_input_path,'rb') as fp:
with open(address_input_path, 'rb') as fp:
address = pickle.load(fp)

regular_expression = re.compile(regex.pincode)
regex_result = re.search(regular_expression,resume_text)
regex_result = re.search(regular_expression, resume_text)
while regex_result:
useful_resume_text = resume_text[:regex_result.start()].lower()
pincode_tuple = regex_result.group()
Expand All @@ -119,17 +119,17 @@ def fetch_address(resume_text):

result_address.clear()
resume_text = resume_text[regex_result.end():]
regex_result = re.search(regular_expression,resume_text)
regex_result = re.search(regular_expression, resume_text)

resume_text = initial_resume_text.lower()

with open(states_input,'rb') as fp:
with open(states_input, 'rb') as fp:
states = pickle.load(fp)
with open(district_state_input,'rb') as fp:
with open(district_state_input, 'rb') as fp:
district_states = pickle.load(fp)

# Check if the input is a separate word in resume_text
def if_separate_word(pos,word):
def if_separate_word(pos, word):
if (pos != 0) and resume_text[pos-1].isalpha():
return False
final_pos = pos+len(word)
Expand All @@ -143,12 +143,12 @@ def if_separate_word(pos,word):
district_pos = len(resume_text)
for state in states:
pos = resume_text.find(state)
if (pos != -1) and(pos < state_pos) and if_separate_word(pos,state):
if (pos != -1) and(pos < state_pos) and if_separate_word(pos, state):
state_pos = pos
result_state = state
for district in district_states.keys():
pos = resume_text.find(district)
if (pos != -1) and (pos < district_pos) and if_separate_word(pos,district):
if (pos != -1) and (pos < district_pos) and if_separate_word(pos, district):
district_pos = pos
result_district = district
if (result_state is '') and (result_district is not ''):
Expand All @@ -170,7 +170,7 @@ def if_separate_word(pos,word):
def calculate_experience(resume_text):
#
def get_month_index(month):
month_dict = {'jan':1,'feb':2,'mar':3,'apr':4,'may':5,'jun':6,'jul':7,'aug':8,'sep':9,'oct':10,'nov':11,'dec':12}
month_dict = {'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6, 'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12}
return month_dict[month.lower()]

try:
Expand All @@ -179,16 +179,16 @@ def get_month_index(month):
start_year = -1
end_month = -1
end_year = -1
regular_expression = re.compile(regex.date_range,re.IGNORECASE)
regular_expression = re.compile(regex.date_range, re.IGNORECASE)
regex_result = re.search(regular_expression, resume_text)
while regex_result:
date_range = regex_result.group()
year_regex = re.compile(regex.year)
year_result = re.search(year_regex,date_range)
year_result = re.search(year_regex, date_range)
if (start_year == -1) or (int(year_result.group()) <= start_year):
start_year = int(year_result.group())
month_regex = re.compile(regex.months_short,re.IGNORECASE)
month_result = re.search(month_regex,date_range)
month_regex = re.compile(regex.months_short, re.IGNORECASE)
month_result = re.search(month_regex, date_range)
if month_result:
current_month = get_month_index(month_result.group())
if (start_month == -1) or (current_month < start_month):
Expand All @@ -197,11 +197,11 @@ def get_month_index(month):
end_month = date.today().month # current month
end_year = date.today().year # current year
else:
year_result = re.search(year_regex,date_range[year_result.end():])
year_result = re.search(year_regex, date_range[year_result.end():])
if (end_year == -1) or (int(year_result.group()) >= end_year):
end_year = int(year_result.group())
month_regex = re.compile(regex.months_short,re.IGNORECASE)
month_result = re.search(month_regex,date_range)
month_regex = re.compile(regex.months_short, re.IGNORECASE)
month_result = re.search(month_regex, date_range)
if month_result:
current_month = get_month_index(month_result.group())
if (end_month == -1) or (current_month > end_month):
Expand Down Expand Up @@ -231,12 +231,12 @@ def fetch_jobs(cleaned_resume):
positions = []
for job in jobs.keys():
job_regex = r'[^a-zA-Z]'+job+r'[^a-zA-Z]'
regular_expression = re.compile(job_regex,re.IGNORECASE)
regex_result = re.search(regular_expression,cleaned_resume)
regular_expression = re.compile(job_regex, re.IGNORECASE)
regex_result = re.search(regular_expression, cleaned_resume)
if regex_result:
positions.append(regex_result.start())
job_positions.append(job.capitalize())
job_positions = [job for (pos,job) in sorted(zip(positions,job_positions))]
job_positions = [job for (pos, job) in sorted(zip(positions, job_positions))]

# For finding the most frequent job category
hash_jobs = {}
Expand All @@ -252,7 +252,7 @@ def fetch_jobs(cleaned_resume):
hash_jobs['Student'] = 0
hash_jobs['Other'] = -1

return (job_positions,max(hash_jobs,key=hash_jobs.get).capitalize())
return (job_positions, max(hash_jobs, key=hash_jobs.get).capitalize())


"""
Expand All @@ -263,7 +263,7 @@ def fetch_jobs(cleaned_resume):
"""
def fetch_skills(cleaned_resume):
with open(dirpath.PKGPATH + '/data/skills/skills','rb') as fp:
with open(dirpath.PKGPATH + '/data/skills/skills', 'rb') as fp:
skills = pickle.load(fp)

skill_set = []
Expand Down Expand Up @@ -292,17 +292,17 @@ def fetch_qualifications(resume_text):
info = []
for qualification in qualifications:
qual_regex = r'[^a-zA-Z]'+qualification+r'[^a-zA-Z]'
regular_expression = re.compile(qual_regex,re.IGNORECASE)
regex_result = re.search(regular_expression,resume_text)
regular_expression = re.compile(qual_regex, re.IGNORECASE)
regex_result = re.search(regular_expression, resume_text)
while regex_result:
degree.append(qualification)
resume_text = resume_text[regex_result.end():]
lines = [line.rstrip().lstrip()
for line in resume_text.split('\n') if line.rstrip().lstrip()]
if lines:
info.append(lines[0])
regex_result = re.search(regular_expression,resume_text)
return degree,info
regex_result = re.search(regular_expression, resume_text)
return degree, info


"""
Expand All @@ -319,10 +319,10 @@ def fetch_extra(resume_text):
extra_information = []
for info in extra:
extra_regex = r'[^a-zA-Z]'+info+r'[^a-zA-Z]'
regular_expression = re.compile(extra_regex,re.IGNORECASE)
regex_result = re.search(regular_expression,resume_text)
regular_expression = re.compile(extra_regex, re.IGNORECASE)
regex_result = re.search(regular_expression, resume_text)
while regex_result:
extra_information.append(info)
resume_text = resume_text[regex_result.end():]
regex_result = re.search(regular_expression,resume_text)
regex_result = re.search(regular_expression, resume_text)
return extra_information
Loading

0 comments on commit b458776

Please sign in to comment.