docstring on dataset related

gokulsg · Aug 13, 2018 · f79a369 · f79a369
1 parent 13240c3
commit f79a369
Show file tree

Hide file tree

Showing 6 changed files with 390 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -11,12 +11,13 @@ Using python 3.5, core libraries are :
 - tensorflow
 - keras
 - opencv
+- mxnet
 
 ## Datasets
 Datasets are saved in data/ directory
 - [IMDB-Wiki](https://data.vision.ee.ethz.ch/cvl/rrothe/imdb-wiki/)
 - [Adience](https://talhassner.github.io/home/projects/Adience/Adience-data.html)
-- [UTKFace](ttps://susanqq.github.io/UTKFace/)
+- [UTKFace](https://susanqq.github.io/UTKFace/)
 - [FGNET](http://yanweifu.github.io/FG_NET_data/index.html)
 
 ## Preprocess
@@ -32,10 +33,10 @@ Datasets are saved in data/ directory
 
 
 ## References and Acknowledgments
-This project is part of my internship program at [Nodeflux](nodeflux.io) as data scientist from July - August, 2018
+This project is part of my internship program at [Nodeflux](https://nodeflux.io/) as a data scientist, from July to August 2018.
 1. [Rothe R, Timofte R, Van Gool L. Dex: Deep expectation of apparent age from a single image[C]//Proceedings of the IEEE International Conference on Computer Vision Workshops. 2015: 10-15.](https://www.vision.ee.ethz.ch/en/publications/papers/proceedings/eth_biwi_01229.pdf)
-1. [Rothe R, Timofte R, Van Gool L. Deep expectation of real and apparent age from a single image without facial landmarks[J]. International Journal of Computer Vision, 2016: 1-14.](https://www.vision.ee.ethz.ch/en/publications/papers/articles/eth_biwi_01299.pdf)
-1. [[IJCAI18] SSR-Net: A Compact Soft Stagewise Regression Network for Age Estimation](https://github.com/shamangary/SSR-Net)
-1. [yu4u/age-gender-estimation Keras implementation of a CNN network for age and gender estimation](https://github.com/yu4u/age-gender-estimation)
-1. [deepinsight/insightface Face Recognition Project on MXNet](https://github.com/deepinsight/insightface)
-1. [abewley/sort Simple, online, and realtime tracking of multiple objects in a video sequence](https://github.com/abewley/sort)
+2. [Rothe R, Timofte R, Van Gool L. Deep expectation of real and apparent age from a single image without facial landmarks[J]. International Journal of Computer Vision, 2016: 1-14.](https://www.vision.ee.ethz.ch/en/publications/papers/articles/eth_biwi_01299.pdf)
+3. [[IJCAI18] SSR-Net: A Compact Soft Stagewise Regression Network for Age Estimation](https://github.com/shamangary/SSR-Net)
+4. [yu4u/age-gender-estimation Keras implementation of a CNN network for age and gender estimation](https://github.com/yu4u/age-gender-estimation)
+5. [deepinsight/insightface Face Recognition Project on MXNet](https://github.com/deepinsight/insightface)
+6. [abewley/sort Simple, online, and realtime tracking of multiple objects in a video sequence](https://github.com/abewley/sort)
diff --git a/data/README.md b/data/README.md
@@ -1 +1,30 @@
-Place the datasets here
+# Data directory
+
+**Place the datasets here**
+
+## Folder naming and content
+- Save each dataset in its own folder and preserve its original directory structure
+- For the Adience dataset, provide two folders: one for the images and one for the fold .txt files
+- Suggested folder structure
+  ```bash
+  data/
+    ├── adience
+    ├── adience_fold
+    ├── fgnet
+    ├── imdb
+    ├── utkface
+    └── wiki
+  ```
+
+## Generating .csv db file
+Use make_db.py
+```bash
+usage: make_db.py [-h] --db_name {imdb,wiki,utkface,fgnet,adience} --path PATH
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --db_name {imdb,wiki,utkface,fgnet,adience}
+                        Dataset name
+  --path PATH           Path to dataset folder
+```
+For Adience, use the path to the folder containing the Adience fold .txt files.
diff --git a/data/db/README.md b/data/db/README.md
@@ -0,0 +1 @@
+All .csv db files will be saved here
diff --git a/data/make_db.py b/data/make_db.py
@@ -1,4 +1,169 @@
 import argparse
 import os
 import numpy as np
-from scipy.io import loadmat
+import utils
+import pandas as pd
+import re
+from glob import glob
+parser = argparse.ArgumentParser()
+parser.add_argument('--db_name',
+                    required=True,
+                    choices=['imdb', 'wiki', 'utkface', 'fgnet', 'adience'],
+                    help='Dataset name')
+parser.add_argument('--path',
+                    required=True,
+                    help="Path to dataset folder")
+
+
+def make_from_imdb(path):
+    """Create .csv file as db from IMDB dataset
+    Parameters
+    ----------
+    path : string
+        Path to IMDB dataset folder
+    """
+
+    data = utils.load_data('imdb', path+'/imdb.mat')
+    data = utils.clean_data(data)
+    data['db_name'] = path
+    data.to_csv('db/imdb.csv',  columns=['db_name', 'full_path', 'age', 'gender'], index=False)
+
+
+def make_from_wiki(path):
+    """Create .csv file as db from Wiki dataset
+    Parameters
+    ----------
+    path : string
+        Path to Wiki dataset folder
+    """
+
+    data = utils.load_data('wiki', path+'/wiki.mat')
+    data = utils.clean_data(data)
+    data['db_name'] = path
+    data.to_csv('db/wiki.csv',  columns=['db_name', 'full_path', 'age', 'gender'], index=False)
+
+
+def make_from_utkface(path):
+    """Create .csv file as db from UTKface dataset
+    Parameters
+    ----------
+    path : string
+        Path to UTKface dataset folder
+    """
+
+    image_list = []
+    for i in range(1, 4):
+        image_list.extend(glob(os.path.join(path, 'part{}/*.jpg'.format(i))))
+
+    result = dict()
+    age = [int(im.split('/')[-1].split('_')[0]) for im in (image_list)]
+    gender = [im.split('/')[-1].split('_')[1] for im in (image_list)]
+    result['full_path'] = image_list
+    result['age'] = age
+    result['gender'] = gender
+
+    result = pd.DataFrame.from_dict(result)
+    result = result.loc[(result['gender'] != '3') & (result['gender'] != '')]
+    result['gender'] = result['gender'].astype('int8') ^ 1
+
+    def removedb(row):
+        res = row.split('/')[1:]
+        return '/'.join(res)
+
+    result['full_path'] = result['full_path'].map(removedb)
+    result['db_name'] = path
+    result = result[['db_name', 'full_path', 'age', 'gender']]
+    result.to_csv('db/UTKface.csv', index=False)
+
+
+def make_from_fgnet(path):
+    """Create .csv file as db from FGNET dataset
+    Parameters
+    ----------
+    path : string
+        Path to FGNET dataset folder
+    """
+
+    path = path + '/images/*.JPG'
+    paths = glob(path)
+    data = pd.DataFrame()
+    data['db_name'] = path
+    data['full_path'] = paths
+    p = re.compile('[0-9]+')
+
+    def get_age(row):
+        flname = row.split('/')[-1]
+        age = flname.split('.')[0].split('A')[-1]
+        age = p.match(age).group()
+        return int(age)
+    data['age'] = data['full_path'].map(get_age)
+
+    def clean_path(row):
+        return row.split('/')[-1]
+    data['full_path'] = data['full_path'].map(clean_path)
+    data.to_csv('db/FGNET.csv', columns=['db_name', 'full_path', 'age', 'gender'], index=False)
+
+
+def make_from_adience(path):
+    """Create .csv file as db from Adience dataset
+    Parameters
+    ----------
+    path : string
+        Path to Adience dataset folder
+    """
+
+    fold_files = glob(path + "/*.txt")
+    data = pd.read_csv(fold_files[0], sep='\t')
+    for file in fold_files[1:]:
+        temp = pd.read_csv(file, sep='\t')
+        data = pd.concat([data, temp])
+    data['full_path'] = data['user_id'] + '/coarse_tilt_aligned_face.' + \
+        data['face_id'].astype('str') + '.' + data['original_image']
+
+    def rnd(low, high):
+        return np.random.randint(low, high + 1)
+
+    def makeAge(age):
+        if age == '(0, 2)':
+            return rnd(0, 2)
+        elif age == '(4, 6)':
+            return rnd(4, 6)
+        elif age in ['(8, 12)', '(8, 23)']:
+            return rnd(8, 12)
+        elif age == '(15, 20)':
+            return rnd(15, 20)
+        elif age in ['(25, 32)', '(27, 32)']:
+            return rnd(25, 32)
+        elif age in ['(38, 43)', '(38, 48)', '(38, 42)']:
+            return rnd(38, 43)
+        elif age == '(48, 53)':
+            return rnd(48, 53)
+        elif age == '(60, 100)':
+            return rnd(60, 100)
+        elif age == 'None':
+            return np.nan
+        else:
+            return int(age)
+    data['age'] = data['age'].map(makeAge)
+    data['db_name'] = 'adience'
+    data = data.loc[(~data['age'].isnull()) & ((data['gender'] == 'f') | (
+        data['gender'] == 'm')), ['db_name', 'full_path', 'umur', 'gender']]
+    gender = {'m': 1, 'f': 0}
+    data['gender'] = data['gender'].map(gender)
+    data.to_csv('db/adience.csv', index=False)
+
+
+def main():
+    args = parser.parse_args()
+    DB = args.db_name
+    PATH = args.path
+    command = {'imdb': make_from_imdb,
+               'wiki': make_from_wiki,
+               'utkface': make_from_utkface,
+               'fgnet': make_from_fgnet,
+               'adience': make_from_adience}
+    command[DB](PATH)
+
+
+if __name__ == '__main__':
+    main()