-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
e8642ae
commit c09b89d
Showing
31 changed files
with
2,298 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
*.npy | ||
*.png | ||
*.jpg | ||
*.jpeg | ||
*.json | ||
|
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"This code reads `.csv` files with matching `filenames.txt`, re-orders their rows into canonical order, and saves the result to `.npy` files." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import numpy as np\n", | ||
"import pandas as pd\n", | ||
"import os\n", | ||
"\n", | ||
"def get_id(fn):\n", | ||
"    \"\"\"Return the base name of `fn` without its directory and final extension.\"\"\"\n", | ||
"    stem, _ext = os.path.splitext(os.path.basename(fn))\n", | ||
"    return stem\n", | ||
"\n", | ||
"def read_csv(fn):\n", | ||
"    \"\"\"Load a header-less CSV file and return its contents as a NumPy array.\"\"\"\n", | ||
"    # `DataFrame.as_matrix()` was deprecated in pandas 0.23 and removed in 1.0;\n", | ||
"    # `.values` yields the same array and works on both old and new versions.\n", | ||
"    return pd.read_csv(fn, header=None).values\n", | ||
"\n", | ||
"def csv_to_canonical_npy(canonical_fn, filenames_fn, csv_fn, npy_fn):\n", | ||
"    \"\"\"Re-order the rows of a CSV file into canonical order and save them as `.npy`.\n", | ||
"\n", | ||
"    canonical_fn: text file with one filename per line, in the target order.\n", | ||
"    filenames_fn: text file with one filename per line; line i names row i of `csv_fn`.\n", | ||
"    csv_fn: header-less CSV file holding the data rows.\n", | ||
"    npy_fn: destination passed to `np.save`.\n", | ||
"\n", | ||
"    Raises KeyError when a canonical id has no matching entry in `filenames_fn`.\n", | ||
"    \"\"\"\n", | ||
"    # Use context managers so both listing files are closed right after reading.\n", | ||
"    with open(canonical_fn) as f:\n", | ||
"        canonical_filenames = f.read().splitlines()\n", | ||
"    with open(filenames_fn) as f:\n", | ||
"        data_filenames = f.read().splitlines()\n", | ||
"    data = read_csv(csv_fn)\n", | ||
"    # Files are matched by id (base name without extension), not by full path.\n", | ||
"    data_index = {get_id(e): i for i, e in enumerate(data_filenames)}\n", | ||
"    data_argsort = [data_index[get_id(e)] for e in canonical_filenames]\n", | ||
"    np.save(npy_fn, data[data_argsort])" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"canonical_filenames_fn = '../data/analysis/canonical_filename_order.txt'\n", | ||
"inception_filenames_fn = '../data/dcnn/inceptionv3/filenames.txt'\n", | ||
"vgg_filenames_fn = '../data/dcnn/vgg/filenames.txt'\n", | ||
"\n", | ||
"# (row-order listing, input csv, output npy) for every table to re-order\n", | ||
"conversions = [\n", | ||
"    (inception_filenames_fn,\n", | ||
"     '../data/dcnn/inceptionv3/predictions.csv',\n", | ||
"     '../data/dcnn/inceptionv3/predictions_canonical.npy'),\n", | ||
"    (inception_filenames_fn,\n", | ||
"     '../data/dcnn/inceptionv3/features.csv',\n", | ||
"     '../data/dcnn/inceptionv3/features_canonical.npy'),\n", | ||
"    (vgg_filenames_fn,\n", | ||
"     '../data/dcnn/vgg/features.csv',\n", | ||
"     '../data/dcnn/vgg/features_canonical.npy'),\n", | ||
"]\n", | ||
"\n", | ||
"for filenames_fn, csv_fn, npy_fn in conversions:\n", | ||
"    csv_to_canonical_npy(canonical_filenames_fn, filenames_fn, csv_fn, npy_fn)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"This code defines the canonical order for all the files.\n", | ||
"\n", | ||
"It ingests:\n", | ||
"\n", | ||
"- A folder of all the images\n", | ||
"\n", | ||
"And outputs:\n", | ||
"\n", | ||
"- `analysis/filename_order.txt` with lines that look like `Box_014/445.png`\n", | ||
"- `analysis/filename_order_box.txt` with lines that look like `Box_014`\n", | ||
"- `analysis/filename_order_id.txt` with lines that look like `445`" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 18, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"from utils.list_all_files import *\n", | ||
"import numpy as np\n", | ||
"import re\n", | ||
"\n", | ||
"input_dir = '../data/photos/png1600'\n", | ||
"output_dir = '../data/analysis'\n", | ||
"\n", | ||
"def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):\n", | ||
"    \"\"\"Build a sort key for `s`: digit runs become ints, other pieces are lower-cased.\"\"\"\n", | ||
"    key = []\n", | ||
"    for chunk in _nsre.split(s):\n", | ||
"        if chunk.isdigit():\n", | ||
"            key.append(int(chunk))\n", | ||
"        else:\n", | ||
"            key.append(chunk.lower())\n", | ||
"    return key\n", | ||
"\n", | ||
"def get_box(fn):\n", | ||
"    \"\"\"Return the directory part of `fn`, e.g. `Box_014` for `Box_014/445.png`.\"\"\"\n", | ||
"    return os.path.dirname(fn)\n", | ||
"\n", | ||
"def get_id(fn):\n", | ||
"    \"\"\"Return the file name of `fn` with directory and final extension stripped.\"\"\"\n", | ||
"    name = os.path.basename(fn)\n", | ||
"    return os.path.splitext(name)[0]\n", | ||
"\n", | ||
"# paths relative to `input_dir`, naturally sorted by file id\n", | ||
"filenames = sorted(\n", | ||
"    (os.path.relpath(e, input_dir) for e in list_all_files(input_dir)),\n", | ||
"    key=lambda fn: natural_sort_key(get_id(fn)))\n", | ||
"\n", | ||
"# one output file per view of the same order: relative path, box, id\n", | ||
"outputs = [\n", | ||
"    ('filename_order.txt', filenames),\n", | ||
"    ('filename_order_box.txt', [get_box(fn) for fn in filenames]),\n", | ||
"    ('filename_order_id.txt', [get_id(fn) for fn in filenames]),\n", | ||
"]\n", | ||
"for name, lines in outputs:\n", | ||
"    np.savetxt(os.path.join(output_dir, name), lines, fmt='%s')" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,198 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"This code ingests:\n", | ||
"\n", | ||
"- A folder of `.json` files generated by OpenFace.\n", | ||
"- A folder of images in `Teenie_Harris_PNG1600`\n", | ||
"\n", | ||
"And outputs:\n", | ||
"\n", | ||
"- `images.npy` the cropped images\n", | ||
"- `indices.npy` the index of face within the photo\n", | ||
"- `descriptors.npy` the OpenFace descriptor for the face\n", | ||
"- `filenames.csv` the filename the face was taken from\n", | ||
"\n", | ||
"Each file has the same number of rows." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"import re\n", | ||
"import json\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from utils.imutil import *\n", | ||
"from utils.list_all_files import *\n", | ||
"from utils.crop import *\n", | ||
"from utils.progress import *\n", | ||
"from utils.mosaic import *\n", | ||
"from utils.draw_shapes import *" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"input_dir = '../data/openface/json/'\n", | ||
"output_dir = '../data/openface/npy32/'\n", | ||
"output_side = 32\n", | ||
"output_dtype = np.uint8 # the png1600 images are uint16 for some reason" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"59278 0:03:25 288.77/s\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"def natural_sort_key(s, _nsre=re.compile('([0-9]+)')):\n", | ||
"    \"\"\"Return a key that sorts digit runs in `s` numerically and other text case-insensitively.\"\"\"\n", | ||
"    def convert(piece):\n", | ||
"        # digit runs compare as numbers, everything else as lower-case text\n", | ||
"        if piece.isdigit():\n", | ||
"            return int(piece)\n", | ||
"        return piece.lower()\n", | ||
"    return list(map(convert, _nsre.split(s)))\n", | ||
"\n", | ||
"def get_id(fn):\n", | ||
"    \"\"\"Return the base name of `fn` minus its final extension.\"\"\"\n", | ||
"    root, _ext = os.path.splitext(os.path.basename(fn))\n", | ||
"    return root\n", | ||
"\n", | ||
"# every file under `input_dir`, naturally sorted by file id\n", | ||
"tasks = sorted(list_all_files(input_dir),\n", | ||
"               key=lambda x: natural_sort_key(get_id(x)))\n", | ||
"\n", | ||
"def job(task):\n", | ||
"    \"\"\"Crop every face described by one OpenFace `.json` file.\n", | ||
"\n", | ||
"    task: path of a `.json` file; it is read for a `path` entry and a `faces`\n", | ||
"        list whose items carry a `box` (w, n, e, s) and a `rep`.\n", | ||
"    Returns a list with one `(task, face_index, face_image, descriptor)` tuple per face.\n", | ||
"    Any exception is re-raised after printing the task and the box being processed.\n", | ||
"    \"\"\"\n", | ||
"    # Pre-bind the values the error handler reports so it can never fail itself.\n", | ||
"    metadata = None\n", | ||
"    box = None\n", | ||
"    try:\n", | ||
"        with open(task) as f:\n", | ||
"            metadata = json.load(f)\n", | ||
"        # The previous code called `imread(fn)`, but `fn` is never defined in this notebook.\n", | ||
"        # NOTE(review): assumes metadata['path'] is the path of the source image - confirm.\n", | ||
"        img = imread(metadata['path'])\n", | ||
"        out = []\n", | ||
"        for i, face in enumerate(metadata['faces']):\n", | ||
"            box = face['box']\n", | ||
"            w, n, e, s = box\n", | ||
"            # force it square: keep n and w, pull s and e in to the shorter side\n", | ||
"            side = min(s - n, e - w)\n", | ||
"            face_img = safe_crop(img, n, n + side, w, w + side, fill=0)\n", | ||
"            face_img = imresize(face_img, max_side=output_side)\n", | ||
"            if output_dtype is not None:\n", | ||
"                face_img = face_img.astype(output_dtype)\n", | ||
"            out.append((task, i, face_img, np.asarray(face['rep'])))\n", | ||
"        return out\n", | ||
"    except Exception:\n", | ||
"        # Print whatever context exists; the old handler read `metadata` and `face`\n", | ||
"        # directly and could raise NameError/KeyError, hiding the real error.\n", | ||
"        path = metadata.get('path') if isinstance(metadata, dict) else None\n", | ||
"        print(task, path, box)\n", | ||
"        raise\n", | ||
"\n", | ||
"# run every task, then flatten the per-file lists into one list of faces\n", | ||
"results = []\n", | ||
"for faces in progress_parallel(job, tasks):\n", | ||
"    results.extend(faces)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# transpose the list of (filename, index, image, descriptor) rows into four columns\n", | ||
"columns = zip(*results)\n", | ||
"input_filenames, face_indices, face_images, face_descriptors = columns\n", | ||
"face_images, face_descriptors = np.asarray(face_images), np.asarray(face_descriptors)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 5, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"os.makedirs(output_dir, exist_ok=True)\n", | ||
"# binary arrays first, then the plain-text list of source filenames\n", | ||
"for name, array in [('images.npy', face_images),\n", | ||
"                    ('indices.npy', face_indices),\n", | ||
"                    ('descriptors.npy', face_descriptors)]:\n", | ||
"    np.save(os.path.join(output_dir, name), array)\n", | ||
"np.savetxt(os.path.join(output_dir, 'filenames.csv'), input_filenames, fmt='%s')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 15, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# imshow(make_mosaic(face_images[:495*495]), fmt='jpg')" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 50, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"image/png": "\n", | ||
"text/plain": [ | ||
"<Figure size 432x288 with 1 Axes>" | ||
] | ||
}, | ||
"metadata": {}, | ||
"output_type": "display_data" | ||
}, | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"0.88043982329\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"plt.hist(results, bins=100)\n", | ||
"plt.show()\n", | ||
"results.sort()\n", | ||
"\n", | ||
"# plt.plot(results)\n", | ||
"# plt.yscale('log')\n", | ||
"# plt.show()\n", | ||
"\n", | ||
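"# fraction of faces smaller than `output_side` pixels (0.88 in the recorded output; this note originally said 128 pixels, but `output_side` is 32 in this notebook)\n", | ||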
"print(np.sum(np.asarray(results) < output_side) / len(results))" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.6.8" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |
Oops, something went wrong.