Skip to content

Commit

Permalink
Testing/Recipes: add matched ciqual
Browse files Browse the repository at this point in the history
  • Loading branch information
Guillaume Hélouis committed Jul 1, 2015
1 parent ac4438d commit 05a8d4f
Show file tree
Hide file tree
Showing 9 changed files with 1,609 additions and 12 deletions.
1,497 changes: 1,497 additions & 0 deletions testing/CIQUAL2013-Donneescsv.csv

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions testing/management/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__author__ = 'kevin'
1 change: 1 addition & 0 deletions testing/management/commands/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
__author__ = 'kevin'
38 changes: 38 additions & 0 deletions testing/management/commands/get_ingreds.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from django.core.management.base import BaseCommand
from testing.models import Ingredient, Nutriment
import time

class Command(BaseCommand):
    """
    Fill database with ingredients from the Ciqual ANSES table.

    The first line of the CSV is treated as the header: columns 4 to 59 are
    read as nutriment labels. Every following line is read as one ingredient,
    whose category is taken from column 1 and whose name from column 3.
    Empty cells and cells containing "-" are treated as missing values.
    """
    help = 'Fill database with ingredients from the Ciqual ANSES table'

    def handle(self, *args, **options):
        """
        Parse the Ciqual CSV and save one Nutriment per header label and one
        Ingredient per data row.

        :param args: positional command arguments (unused)
        :param options: command options (unused)
        """
        csv_path = "testing/CIQUAL2013-Donneescsv.csv"
        print("Parsing %s and filling the database..." % csv_path)
        start_time = time.time()

        separator = ";"
        with open(csv_path, 'r', encoding="ISO-8859-1") as f:

            # Nutriments
            header = f.readline().rstrip("\n").split(separator)
            # Slicing (instead of indexing a fixed range) tolerates a header
            # with fewer columns than expected.
            for label in header[4:60]:
                if not label or label == "-":
                    continue
                # Remove leading number and trailing unit, then join the
                # remaining words back into a single string: the model field
                # is a CharField, not a list.
                name = " ".join(label.split(" ")[1:-1])
                if name:
                    Nutriment(name=name).save()

            # Iterate over the file lazily rather than loading every line.
            for n, line in enumerate(f):
                print("%s ingredients processed" % n, end='\r')
                fields = line.rstrip("\n").split(separator)
                if len(fields) < 4:
                    # Blank or truncated row: no name column to read.
                    continue

                # Ingredients
                name = fields[3] if fields[3] and fields[3] != "-" else None
                category = fields[1] if fields[1] and fields[1] != "-" else None
                Ingredient(name=name, category=category).save()

        print('Done in %s seconds.' % (time.time() - start_time))
8 changes: 8 additions & 0 deletions testing/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
from django.db import models

class Ingredient(models.Model):
    """
    Ingredient entry, described by a name, a category and its nutriments.
    """
    # Display name of the ingredient (at most 256 characters).
    name = models.CharField(max_length=256)
    # Category label of the ingredient (at most 512 characters).
    category = models.CharField(max_length=512)
    # Many-to-many link to Nutriment; referenced by string because the
    # Nutriment class is declared after this one.
    nutriments = models.ManyToManyField('Nutriment')

class Nutriment(models.Model):
    """
    Nutriment entry, identified only by its name.
    """
    # Display name of the nutriment (at most 256 characters).
    name = models.CharField(max_length=256)

class Comment(models.Model):
url = models.URLField()
text = models.TextField()
54 changes: 48 additions & 6 deletions testing/recipe_engine/db_link.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from menus.models import Recipe, Ingredient
from Levenshtein import distance
import testing.models

def get_closest(parsed_ingredient, matching):
"""
Expand All @@ -13,10 +14,29 @@ def get_closest(parsed_ingredient, matching):
return scores[sorted(scores.keys())[0]]


def get_matching_ingredient(parsed_ingredient, ciqual=False):
    """
    Return the stored ingredient that best matches a parsed ingredient name.

    Lookups are attempted in this order, stopping at the first one that
    yields results:

    1. exact name;
    2. exact name without its trailing 's';
    3. name containing the input without its trailing 's' (case-insensitive);
    4. name containing the input (case-insensitive).

    :param parsed_ingredient: ingredient name extracted from a recipe
    :param ciqual: when true, search ``testing.models.Ingredient`` instead of
                   ``menus.models.Ingredient``
    :return: the candidate selected by ``get_closest``, or None when the name
             is empty or nothing matches
    """
    if not parsed_ingredient:
        return None

    # Pick the table once instead of branching on `ciqual` for every query.
    manager = testing.models.Ingredient.objects if ciqual else Ingredient.objects

    # Name without a trailing 's'. Left empty when there is no trailing 's'
    # or when stripping it would leave nothing: an empty `icontains` pattern
    # would match every row.
    singular = parsed_ingredient[:-1] if parsed_ingredient.endswith('s') else ""

    matching = manager.filter(name=parsed_ingredient)
    if not matching and singular:
        matching = manager.filter(name=singular)
    if not matching and singular:
        matching = manager.filter(name__icontains=singular)
    if not matching:
        matching = manager.filter(name__icontains=parsed_ingredient)
    return get_closest(parsed_ingredient, matching) if matching else None

def get_ease(parsed_ease):
Expand Down Expand Up @@ -49,7 +69,7 @@ def get_matching_ingredients(parsed_ingredients, recipe=None):
# above steps with every word in the string from longest to shortest
matched_ingredients = {}
for i in parsed_ingredients:
if not i:
if not i or not i.name:
continue
ingredient = get_matching_ingredient(i.name)
if not ingredient:
Expand All @@ -63,10 +83,32 @@ def get_matching_ingredients(parsed_ingredients, recipe=None):
recipe.ingredients.add(ingredient)
matched_ingredients[i.name] = ingredient.name
else:
matched_ingredients[i.name] = "<No match found>"
matched_ingredients[i.name] = "<None>"

return matched_ingredients

def get_matching_ciqual_ingredients(parsed_ingredients):
    """
    Match every parsed ingredient against the Ciqual ingredient table.

    The full name is tried first. If it has no match, each word of the name
    is tried on its own, from longest to shortest, until one matches.

    :param parsed_ingredients: iterable of parsed ingredients exposing a
                               ``name`` attribute; falsy entries and entries
                               without a name are skipped
    :return: dict mapping each parsed name to the matched ingredient name,
             or to "<None>" when nothing matched
    """
    matched_ingredients = {}
    for parsed in parsed_ingredients:
        # Skip nameless entries too: a None name cannot be split into words
        # below, and get_matching_ingredients applies the same guard.
        if not parsed or not parsed.name:
            continue

        ingredient = get_matching_ingredient(parsed.name, True)
        if not ingredient:
            # Fall back to single words, longest first.
            for word in reversed(sorted(parsed.name.split(" "), key=len)):
                ingredient = get_matching_ingredient(word, True)
                if ingredient:
                    break

        matched_ingredients[parsed.name] = ingredient.name if ingredient else "<None>"

    return matched_ingredients

def link_ingredients(recipe, ingredients):
    """
    Run ingredient matching for the given parsed ingredients, passing the
    recipe along to get_matching_ingredients. The resulting mapping is
    discarded.
    """
    get_matching_ingredients(parsed_ingredients=ingredients, recipe=recipe)

def save_recipe(recipe):
"""
Save the recipe into the database
Expand All @@ -92,5 +134,5 @@ def save_recipe(recipe):
r.origin_url = recipe.url
r.save()

get_matching_ingredients(recipe.ingredients, r)
link_ingredients(r, recipe.ingredients)

3 changes: 3 additions & 0 deletions testing/recipe_engine/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,3 +115,6 @@ def __init__(self, s):

if self.name == self.unit:
self.unit = None

# step 5: clean
self.name = self.name.replace("d'", "")
9 changes: 6 additions & 3 deletions testing/templates/recipes.html
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<nav class="navbar navbar-default">
<div class="container-fluid">
<div class="navbar-header">
<a class="navbar-brand" href="{% url 'testing.views.index' %}">Testing: Recipes</a>
<a class="navbar-brand" href="{% url 'testing.views.index' %}">Testing/Recipes</a>
</div>
</div>
</nav>
Expand Down Expand Up @@ -120,12 +120,15 @@
<tbody>
<tr>
<th>Parsed</th>
<th>Matched</th>
<th>Matched OpenFoodFacts</th>
<th>Matched Ciqual</th>
</tr>
{% for parsed, matched in matched_ingredients.items %}
<tr>
<td>{{ parsed }}</td>
<td>{{ matched }}</td>
{% for m in matched %}
<td>{{ m }}</td>
{% endfor %}
</tr>
{% endfor %}
</tbody>
Expand Down
10 changes: 7 additions & 3 deletions testing/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from testing.models import Comment
from testing.recipe_engine.scraper import random_recipe, Recipe
from testing.recipe_engine.db_link import get_matching_ingredients, save_recipe
from testing.recipe_engine.db_link import get_matching_ingredients, get_matching_ciqual_ingredients, save_recipe

def index(request):
    """
    Render the 'index.html' template with an empty context.
    """
    context = {}
    return render(request, 'index.html', context)
Expand All @@ -14,8 +14,12 @@ def recipes(request):
save_recipe(recipe)
else:
recipe = random_recipe()
recipe.save_screenshot()
matched_ingredients = get_matching_ingredients(recipe.ingredients)
#recipe.save_screenshot()
matched_off = get_matching_ingredients(recipe.ingredients)
matched_ciqual = get_matching_ciqual_ingredients(recipe.ingredients)
matched_ingredients = {}
for parsed, matched in matched_off.items():
matched_ingredients[parsed] = (matched_off[parsed], matched_ciqual[parsed])

com_saved = False
if 'new_comment' in request.POST and request.POST['new_comment']:
Expand Down

0 comments on commit 05a8d4f

Please sign in to comment.