Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add include/exclude regex pattern to filter search result #24

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ Before using this searcher, you must first install the following external module
* `requests` (HTTP requests)
* `bs4` (BeautifulSoup)

Alternatively, install both modules at once from `requirements.txt` via the command line: `pip install -r requirements.txt`

### Telegram configuration
To have the bot send you updates on Telegram, follow these steps:
1) Create a bot by writing to the BotFather on Telegram
Expand All @@ -33,7 +35,7 @@ Write `python3 subito-searcher.py --help` to see all the command line arguments.
Here is a cheatsheet of the most common usages:

* Add a new query with name "Auto":
`python3 subito-searcher.py --add Auto --url https://www.subito.it/annunci-italia/vendita/usato/?q=auto [--minPrice 50] [--maxPrice 100]`
`python3 subito-searcher.py --add Auto --url https://www.subito.it/annunci-italia/vendita/usato/?q=auto [--minPrice 50] [--maxPrice 100] [--include "REGEX_PATTERNS"] [--exclude "REGEX_PATTERNS"]`
(keep in mind that you must *always* use `--add` and `--url` together; the min and max prices and the include/exclude regex patterns are optional)

* Remove the query "Auto":
Expand Down
2 changes: 2 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
beautifulsoup4==4.11.2
requests==2.28.2
65 changes: 39 additions & 26 deletions subito-searcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
parser.set_defaults(win_notifyoff=False)
parser.add_argument('--addtoken', dest='token', help="telegram setup: add bot API token")
parser.add_argument('--addchatid', dest='chatid', help="telegram setup: add bot chat id")
parser.add_argument('-i', '--include', dest='include', action='append', default=[], help="regex search in title for including item")
parser.add_argument('-e', '--exclude', dest='exclude', action='append', default=[], help="regex search in title for excluding item")

args = parser.parse_args()

Expand Down Expand Up @@ -72,13 +74,13 @@ def print_queries():
for search in queries.items():
print("\nsearch: ", search[0])
for query_url in search[1]:
print("query url:", query_url)
for url in search[1].items():
for minP in url[1].items():
for maxP in minP[1].items():
for result in maxP[1].items():
print("\n", result[1].get('title'), ":", result[1].get('price'), "-->", result[1].get('location'))
print(" ", result[0])
url = list(search[1].items())[0]
print("\nquery url:", url[0])
for minP in url[1].items():
for maxP in minP[1].items():
for result in maxP[1].items():
print("\n", result[1].get('title'), ":", result[1].get('price'), "-->", result[1].get('location'))
print(" ", result[0])


# printing a compact list of trackings
Expand All @@ -87,28 +89,28 @@ def print_sitrep():
i = 1
for search in queries.items():
print('\n{}) search: {}'.format(i, search[0]))
for query_url in search[1].items():
for minP in query_url[1].items():
for maxP in minP[1].items():
print("query url:", query_url[0], " ", end='')
if minP[0] !="null":
print(minP[0],"<", end='')
if minP[0] !="null" or maxP[0] !="null":
print(" price ", end='')
if maxP[0] !="null":
print("<", maxP[0], end='')
print("\n")
url = list(search[1].items())[0]
for minP in url[1].items():
for maxP in minP[1].items():
print("query url:", url[0], " ", end='')
if minP[0] !="null":
print(minP[0],"<", end='')
if minP[0] !="null" or maxP[0] !="null":
print(" price ", end='')
if maxP[0] !="null":
print("<", maxP[0], end='')
print("\n")

i+=1

def refresh(notify):
global queries
try:
for search in queries.items():
for url in search[1].items():
for minP in url[1].items():
for maxP in minP[1].items():
run_query(url[0], search[0], notify, minP[0], maxP[0])
url = list(search[1].items())[0]
for minP in url[1].items():
for maxP in minP[1].items():
run_query(url[0], search[0], notify, minP[0], maxP[0], search[1]['include'], search[1]['exclude'])
except requests.exceptions.ConnectionError:
print("***Connection error***")
except requests.exceptions.Timeout:
Expand All @@ -121,7 +123,7 @@ def delete(toDelete):
global queries
queries.pop(toDelete)

def run_query(url, name, notify, minPrice, maxPrice):
def run_query(url, name, notify, minPrice, maxPrice, include, exclude):
print("running query (\"{}\" - {})...".format(name, url))

if minPrice != 'null':
Expand All @@ -138,7 +140,15 @@ def run_query(url, name, notify, minPrice, maxPrice):

for product in product_list_items:
title = product.find('h2').string


# include
if not any(re.search(regex_pattern, title, re.IGNORECASE) is not None for regex_pattern in include):
continue

# exclude
if any(re.search(regex_pattern, title, re.IGNORECASE) is not None for regex_pattern in exclude):
continue

try:
price=product.find('p',class_=re.compile(r'price')).contents[0]
# check if the span tag exists
Expand All @@ -159,7 +169,10 @@ def run_query(url, name, notify, minPrice, maxPrice):
if minPrice == "null" or price == "Unknown price" or price>=minPrice:
if maxPrice == "null" or price == "Unknown price" or price<=maxPrice:
if not queries.get(name): # insert the new search
queries[name] = {url:{minPrice: {maxPrice: {link: {'title': title, 'price': price, 'location': location}}}}}
queries[name] = {
url:{minPrice: {maxPrice: {link: {'title': title, 'price': price, 'location': location}}}},
'include': include, 'exclude': exclude
}
print("\nNew search added:", name)
print("Adding result:", title, "-", price, "-", location)
else: # add search results to dictionary
Expand Down Expand Up @@ -216,7 +229,7 @@ def send_telegram_messages(messages):
print_sitrep()

if args.url is not None and args.name is not None:
run_query(args.url, args.name, False, args.minPrice if args.minPrice is not None else "null", args.maxPrice if args.maxPrice is not None else "null",)
run_query(args.url, args.name, False, args.minPrice if args.minPrice is not None else "null", args.maxPrice if args.maxPrice is not None else "null", args.include, args.exclude)
print("Query added.")

if args.delete is not None:
Expand Down