From c897c8d9b6db6b27a7a7c23de10f916aeb3f7b36 Mon Sep 17 00:00:00 2001 From: Chris Chapman Date: Mon, 19 Aug 2024 15:26:16 -0600 Subject: [PATCH] Refactored scripts - Refactored shared code into common file - introduced --from argument to limit the amount of taxes imported - allowed the account to be set, not hard-coded - Added state-specific tax accounts from account_us --- account_us.xml | 175 +++++++++++++++++++++++++++++++ scripts/common.py | 83 +++++++++++++++ scripts/import_boundaries.py | 108 +++---------------- scripts/import_rates.py | 198 ++++++++++++----------------------- 4 files changed, 341 insertions(+), 223 deletions(-) create mode 100644 account_us.xml create mode 100644 scripts/common.py diff --git a/account_us.xml b/account_us.xml new file mode 100644 index 0000000..468fc97 --- /dev/null +++ b/account_us.xml @@ -0,0 +1,175 @@ + + + + + + Sales tax collected—Arkansas + 2230-AR + + + + + + Sales tax collected—Georgia + 2230-GA + + + + + + Sales tax collected—Iowa + 2230-IA + + + + + + Sales tax collected—Indiana + 2230-IN + + + + + + Sales tax collected—Kansas + 2230-KS + + + + + + Sales tax collected—Kentucky + 2230-KY + + + + + + Sales tax collected—Michigan + 2230-MI + + + + + + Sales tax collected—Minnesota + 2230-MN + + + + + + Sales tax collected—North Carolina + 2230-NC + + + + + + Sales tax collected—North Dakota + 2230-ND + + + + + + Sales tax collected—Nebraska + 2230-NE + + + + + + Sales tax collected—New Jersey + 2230-NJ + + + + + + Sales tax collected—Nevada + 2230-NV + + + + + + Sales tax collected—Ohio + 2230-OH + + + + + + Sales tax collected—Oklahoma + 2230-OK + + + + + + Sales tax collected—Rhode Island + 2230-RI + + + + + + Sales tax collected—South Dakota + 2230-SD + + + + + + Sales tax collected—Tennessee + 2230-TN + + + + + + Sales tax collected—Utah + 2230-UT + + + + + + Sales tax collected—Vermont + 2230-VT + + + + + + Sales tax collected—Washington + 2230-WA + + + + + + Sales tax collected—Wisconsin + 2230-WI + + + + + + 
Sales tax collected—West Virginia + 2230-WV + + + + + + Sales tax collected—Wyoming + 2230-WY + + + + + + + diff --git a/scripts/common.py b/scripts/common.py new file mode 100644 index 0000000..c1c1c93 --- /dev/null +++ b/scripts/common.py @@ -0,0 +1,83 @@ +import os +import sys + +from html.parser import HTMLParser +import zipfile +from io import BytesIO, TextIOWrapper + +try: + from urllib.error import HTTPError + from urllib.request import urlopen + from urllib.parse import urljoin +except ImportError: + from urllib2 import urlopen, HTTPError + +try: + from progressbar import ETA, Bar, ProgressBar, SimpleProgress +except ImportError: + ProgressBar = None + +try: + from proteus import Model, config +except ImportError: + prog = os.path.basename(sys.argv[0]) + sys.exit("proteus must be installed to use %s" % prog) + +class LinksExtractor(HTMLParser): + def __init__(self): + super().__init__() + self.links = [] + + def handle_starttag(self, tag, attrs): + if tag == 'a': + for attr in attrs: + if attr[0] == 'href': + self.links.append(attr[1]) + + def get_links(self): + return self.links + +def _progress(iterable): + if ProgressBar: + pbar = ProgressBar( + widgets=[SimpleProgress(), Bar(), ETA()]) + else: + pbar = iter + return pbar(iterable) + +def _remove_forbidden_chars(name): + from trytond.tools import remove_forbidden_chars + return remove_forbidden_chars(name) + +def fetch(code, base): + sys.stderr.write('Fetching') + sys.stderr.flush() + try: + responce = urlopen(base) + except HTTPError as e: + sys.exit("\nError fetching directory listing: %s" % e.reason) + parser = LinksExtractor() + parser.feed(TextIOWrapper(responce, encoding='utf-8').read()) + parser.close() + + files = {os.path.basename(a)[:2]: urljoin(base, a) for a in parser.get_links()} + + try: + responce = urlopen(files[code]) + except KeyError: + sys.exit("\nFile not found for code: %s" % code) + except HTTPError as e: + sys.exit("\nError downloading %s: %s" % (code, e.reason)) + data = 
responce.read() + + root, ext = os.path.splitext(responce.url) + if ext == '.zip': + with zipfile.ZipFile(BytesIO(data)) as zf: + data = zf.read(os.path.basename(root) + '.csv') + print('.', file=sys.stderr) + return data + +def get_places(code): + Place = Model.get('census.place') + return {p.code_fips: p for p in Place.find([('subdivision.code', '=', code)])} + diff --git a/scripts/import_boundaries.py b/scripts/import_boundaries.py index 22fcf0d..5bab435 100755 --- a/scripts/import_boundaries.py +++ b/scripts/import_boundaries.py @@ -1,47 +1,29 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # This file is part of Tryton. The COPYRIGHT file at the top level of # this repository contains the full copyright notices and license terms. -from __future__ import print_function import csv from collections import defaultdict -import datetime as dt +from datetime import date import os import sys -try: - from urllib.error import HTTPError - from urllib.request import urlopen - from urllib.parse import urljoin -except ImportError: - from urllib2 import urlopen, HTTPError - -from html.parser import HTMLParser -import zipfile from argparse import ArgumentParser from io import BytesIO, TextIOWrapper from itertools import batched +from proteus import Model, config -try: - from progressbar import ETA, Bar, ProgressBar, SimpleProgress -except ImportError: - ProgressBar = None - -try: - from proteus import Model, config -except ImportError: - prog = os.path.basename(sys.argv[0]) - sys.exit("proteus must be installed to use %s" % prog) +from common import fetch, get_places, _progress def clean_boundaries(code): sys.stderr.write('Cleaning boundaries') sys.stderr.flush() Boundary = Model.get('account.tax.boundary') - Boundary._proxy.delete( - [c.id for c in Boundary.find([ - ('authority.subdivision.code', '=', code), - ])], {}) + Boundary._proxy.delete([], {}) + #[c.id for c in Boundary.find([ + # ('authority.subdivision.code', '=', code), + # ])], {}) print('.', file=sys.stderr) 
def clean_tax_rules(code): @@ -62,70 +44,6 @@ def clean_tax_codes(code): [c.id for c in TaxCode.find([('authority.subdivision.code', '=', code)])], {}) print('.', file=sys.stderr) -class LinksExtractor(HTMLParser): - def __init__(self): - super().__init__() - self.links = [] - - def handle_starttag(self, tag, attrs): - if tag == 'a': - for attr in attrs: - if attr[0] == 'href': - self.links.append(attr[1]) - - def get_links(self): - return self.links - -def _progress(iterable): - if ProgressBar: - widgets = [ - SimpleProgress(), - Bar(), - ETA()] - pbar = ProgressBar(widgets=widgets) - else: - pbar = iter - return pbar(iterable) - -def _remove_forbidden_chars(name): - from trytond.tools import remove_forbidden_chars - return remove_forbidden_chars(name) - -def fetch(code): - sys.stderr.write('Fetching') - sys.stderr.flush() - base = 'https://www.streamlinedsalestax.org/ratesandboundry/Boundary/' - try: - responce = urlopen(base) - except HTTPError as e: - sys.exit("\nError fetching directory listing: %s" % e.reason) - parser = LinksExtractor() - parser.feed(TextIOWrapper(responce, encoding='utf-8').read()) - parser.close() - - files = {os.path.basename(a)[:2]: urljoin(base, a) for a in parser.get_links()} - - try: - responce = urlopen(files[code]) - except KeyError: - sys.exit("\nFile not found for code: %s" % code) - except HTTPError as e: - sys.exit("\nError downloading %s: %s" % (code, e.reason)) - data = responce.read() - - root, ext = os.path.splitext(responce.url) - if ext == '.zip': - with zipfile.ZipFile(BytesIO(data)) as zf: - data = zf.read(os.path.basename(root) + '.csv') - print('.', file=sys.stderr) - return data - -def get_places(code): - Place = Model.get('census.place') - return {p.code_fips: p for p in Place.find([ - ('subdivision.code', '=', code) - ])} - class TaxRuleCollector: def __init__(self, code, places): @@ -357,9 +275,9 @@ def setup_tax_lines(code, tax, amount='tax'): records = [] for row in _progress(reader): authority = 
places[row['fips_state_code']] - start_date = dt.datetime.strptime(row['start_date'], '%Y%m%d').date() - end_date = dt.datetime.strptime(row['end_date'], '%Y%m%d').date() - end_date = None if end_date == dt.date.max else end_date + start_date = date.fromisoformat(row['start_date']) + end_date = date.fromisoformat(row['end_date']) + end_date = None if end_date == date.max else end_date tax_code = code_collector.collect(row) rule = rule_collector.collect(row) @@ -419,6 +337,8 @@ def setup_tax_lines(code, tax, amount='tax'): 'composite_ser_code', 'fips_state_code', 'fips_state_indicator','fips_county_code', 'fips_place_code', 'fips_place_class_code', 'longitude', 'latitude'] +BASE_URL = 'https://www.streamlinedsalestax.org/ratesandboundry/Boundary/' + def main(database, codes, config_file=None): config.set_trytond(database, config_file=config_file) do_import(codes) @@ -431,7 +351,7 @@ def do_import(codes): clean_boundaries('US-%s' % code) clean_tax_rules('US-%s' % code) clean_tax_codes('US-%s' % code) - import_('US-%s' % code, fetch(code)) + import_('US-%s' % code, fetch(code, BASE_URL)) def run(): diff --git a/scripts/import_rates.py b/scripts/import_rates.py index f172b71..5bf3851 100755 --- a/scripts/import_rates.py +++ b/scripts/import_rates.py @@ -1,103 +1,26 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # This file is part of Tryton. The COPYRIGHT file at the top level of # this repository contains the full copyright notices and license terms. 
-from __future__ import print_function import csv -import datetime as dt +from datetime import date from decimal import Decimal import os import sys - -try: - from urllib.error import HTTPError - from urllib.request import urlopen - from urllib.parse import urljoin -except ImportError: - from urllib2 import urlopen, HTTPError - -from html.parser import HTMLParser -import zipfile from argparse import ArgumentParser from io import BytesIO, TextIOWrapper +from proteus import Model, config + +from common import fetch, get_places, _progress -try: - from progressbar import ETA, Bar, ProgressBar, SimpleProgress -except ImportError: - ProgressBar = None - -try: - from proteus import Model, config -except ImportError: - prog = os.path.basename(sys.argv[0]) - sys.exit("proteus must be installed to use %s" % prog) - -class LinksExtractor(HTMLParser): - def __init__(self): - super().__init__() - self.links = [] - - def handle_starttag(self, tag, attrs): - if tag == 'a': - for attr in attrs: - if attr[0] == 'href': - self.links.append(attr[1]) - - def get_links(self): - return self.links - -def _progress(iterable): - if ProgressBar: - pbar = ProgressBar( - widgets=[SimpleProgress(), Bar(), ETA()]) - else: - pbar = iter - return pbar(iterable) - -def _remove_forbidden_chars(name): - from trytond.tools import remove_forbidden_chars - return remove_forbidden_chars(name) - -def fetch(code): - sys.stderr.write('Fetching') - base = 'https://www.streamlinedsalestax.org/ratesandboundry/Rates/' - try: - responce = urlopen(base) - except HTTPError as e: - sys.exit("\nError fetching directory listing: %s" % e.reason) - parser = LinksExtractor() - parser.feed(TextIOWrapper(responce, encoding='utf-8').read()) - parser.close() - - files = {os.path.basename(a)[:2]: urljoin(base, a) for a in parser.get_links()} - - try: - responce = urlopen(files[code]) - except KeyError: - sys.exit("\nFile not found for code: %s" % code) - except HTTPError as e: - sys.exit("\nError downloading %s: %s" % 
(code, e.reason)) - data = responce.read() - - root, ext = os.path.splitext(responce.url) - if ext == '.zip': - with zipfile.ZipFile(BytesIO(data)) as zf: - data = zf.read(os.path.basename(root) + '.csv') - print('.', file=sys.stderr) - return data def get_taxes(code): Tax = Model.get('account.tax') return {(t.name, t.start_date): t for t in Tax.find([ - ('authority', '!=', None), ('authority.subdivision.code', '=', code), ])} -def get_places(code): - Place = Model.get('census.place') - return {p.code_fips: p for p in Place.find([('subdivision.code', '=', 'US-%s' % code)])} - def get_groups(): TaxGroup = Model.get('account.tax.group') return {g.code: g for g in TaxGroup.find([])} @@ -107,7 +30,7 @@ def get_company(): company, = Company.find() return company -def get_tax_account(company=None): +def get_tax_account(name, company=None): Account = Model.get('account.account') if not company: @@ -115,30 +38,35 @@ def get_tax_account(company=None): return Account.find([ ('company', '=', company.id), - ('name', '=', 'Main Tax'), + ['OR', + [('name', '=', name)], + [('code', '=', name)], + ], ], limit=1) -def update_taxes(code, taxes): - Tax = Model.get('account.tax') +def update_taxes(code, stream, from_date, account): print('Importing', file=sys.stderr) + Tax = Model.get('account.tax') places = get_places(code) + taxes = get_taxes(code) groups = get_groups() - tax_account, = get_tax_account() + tax_account, = get_tax_account(account) - f = TextIOWrapper(BytesIO(fetch(code)), encoding='utf-8') + f = TextIOWrapper(BytesIO(stream), encoding='utf-8') records = [] current_code_fips = None for row in _progress(list(csv.DictReader(f, fieldnames=_fieldnames))): authority = places[row['state']] code_fips = row['jurisdiction_fips_code'] jurisdiction = places.get(row['jurisdiction_fips_code']) - start_date = dt.datetime.strptime(row['start_date'], '%Y%m%d').date() - end_date = dt.datetime.strptime(row['end_date'], '%Y%m%d').date() + group = groups[row['jurisdiction_type']] + 
start_date = date.fromisoformat(row['start_date']) + end_date = date.fromisoformat(row['end_date']) for type_ in ['general_rate_intrastate', 'general_rate_interstate', 'food_rate_intrastate', 'food_rate_interstate']: - name = '%s %s' % (code_fips, type_) + name = '%s %s' % (code_fips, type_) #TODO: isn't there a better name? description = '%s tax (%s)' % (code_fips if jurisdiction is None else jurisdiction.name, row[type_]) sourcing = 'intrastate' if 'intrastate' in type_ else 'interstate' @@ -148,36 +76,36 @@ def update_taxes(code, taxes): if (name, None) in taxes: parent = taxes[(name, None)] else: - parent = Tax(name=name) - - parent.jurisdiction=jurisdiction - parent.description = description - parent.authority = authority - parent.type = 'none' - parent.group = groups[row['jurisdiction_type']] - parent.sourcing = sourcing - parent.rate_type = rate_type + parent = Tax(name=name, + jurisdiction=jurisdiction, + description = description, + authority = authority, + type = 'none', + group = group, + sourcing = sourcing, + rate_type = rate_type) + records.append(parent) + if end_date and end_date <= from_date: + continue # import the parent at least for complete tax rules if (name, start_date) in taxes: record = taxes[(name, start_date)] else: - record = Tax(name=name) - - record.jurisdiction=jurisdiction - record.description = description - record.authority = authority - record.type = 'percentage' - record.group = groups[row['jurisdiction_type']] - record.rate = Decimal(row[type_]) - record.sourcing = sourcing - record.rate_type = rate_type - record.start_date = start_date - record.end_date = None if end_date == dt.date.max else end_date - - record.invoice_account = tax_account - record.credit_note_account = tax_account + record = Tax(name=name, + jurisdiction=jurisdiction, + description = description, + authority = authority, + type = 'percentage', + group = group, + rate = Decimal(row[type_]), + sourcing = sourcing, + rate_type = rate_type, + start_date = 
start_date, + end_date = None if end_date == date.max else end_date, + invoice_account = tax_account, + credit_note_account = tax_account) records.append(record) current_code_fips = code_fips @@ -191,30 +119,35 @@ def update_taxes_parent(taxes): records = [] for k, record in _progress(taxes.items()): - name, start_date = k if record.type == 'none': continue - else: - record.parent = taxes[(name, None)] - records.append(record) + + name, start_date = k + record.parent = taxes[(name, None)] + records.append(record) Tax.save(records) _fieldnames = ['state', 'jurisdiction_type', 'jurisdiction_fips_code', 'general_rate_intrastate', 'general_rate_interstate', 'food_rate_intrastate', 'food_rate_interstate', 'start_date', 'end_date'] +_base = 'https://www.streamlinedsalestax.org/ratesandboundry/Rates/' -def main(database, codes, config_file=None): +def main(database, args, config_file=None): config.set_trytond(database, config_file=config_file) - do_import(codes) + do_import(args) -def do_import(codes): - for code in codes: +def do_import(args): + for code in args.codes: print(code, file=sys.stderr) - code = code.upper() - taxes = get_taxes('US-%s' % code) - taxes = update_taxes(code, taxes) + if args.account[-1] in ['-', '–', '—']: + account = args.account + code.upper() + else: + account = args.account + from_date = date.min if args.all else args.from_date + tryton_code = 'US-%s' % code.upper() + taxes = update_taxes(tryton_code, fetch(code.upper(), _base), from_date, account) update_taxes_parent(taxes) @@ -223,12 +156,19 @@ def run(): parser.add_argument('-d', '--database', dest='database', required=True) parser.add_argument('-c', '--config', dest='config_file', help='the trytond config file') - parser.add_argument('-a', '--active', action='store_true', - help='only import active taxes') + parser.add_argument('-l', '--liability-account', dest='account', default='2230-', + help='the code of the invoice and credit note account related to the taxes ' + '(defaults to 
2230-{code}, see the account_us module)') + parser.add_argument('-f', '--from', dest='from_date', + default=date.today().isoformat(), type=date.fromisoformat, + help='import all taxes active from the given date YYYY-MM-DD ' + '(defaults to %s)' % date.today().isoformat()) + parser.add_argument('--all', action='store_true', + help='import all available taxes (overrides --from)') parser.add_argument('codes', nargs='+') args = parser.parse_args() - main(args.database, args.codes, args.config_file) + main(args.database, args, args.config_file) if __name__ == '__main__':