Skip to content

Commit

Permalink
Permite que o arquivo empresas.csv possa ser separado por UF ou cidade
Browse files Browse the repository at this point in the history
  • Loading branch information
AnthraxisBR committed Feb 28, 2019
1 parent b3c78d6 commit caa3128
Show file tree
Hide file tree
Showing 7 changed files with 164 additions and 0 deletions.
1 change: 1 addition & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
FILES_LOCATION=/media/Arquivos
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.idea*
__pycache__/
.env
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,3 +65,26 @@ ou, no python3:
ou, no python3:

`python3 cnpj.py "data\F.K032001K.D81106D" sqlite "data"`


## Separando arquivos CSV por estado ou município

Após ter gerado o arquivo empresas.csv, é possível dividi-lo por estado ou por cidade.

### Requisitos

`$ python3 -m pip install python-dotenv`

Para ambos os scripts, é necessário informar, no arquivo `.env`, o diretório onde está o arquivo `empresas.csv`:

`FILES_LOCATION=/media/Arquivos`

Após isso, basta executar os scripts:

#### Para separar por UF:

`python3 separar_csv_por_uf.py`

#### Para separar por cidade:

`python3 separar_csv_por_cidade.py`
44 changes: 44 additions & 0 deletions extra_csv/header_dict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@

# Zero-based position of every empresas.csv column, keyed by column name.
# Kept at module level so the mapping is built once at import time instead of
# on every lookup (callers look columns up while iterating over CSV rows).
_HEADER_INDEX = {
    'cnpj': 0,
    'matriz_filial': 1,
    'razao_social': 2,
    'nome_fantasia': 3,
    'situacao': 4,
    'data_situacao': 5,
    'motivo_situacao': 6,
    'nm_cidade_exterior': 7,
    'cod_pais': 8,
    'nome_pais': 9,
    'cod_nat_juridica': 10,
    'data_inicio_ativ': 11,
    'cnae_fiscal': 12,
    'tipo_logradouro': 13,
    'logradouro': 14,
    'numero': 15,
    'complemento': 16,
    'bairro': 17,
    'cep': 18,
    'uf': 19,
    'cod_municipio': 20,
    'municipio': 21,
    'ddd_1': 22,
    'telefone_1': 23,
    'ddd_2': 24,
    'telefone_2': 25,
    'ddd_fax': 26,
    'num_fax': 27,
    'email': 28,
    'qualif_resp': 29,
    'capital_social': 30,
    'porte': 31,
    'opc_simples': 32,
    'data_opc_simples': 33,
    'data_exc_simples': 34,
    'opc_mei': 35,
    'sit_especial': 36,
    'data_sit_especial': 37,
}


def get_header_index(column):
    """Return the zero-based index of a column within a CSV row.

    Args:
        column: Column name, e.g. ``'uf'`` or ``'municipio'``.

    Returns:
        The integer position of that column in a row.

    Raises:
        KeyError: If ``column`` is not a known column name.
    """
    return _HEADER_INDEX[column]
45 changes: 45 additions & 0 deletions extra_csv/separar_csv_por_cidade.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import settings
import csv, codecs, os
from header_dict import get_header_index

# Directory that holds empresas.csv. The `settings` import above loads the
# .env file, so the value is available through the process environment.
location = os.getenv('FILES_LOCATION')

file = 'empresas.csv'


def separar_por_cidade(base_dir, csv_name):
    """Split the companies CSV into one file per municipality.

    Each data row is appended to ``<base_dir>/UFs/<uf>/<municipio>.csv``,
    with every field quoted. The first line of the input (the header) is
    skipped and is not copied to the output files.

    Args:
        base_dir: Directory containing the input CSV; output goes to its
            ``UFs`` subdirectory, which is created when missing.
        csv_name: File name of the input CSV inside ``base_dir``.

    Returns:
        A ``(count_sucesso, count_erro)`` tuple with the number of rows
        written and the number of rows that could not be written.
    """
    # Resolve the column positions once instead of once per row.
    uf_index = get_header_index('uf')
    municipio_index = get_header_index('municipio')

    # os.path.join supplies the separator, so FILES_LOCATION works with or
    # without a trailing slash.
    output_root = os.path.join(base_dir, 'UFs')
    os.makedirs(output_root, exist_ok=True)

    count_sucesso = 0
    count_erro = 0

    # Read-only access is enough; newline='' lets the csv module handle line
    # endings itself, as its documentation recommends.
    source_path = os.path.join(base_dir, csv_name)
    with open(source_path, 'r', encoding='utf-8', newline='') as source:
        # NUL characters are stripped from each line before csv parses it.
        empresas = csv.reader(line.replace('\0', '') for line in source)
        next(empresas, None)  # skip the header row

        for empresa in empresas:
            try:
                uf = empresa[uf_index]
                municipio = empresa[municipio_index]

                # NOTE(review): both values are used verbatim as path
                # components; a value containing a path separator would
                # change where the row ends up.
                uf_dir = os.path.join(output_root, uf)
                os.makedirs(uf_dir, exist_ok=True)
                target = os.path.join(uf_dir, municipio + '.csv')

                # Append mode creates the file when needed. The handle is
                # opened per row so at most one output file is open at a
                # time, and `with` guarantees it is closed again.
                with open(target, 'a', encoding='utf-8', newline='') as out:
                    # QUOTE_ALL keeps the "a","b" layout and also escapes
                    # quotes embedded in a field; each row is written once.
                    writer = csv.writer(
                        out, quoting=csv.QUOTE_ALL, lineterminator='\n')
                    writer.writerow(empresa)
                count_sucesso += 1
            except (IndexError, OSError):
                # Short rows and filesystem failures are counted and skipped
                # so a single bad record does not abort the whole run.
                count_erro += 1
                print('Erros de processamento: {}'.format(count_erro), end='\r')

            print('Empresas processadas: {}'.format(count_sucesso), end='\r')

    return count_sucesso, count_erro


if __name__ == '__main__':
    if not location:
        raise SystemExit('FILES_LOCATION não definido; informe-o no arquivo .env')
    separar_por_cidade(location, file)
37 changes: 37 additions & 0 deletions extra_csv/separar_csv_por_uf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import settings
import csv, codecs, os
from header_dict import get_header_index

# Directory that holds empresas.csv. The `settings` import above loads the
# .env file, so the value is available through the process environment.
location = os.getenv('FILES_LOCATION')

file = 'empresas.csv'


def separar_por_uf(base_dir, csv_name):
    """Split the companies CSV into one file per UF (state).

    Each data row is appended to ``<base_dir>/UFs/<uf>.csv``, with every
    field quoted. The first line of the input (the header) is skipped and is
    not copied to the output files.

    Args:
        base_dir: Directory containing the input CSV; output goes to its
            ``UFs`` subdirectory, which is created when missing.
        csv_name: File name of the input CSV inside ``base_dir``.

    Returns:
        A ``(count_sucesso, count_erro)`` tuple with the number of rows
        written and the number of rows that could not be written.
    """
    # Resolve the column position once instead of once per row.
    uf_index = get_header_index('uf')

    # os.path.join supplies the separator, so FILES_LOCATION works with or
    # without a trailing slash.
    output_root = os.path.join(base_dir, 'UFs')
    os.makedirs(output_root, exist_ok=True)

    count_sucesso = 0
    count_erro = 0

    # Read-only access is enough; newline='' lets the csv module handle line
    # endings itself, as its documentation recommends.
    source_path = os.path.join(base_dir, csv_name)
    with open(source_path, 'r', encoding='utf-8', newline='') as source:
        # NUL characters are stripped from each line before csv parses it.
        empresas = csv.reader(line.replace('\0', '') for line in source)
        next(empresas, None)  # skip the header row

        for empresa in empresas:
            try:
                uf = empresa[uf_index]

                # NOTE(review): the value is used verbatim as a file name; a
                # value containing a path separator would change where the
                # row ends up.
                target = os.path.join(output_root, uf + '.csv')

                # Append mode creates the file when needed, and `with`
                # guarantees the handle is closed after the row is written.
                with open(target, 'a', encoding='utf-8', newline='') as out:
                    # QUOTE_ALL keeps the "a","b" layout and also escapes
                    # quotes embedded in a field.
                    writer = csv.writer(
                        out, quoting=csv.QUOTE_ALL, lineterminator='\n')
                    writer.writerow(empresa)
                count_sucesso += 1
            except (IndexError, OSError):
                # Short rows and filesystem failures are counted and skipped
                # so a single bad record does not abort the whole run.
                count_erro += 1
                print('Erros de processamento: {}'.format(count_erro), end='\r')

            print('Empresas processadas: {}'.format(count_sucesso), end='\r')

    return count_sucesso, count_erro


if __name__ == '__main__':
    if not location:
        raise SystemExit('FILES_LOCATION não definido; informe-o no arquivo .env')
    separar_por_uf(location, file)
11 changes: 11 additions & 0 deletions extra_csv/settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# settings.py
# Importing this module loads the variables from a .env file into the process
# environment, so other modules can read them with os.getenv().
from pathlib import Path  # python3 only

from dotenv import load_dotenv

# Let python-dotenv locate the .env file on its own. verbose=True asks it to
# warn when no file is found. A plain load_dotenv() before this call would
# load the same file a second time, so only this call is kept.
load_dotenv(verbose=True)

# Also load a .env file from the current working directory, in case the
# script is run from a directory the default lookup does not cover.
# NOTE(review): python-dotenv's default is override=False, so values already
# set are expected to be kept — confirm against the installed version.
env_path = Path('.') / '.env'
load_dotenv(dotenv_path=env_path)

0 comments on commit caa3128

Please sign in to comment.