From 9409e8a07e157b8a3bf8fc19bdf22e60acd76249 Mon Sep 17 00:00:00 2001 From: "lucasoribeiro@dcc.ufba.br" Date: Thu, 23 Aug 2018 18:40:20 -0300 Subject: [PATCH] first commit --- analyzer.py | 34 + config.cpp | 48 ++ config.h | 22 + config.ui | 98 +++ contapalavras.cpp | 7 + contapalavras.h | 10 + desktop.ini | 5 + dialogcrece.cpp | 23 + dialogcrece.h | 24 + dialogcrece.ui | 113 +++ dialogjanela.cpp | 68 ++ dialogjanela.h | 31 + dialogjanela.ui | 148 ++++ guipalabras.cpp | 740 +++++++++++++++++ guipalabras.h | 56 ++ guipalabras.ui | 323 ++++++++ main.cpp | 10 + palabras.pro | 34 + palabras.pro.user | 336 ++++++++ palabras.pro.user.2.1pre1 | 113 +++ palabras.pro.user.2.7pre1 | 243 ++++++ palabras.pro.user.3.0-pre1 | 245 ++++++ palabras.pro.user.35f2e92 | 318 ++++++++ palabras.pro.user.4b39ca2 | 336 ++++++++ tratatexto.cpp | 1539 ++++++++++++++++++++++++++++++++++++ tratatexto.h | 69 ++ 26 files changed, 4993 insertions(+) create mode 100755 analyzer.py create mode 100755 config.cpp create mode 100755 config.h create mode 100755 config.ui create mode 100755 contapalavras.cpp create mode 100755 contapalavras.h create mode 100755 desktop.ini create mode 100755 dialogcrece.cpp create mode 100755 dialogcrece.h create mode 100755 dialogcrece.ui create mode 100755 dialogjanela.cpp create mode 100755 dialogjanela.h create mode 100755 dialogjanela.ui create mode 100755 guipalabras.cpp create mode 100755 guipalabras.h create mode 100755 guipalabras.ui create mode 100755 main.cpp create mode 100755 palabras.pro create mode 100755 palabras.pro.user create mode 100755 palabras.pro.user.2.1pre1 create mode 100755 palabras.pro.user.2.7pre1 create mode 100755 palabras.pro.user.3.0-pre1 create mode 100755 palabras.pro.user.35f2e92 create mode 100755 palabras.pro.user.4b39ca2 create mode 100755 tratatexto.cpp create mode 100755 tratatexto.h diff --git a/analyzer.py b/analyzer.py new file mode 100755 index 0000000..de28aab --- /dev/null +++ b/analyzer.py @@ -0,0 +1,34 @@ +import networkx as nx +import sys +def estatisticas(endereco,arq): + arquivo = open(endereco+'/'+arq+'_estatisticas.csv', 'w') + e= endereco+'/'+arq+'Convertido.redeFras.csv' + graph = nx.read_edgelist( + e) + nx.to_directed(graph) + clust_coeficients = nx.clustering(graph) + dict_betweenes = nx.betweenness_centrality(graph) + closeness = nx.closeness_centrality(graph) + eigenvector_centrality = nx.eigenvector_centrality(graph) + centralidade_grau = nx.degree_centrality(graph) + grau = nx.degree(graph) + #modularidade = nx.modularity_matrix(graph) + #centro= nx.center(graph,e=None, usebounds=False) + # diametro= nx.diameter(graph,e=None, usebounds=False) + densidade = nx.density(graph) + print densidade + arquivo.write("No ; Grau ; Betweennes ; Centralidade_Grau ; Centralidade(Closeness) ; Eigenvector_Centrality ; Clustering\n") + for key in dict_betweenes.keys(): + aux = str(dict_betweenes[key]) + aux1= str(grau[key]) + aux2= str(centralidade_grau[key]) + aux3= str(closeness[key]) + aux4= str(eigenvector_centrality[key]) + aux5= str(clust_coeficients[key]) + arquivo.write(key + ' ; '+aux1+' ; '+ aux +' ; '+aux2 +' ; '+aux3+' ; '+aux4+' ; '+aux+ '\n') + arquivo.close() + +ende=str(sys.argv[1]) +arq=str(sys.argv[2]) +estatisticas(ende,arq) + diff --git a/config.cpp b/config.cpp new file mode 100755 index 0000000..9118ad6 --- /dev/null +++ b/config.cpp @@ -0,0 +1,48 @@ +#include "config.h" +#include "ui_config.h" +#include +#include +using namespace std; + +config::config(QWidget *parent) : + QDialog(parent), + ui(new Ui::config) +{ + ui->setupUi(this); + + string maxLinha; + + fstream arqGr,arqPl; + arqGr.open("gramat.ftl",ios_base::in); + if(!arqGr.is_open()) + arqGr.open("gramat.ftl",ios_base::out); + arqPl.open("palav.ftl",ios_base::in); + if(!arqPl.is_open()) + arqPl.open("palav.ftl",ios_base::out); + while(!arqGr.eof()) + { + getline(arqGr,maxLinha); + this->ui->gramat->append(QString(maxLinha.c_str())); + } + + while(!arqPl.eof()) + { + getline(arqPl,maxLinha); + this->ui->palavr->append(QString(maxLinha.c_str())); + } + arqGr.close(); + arqPl.close(); +} + +config::~config() +{ + ofstream arqGr,arqPl; + arqGr.open("gramat.ftl"); + arqGr << this->ui->gramat->toPlainText().toStdString(); + arqGr.close(); + arqPl.open("palav.ftl"); + arqPl << this->ui->palavr->toPlainText().toStdString(); + arqPl.close(); + + delete ui; +} diff --git a/config.h b/config.h new file mode 100755 index 0000000..ae315bb --- /dev/null +++ b/config.h @@ -0,0 +1,22 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#include + +namespace Ui { +class config; +} + +class config : public QDialog +{ + Q_OBJECT + +public: + explicit config(QWidget *parent = 0); + ~config(); + +private: + Ui::config *ui; +}; + +#endif // CONFIG_H diff --git a/config.ui b/config.ui new file mode 100755 index 0000000..263ace4 --- /dev/null +++ b/config.ui @@ -0,0 +1,98 @@ + + + config + + + + 0 + 0 + 400 + 300 + + + + Dialog + + + + + + FILTROS A SEREM ELIMINADOS NA CONSTRUÇÃO DAS REDES + + + + + + + 1 + + + + Classes Gramaticais + + + + + + + + + + Palavras + + + + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + buttonBox + accepted() + config + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + config + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/contapalavras.cpp b/contapalavras.cpp new file mode 100755 index 0000000..1828a75 --- /dev/null +++ b/contapalavras.cpp @@ -0,0 +1,7 @@ +#include "contapalavras.h" +#include + +contaPalavras::contaPalavras() +{ +} + diff --git a/contapalavras.h b/contapalavras.h new file mode 100755 index 0000000..0bef089 --- /dev/null +++ b/contapalavras.h @@ -0,0 +1,10 @@ +#ifndef CONTAPALAVRAS_H +#define CONTAPALAVRAS_H + +class contaPalavras +{ +public: + contaPalavras(); +}; + +#endif // CONTAPALAVRAS_H diff --git a/desktop.ini b/desktop.ini new file mode 100755 index 0000000..13ff7ac --- /dev/null +++ b/desktop.ini @@ -0,0 +1,5 @@ +[.ShellClassInfo] +InfoTip=Esta pasta está compartilhada on-line. +IconFile=C:\Program Files (x86)\Google\Drive\googledrivesync.exe +IconIndex=16 + \ No newline at end of file diff --git a/dialogcrece.cpp b/dialogcrece.cpp new file mode 100755 index 0000000..fd49cd6 --- /dev/null +++ b/dialogcrece.cpp @@ -0,0 +1,23 @@ +#include "dialogcrece.h" +#include "ui_dialogcrece.h" + +DialogCrece::DialogCrece(QWidget *parent) : + QDialog(parent), + ui(new Ui::DialogCrece) +{ + ui->setupUi(this); +} + +DialogCrece::~DialogCrece() +{ + delete ui; +} + +QString DialogCrece::getClasses() +{ + return ui->ClassesLineEdit->text(); +} +int DialogCrece::getPasso() +{ + return this->ui->PassoSpinBox->value(); +} diff --git a/dialogcrece.h b/dialogcrece.h new file mode 100755 index 0000000..736add3 --- /dev/null +++ b/dialogcrece.h @@ -0,0 +1,24 @@ +#ifndef DIALOGCRECE_H +#define DIALOGCRECE_H + +#include + +namespace Ui { + class DialogCrece; +} + +class DialogCrece : public QDialog +{ + Q_OBJECT + +public: + explicit DialogCrece(QWidget *parent = 0); + QString getClasses(); + int getPasso(); + ~DialogCrece(); + +private: + Ui::DialogCrece *ui; +}; + +#endif // DIALOGCRECE_H diff --git a/dialogcrece.ui b/dialogcrece.ui new file mode 100755 index 0000000..8d28fe5 --- /dev/null +++ b/dialogcrece.ui @@ -0,0 +1,113 @@ + + + DialogCrece + + + + 0 + 0 + 611 + 138 + + + + Dialog + + + + + + + + Passo, em quantidade de palavras + + + + + + + 999999999 + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + + + + Classes separadas por vírgula + + + + + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + + + buttonBox + accepted() + DialogCrece + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + DialogCrece + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/dialogjanela.cpp b/dialogjanela.cpp new file mode 100755 index 0000000..c7ceffb --- /dev/null +++ b/dialogjanela.cpp @@ -0,0 +1,68 @@ +#include "dialogjanela.h" +#include "ui_dialogjanela.h" + +DialogJanela::DialogJanela(QWidget *parent) : + QDialog(parent), + ui(new Ui::DialogJanela) +{ + ui->setupUi(this); +} + +DialogJanela::~DialogJanela() +{ + delete ui; +} + +void DialogJanela::changeEvent(QEvent *e) +{ + QDialog::changeEvent(e); + switch (e->type()) { + case QEvent::LanguageChange: + ui->retranslateUi(this); + break; + default: + break; + } +} + +char DialogJanela::getIdioma() +{ + if (this->ui->ingles->isChecked()) + return 1; + + if (this->ui->portugues->isChecked()) + return 2; + + return 0; +} + +char DialogJanela::getAnalise() +{ + if (this->ui->normal->isChecked()) + return 1; + + if (this->ui->canonico->isChecked()) + return 2; + + if (this->ui->completo->isChecked()) + return 3; + + return 0; +} + + + +void DialogJanela::on_cancelar_clicked() +{ + this->done(0); +} + +void DialogJanela::on_inicio_clicked() +{ + opIdioma = getIdioma(); + opAnalise = getAnalise(); + if (opIdioma == 0 || opAnalise == 0) + this->done(0); + else + this->done(1); +} diff --git a/dialogjanela.h b/dialogjanela.h new file mode 100755 index 0000000..da03bbe --- /dev/null +++ b/dialogjanela.h @@ -0,0 +1,31 @@ +#ifndef DIALOGJANELA_H +#define DIALOGJANELA_H + +#include + +namespace Ui { + class DialogJanela; +} + +class DialogJanela : public QDialog { + Q_OBJECT +public: + DialogJanela(QWidget *parent = 0); + ~DialogJanela(); + char getIdioma(); + char getAnalise(); + char opIdioma, opAnalise; + + +protected: + void changeEvent(QEvent *e); + +private: + Ui::DialogJanela *ui; + +private slots: + void on_cancelar_clicked(); + void on_inicio_clicked(); +}; + +#endif // DIALOGJANELA_H diff --git a/dialogjanela.ui b/dialogjanela.ui new file mode 100755 index 0000000..9ceb4a4 --- /dev/null +++ b/dialogjanela.ui @@ -0,0 +1,148 @@ + + + DialogJanela + + + + 0 + 0 + 280 + 216 + + + + Dialog + + + + + 0 + 10 + 251 + 80 + + + + Escolha tipo de prétratamento: + + + + false + + + + 10 + 20 + 111 + 17 + + + + Normal + + + + + false + + + + 10 + 40 + 121 + 17 + + + + Canônico + + + true + + + + + + 10 + 60 + 161 + 17 + + + + Completo + + + + + + + 10 + 90 + 120 + 61 + + + + Escolha Idioma: + + + + + 10 + 20 + 91 + 17 + + + + Inglês + + + + + true + + + + 10 + 40 + 111 + 17 + + + + Português + + + + + + + 170 + 180 + 75 + 23 + + + + Cancelar + + + + + + 60 + 180 + 75 + 23 + + + + Iniciar + + + + + + diff --git a/guipalabras.cpp b/guipalabras.cpp new file mode 100755 index 0000000..87e4950 --- /dev/null +++ b/guipalabras.cpp @@ -0,0 +1,740 @@ +#include "guipalabras.h" +#include "ui_guipalabras.h" +#include +#include "tratatexto.h" +#include +#include +#include +#include +#include +#include "dialogcrece.h" +#include "config.h" +#include +using namespace std; + +guiPalabras::guiPalabras(QWidget *parent) : + QMainWindow(parent), + ui(new Ui::guiPalabras) +{ + ui->setupUi(this); + this->ui->menuAnalises->setEnabled(true); + this->ui->labelStatus->setText("Nenhum arquivo aberto"); +} + +guiPalabras::~guiPalabras() +{ + delete ui; +} + +void guiPalabras::changeEvent(QEvent *e) +{ + QMainWindow::changeEvent(e); + switch (e->type()) { + case QEvent::LanguageChange: + ui->retranslateUi(this); + break; + default: + break; + } +} + +void guiPalabras::on_actionAbrir_triggered() +{ + + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) para abrir"), + QString::null, + QString::null); + if(this->listaArquivosEntrada.size()>0) + { + this->ui->labelStatus->clear(); + this->ui->menuAnalises->setEnabled(true); + } + else + { + this->ui->menuAnalises->setEnabled(false); + } +} + +void guiPalabras::on_actionPre_tratamento_triggered() +{ + /* Fiz diversas modificações nesta funçao com a inteção de resolver o problema da codificação de letras latinas + as modificações estão marcadas com comentarios. +*/ + + //Quando a caixa de dialogo abre ele marca 0 caso não seja selecionado nada + int meleca = opEscolha.exec(); + if (meleca == 0) + return; + //Recebem as opçes de escolha de idioma e o tipo de analise + char idioma = opEscolha.opIdioma; + char tipoanalise = opEscolha.opAnalise; + + // + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Tratando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + progress.setValue(i++); + QFileInfo arqInfo(path); + QString nomePrincipal = arqInfo.path()+"/"+arqInfo.baseName(); + string localArquivo=arqInfo.path().toStdString(); + string nomeArquivo=arqInfo.baseName().toStdString(); + + if(idioma==1){ + system(("cd "+localArquivo +"&& cp "+nomeArquivo+".txt "+nomeArquivo+".txtConvertido.txt").c_str()); + arqIndividual.lowerWord(path+"Convertido.txt", nomePrincipal+"Convertido"); + arqIndividual.preanalyze(nomePrincipal+"Convertido", idioma, tipoanalise); + }else{ + + string tipoDoArquivo= arqIndividual.GetStdoutFromCommand(("cd "+arqInfo.path().toStdString()+" && file -i "+arqInfo.baseName().toStdString()+".txt").c_str());// esta linha recupera o tipo de arquivo de texto está sendo tratado + bool v=false; + QString auxiliar; + string s; + //Este for está buscando apenas a codificação do arquivo, que vem precedida de algumas outras informações inuteis para o nosso interesse + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arquivoSaida.open("resultados.txt", ios::out); + arquivoSaida << "Arquivo\tWordTot\tWordDiffN\tWordDiffCan\n"; + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqTag = arqInfo.baseName() + ".tag"; + QString arqFrq = arqInfo.baseName() + ".frq"; + if (!arqIndividual.abreArquivo(arqTag.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + // saving contents: + arqIndividual.frequenciaPalavra(arqFrq); + arquivoSaida << arqTag.toStdString().c_str() << "\t" + << arqIndividual.palTot << "\t" << arqIndividual.palDiffN << "\t" + << arqIndividual.palDiffC << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + + +void guiPalabras::on_actionPalavras_2_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + if (!arqIndividual.redePalavras((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString(), 2)) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + +void guiPalabras::on_actionSentencas_2_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arqIndividual.carregaFiltros(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + + if (!arqIndividual.redeSentencas((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + +void guiPalabras::on_actionVisualizacao_triggered() +{ + system ("wine ~/.wine/drive_c/pajek/Pajek/PAJEK.exe"); +} + +void guiPalabras::on_actionCalculos_triggered() +{ + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + //int i=0; + progress.show(); + string comand/*="netall > resultadosNetAll.txt"*/; + string co2; + //system(comand.c_str()); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString nomeArqSai = arqInfo.baseName(); + QString nomeArqEnt = arqInfo.baseName(); + QString endereco= arqInfo.path(); + cout<> resultadosNetAll.txt";*/ + comand="python analyzer.py "+endereco.toStdString()+ + " "+nomeArqEnt.toStdString(); + co2="nano config.ui"; + system(comand.c_str()); + } +} + +void guiPalabras::on_actionMinusculas_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Tratando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + progress.setValue(i++); + QFileInfo arqInfo(path); + //QString nomePrincipal = arqInfo.baseName(); + + arqIndividual.lowerWord(path, arqInfo.baseName()); + //arqIndividual.preanalyze(nomePrincipal, idioma, tipoanalise); + if (progress.wasCanceled()) + break; + } +} +//Esta função procura no arquivo +void guiPalabras::on_actionFuncionais_triggered() +{ + trataTexto arqIndividual; + ofstream arquivoSaida; + QString path; + QString tipoNFunc = "AQ,R,N,VM,JJ,RB,VB,WRB";//Tinha um FW como Tag + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arquivoSaida.open("estatisticasfuncionais.txt", ios::out); + arquivoSaida << "Arquivo\tpalContenido\tpalFuncionais\tpalVocabulario\n"; + + foreach(path, this->listaArquivosEntrada) + { + //QFileInfo arqInfo(path); + //QString arqTag = arqInfo.baseName() + ".tag"; + if (!arqIndividual.abreArquivoTAG(path.toStdString(),specialFunc)) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + // saving contents: + arquivoSaida << path.toStdString().c_str() << "\t" + << arqIndividual.palFuncionalT << "\t" << arqIndividual.palContenidoT << + "\t" << arqIndividual.palVocabularioT << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + +void guiPalabras::on_actionZipfPromedio_triggered() +{ + trataTexto arqIndividual; + ofstream arquivoSaida; + QString path; + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) FRQ para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + arquivoSaida.open("zipfpromedio.txt", ios::out);// Acrescentei isso arqInfo.path()+"/"+ + arquivoSaida << "Arquivo\tzipfpromedio\n"; + + foreach(path, this->listaArquivosEntrada) + { + if (!arqIndividual.abreArquivoFRQ(path.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + // saving contents: + arquivoSaida << path.toStdString().c_str() << "\t" + << arqIndividual.zipfpromedio << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + + +void guiPalabras::on_actionTamSentencas_triggered() +{ + trataTexto arqIndividual; + QString path; + QString tipoNFunc = "AQ,R,N,VM,FW,JJ,RB,VB,WRB"; + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqFfr = arqInfo.baseName() + ".ffr"; + if (!arqIndividual.abreArquivoTAGFFR(path.toStdString(),specialFunc,arqFfr.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + + +void guiPalabras::on_actionTags_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resum.dat",ios::out); + arqResum50.open("resum50k.dat",ios::out); + arqResum << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + arqResum50 << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".cret"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRE(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionCrescPalabras_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + arqResum.open("resumPal.dat",ios::out); + arqResum50.open("resumPal55k.dat",ios::out); + arqResum << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + arqResum50 << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".crep"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCREP(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionCresSentencas_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resumSentences.dat",ios::out); + arqResum50.open("resumSentences55k.dat",ios::out); + arqResum << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + arqResum50 << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".creS"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRES(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + break; + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionDeclara_es_utterances_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resumUtt.dat",ios::out); + arqResum50.open("resumUtt55k.dat",ios::out); + arqResum << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + arqResum50 << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".creD"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRED(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + break; + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionSubs_Simples_triggered() +{ + trataTexto arqIndividual; + QString path; + QString nomeArqSubs; + string alvo, subs; + QStringList regrasAlvo; + QStringList regrasSubs; + ifstream arqSubs; + nomeArqSubs = QFileDialog::getOpenFileName( + this, + tr("Escolha arquivo(s) de regras para abrir"), + QString::null, + QString::null); + arqSubs.open(nomeArqSubs.toStdString().c_str()); + do{ + arqSubs >> alvo >> subs; + QString Qalvo=alvo.c_str(); + QString Qsubs=subs.c_str(); + Qsubs.replace("\\s"," "); + Qsubs.replace("\\d",""); + + regrasAlvo.push_back(Qalvo); + regrasSubs.push_back(Qsubs); + }while(!arqSubs.eof()); + + + + QProgressDialog progress("Aplicando Regras...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + cout<<"passei aqui"<listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arqIndividual.carregaFiltros(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + + if (!arqIndividual.redeFrases((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} +void guiPalabras::on_actionOp_es_de_rede_triggered() +{ + config op; + op.exec(); + +} +void guiPalabras:: on_actionAjuda_triggered(){ + +} diff --git a/guipalabras.h b/guipalabras.h new file mode 100755 index 0000000..391a611 --- /dev/null +++ b/guipalabras.h @@ -0,0 +1,56 @@ +#ifndef GUIPALABRAS_H +#define GUIPALABRAS_H + +#include +#include +#include +#include +#include "dialogjanela.h" +#include "dialogjanela.h" +#include "ui_dialogjanela.h" + +namespace Ui { + class guiPalabras; +} + +class guiPalabras : public QMainWindow { + Q_OBJECT +public: + guiPalabras(QWidget *parent = 0); + ~guiPalabras(); + +protected: + void changeEvent(QEvent *e); + + +private: + Ui::guiPalabras *ui; + QStringList listaArquivosEntrada; + DialogJanela opEscolha; + +private slots: + + +private slots: + void on_actionTamSentencas_triggered(); + void on_actionZipfPromedio_triggered(); + void on_actionFuncionais_triggered(); + void on_actionMinusculas_triggered(); + void on_actionCalculos_triggered(); + void on_actionVisualizacao_triggered(); + void on_actionSentencas_2_triggered(); + void on_actionPalavras_2_triggered(); + void on_actionPalavras_triggered(); + void on_actionPre_tratamento_triggered(); + void on_actionAbrir_triggered(); + void on_actionTags_triggered(); + void on_actionCrescPalabras_triggered(); + void on_actionCresSentencas_triggered(); + void on_actionDeclara_es_utterances_triggered(); + void on_actionSubs_Simples_triggered(); + void on_actionOp_es_de_rede_triggered(); + void on_actionFrases_triggered(); + void on_actionAjuda_triggered(); +}; + +#endif // GUIPALABRAS_H diff --git a/guipalabras.ui b/guipalabras.ui new file mode 100755 index 0000000..a60b526 --- /dev/null +++ b/guipalabras.ui @@ -0,0 +1,323 @@ + + + guiPalabras + + + + 0 + 0 + 497 + 400 + + + + Palabras + + + + + false + + + + 9 + 9 + 471 + 311 + + + + + 0 + 5 + + + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> +<html><head><meta name="qrichtext" content="1" /><style type="text/css"> +p, li { white-space: pre-wrap; } +</style></head><body style=" font-family:'Sans Serif'; font-size:9pt; font-weight:400; font-style:normal;"> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">Para utilizar este programa no linux é necessário baixar uma biblioteca escrita em C++ chamada de Freeling. Ela é responsável por analizar sintaticamente o texto e é parte fundamental de todos os processos feitos por esse programa. Por favor baixe o </span><a href="https://github.com/TALP-UPC/FreeLing/releases"><span style=" font-weight:600; text-decoration: underline; color:#0000ff;">Freeling .</span></a></p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Cuidados que se deve tomar com relação ao texto a ser analizado:</p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">-O texto deve estar com a terminação .txt </p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">-Se o nome estiver como no exemplo &quot;nome do arquivo.txt&quot; transformar para &quot;nome_do_arquivo.txt&quot;<br /></p></body></html> + + + + + false + + + + 10 + 321 + 281 + 20 + + + + : + + + labelStatus + textEditResult + + + + + 0 + 0 + 497 + 22 + + + + + Arquivo + + + + + + + + Analises + + + + Redes + + + + + + + + + Crescimento + + + + + + + + + Regras de substituiçao + + + + + + + + + + + + + + + + + + + true + + + Resultados + + + + + + + Ajuda + + + + + + + + + + TopToolBarArea + + + false + + + + + + Abrir o(s) Aquivo(s) + + + + + Fechar + + + + + Sair do Programa + + + true + + + false + + + + + Totais + + + + + Diferentes + + + + + Sentencas + + + false + + + false + + + + + Pre-tratamento + + + + + false + + + Palavras + + + false + + + + + Palavras + + + + + Sentencas + + + + + Calculos + + + + + Visualizacao + + + true + + + + + false + + + Minusculas + + + false + + + false + + + + + Funcionais + + + + + ZipfPromedio + + + + + tamSentencas + + + + + Tags + + + + + Palabras + + + + + Sentenças + + + + + Declarações (utterances) + + + + + Subs Simples + + + + + Opções de rede + + + + + Frases + + + + + teste + + + + + + + + actionSalvar + triggered() + actionFechar + trigger() + + + -1 + -1 + + + -1 + -1 + + + + + diff --git a/main.cpp b/main.cpp new file mode 100755 index 0000000..b7f3019 --- /dev/null +++ b/main.cpp @@ -0,0 +1,10 @@ +#include +#include "guipalabras.h" + +int main(int argc, char *argv[]) +{ + QApplication a(argc, argv); + guiPalabras w; + w.show(); + return a.exec(); +} diff --git a/palabras.pro b/palabras.pro new file mode 100755 index 0000000..1ccaaf2 --- /dev/null +++ b/palabras.pro @@ -0,0 +1,34 @@ +# ------------------------------------------------- +# Project created by QtCreator 2010-06-24T17:25:40 +# ------------------------------------------------- + +QT += core gui + +greaterThan(QT_MAJOR_VERSION, 4): QT += widgets + +TARGET = palabras +TEMPLATE = app +SOURCES += main.cpp \ + guipalabras.cpp \ + contapalavras.cpp \ + tratatexto.cpp \ + dialogjanela.cpp \ + dialogcrece.cpp \ + config.cpp +HEADERS += guipalabras.h \ + contapalavras.h \ + tratatexto.h \ + dialogjanela.h \ + tratatexto.h \ + dialogcrece.h \ + config.h +FORMS += guipalabras.ui \ + dialogjanela.ui \ + dialogcrece.ui \ + config.ui + +DISTFILES += \ + analyzer.py + + + diff --git a/palabras.pro.user b/palabras.pro.user new file mode 100755 index 0000000..6fa27e3 --- /dev/null +++ b/palabras.pro.user @@ -0,0 +1,336 @@ + + + + + + EnvironmentId + {c3d3a9ab-44b1-402b-b49b-8c4de2988f2b} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.10.1 GCC 64bit + Desktop Qt 5.10.1 GCC 64bit + qt.qt5.5101.gcc_64_kit + 0 + 0 + 0 + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:/home/lucas/Desktop/IC/QT projeto/palablasSilvia/palabras.pro + true + + palabras.pro + false + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Debug + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/palabras.pro.user.2.1pre1 b/palabras.pro.user.2.1pre1 new file mode 100755 index 0000000..c1b2c50 --- /dev/null +++ b/palabras.pro.user.2.1pre1 @@ -0,0 +1,113 @@ + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + System + + + + ProjectExplorer.Project.Target.0 + + Desktop + Qt4ProjectManager.Target.DesktopTarget + 0 + 0 + + + qmake + QtProjectManager.QMakeBuildStep + + + + Make + Qt4ProjectManager.MakeStep + false + + + + 2 + + Make + Qt4ProjectManager.MakeStep + true + + clean + + + + 1 + false + + Debug + Qt4ProjectManager.Qt4BuildConfiguration + 2 + /home/scaldeira/Escritorio/palabras-build-desktop + 2 + 0 + true + + + + qmake + QtProjectManager.QMakeBuildStep + + + + Make + Qt4ProjectManager.MakeStep + false + + + + 2 + + Make + Qt4ProjectManager.MakeStep + true + + clean + + + + 1 + false + + Release + Qt4ProjectManager.Qt4BuildConfiguration + 0 + /home/scaldeira/Escritorio/palabras-build-desktop + 2 + 0 + true + + 2 + + palabras + Qt4ProjectManager.Qt4RunConfiguration + 2 + + palabras.pro + false + false + + false + false + + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 4 + + diff --git a/palabras.pro.user.2.7pre1 b/palabras.pro.user.2.7pre1 new file mode 100755 index 0000000..0e9be3c --- /dev/null +++ b/palabras.pro.user.2.7pre1 @@ -0,0 +1,243 @@ + + + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + true + 1 + true + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.0.2 MinGW 32bit + Desktop Qt 5.0.2 MinGW 32bit + qt.502.win32_mingw47.essentials_kit + 0 + 0 + 0 + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia/debug + true + + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia/release + true + + 2 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + true + + false + false + false + false + true + 0.01 + 10 + true + 25 + + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/JoséGarcia/Documents/My Box Files/QT/palablasSilvia/palabras.pro + 2 + + palabras.pro + false + false + + + 3768 + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.EnvironmentId + {8ac8d706-7235-4bcb-9480-b125e4bca84c} + + + ProjectExplorer.Project.Updater.FileVersion + 12 + + diff --git a/palabras.pro.user.3.0-pre1 b/palabras.pro.user.3.0-pre1 new file mode 100755 index 0000000..76e2f84 --- /dev/null +++ b/palabras.pro.user.3.0-pre1 @@ -0,0 +1,245 @@ + + + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + true + 1 + true + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.1.1 MinGW 32bit + Desktop Qt 5.1.1 MinGW 32bit + qt.511.win32_mingw48.essentials_kit + 0 + 0 + 0 + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia + true + + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia + true + + 2 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + true + + false + false + false + false + true + 0.01 + 10 + true + 25 + + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/JoséGarcia/Documents/My Box Files/QT/palablasSilvia/palabras.pro + + palabras.pro + false + false + + 3768 + true + false + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.EnvironmentId + {8ac8d706-7235-4bcb-9480-b125e4bca84c} + + + ProjectExplorer.Project.Updater.FileVersion + 14 + + diff --git a/palabras.pro.user.35f2e92 b/palabras.pro.user.35f2e92 new file mode 100755 index 0000000..40da7d2 --- /dev/null +++ b/palabras.pro.user.35f2e92 @@ -0,0 +1,318 @@ + + + + + + EnvironmentId + {35f2e920-c6c4-43b7-af26-efdc52411479} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.8.0 MinGW 32bit + Desktop Qt 5.8.0 MinGW 32bit + qt.58.win32_mingw53_kit + 0 + 0 + 0 + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/vivas/Google Drive/QT/palablasSilvia/palabras.pro + true + + palabras.pro + false + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Debug + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/palabras.pro.user.4b39ca2 b/palabras.pro.user.4b39ca2 new file mode 100755 index 0000000..16c36c2 --- /dev/null +++ b/palabras.pro.user.4b39ca2 @@ -0,0 +1,336 @@ + + + + + + EnvironmentId + {4b39ca23-9a9c-4cbb-856f-0a9e7c1918a4} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.9.0 GCC 64bit + Desktop Qt 5.9.0 GCC 64bit + qt.59.gcc_64_kit + 1 + 0 + 0 + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + -DCMAKE_BUILD_TYPE=Debug + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:/home/lucas/Desktop/IC/Softwares/palablasSilvia/palabras.pro + true + + palabras.pro + false + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Release + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/tratatexto.cpp b/tratatexto.cpp new file mode 100755 index 0000000..4a84400 --- /dev/null +++ b/tratatexto.cpp @@ -0,0 +1,1539 @@ +#include "tratatexto.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +trataTexto::trataTexto() +{ + QString tipoPunct = "Fp,Fs,Fd,Fx,Fg,Fit,Fat"; + QStringList specialPunct = tipoPunct.split(","); + for (int i=0; ipontuacao.insert(pair(specialPunct[i].toStdString(),0)); + carregaFiltros(); +} + +bool trataTexto::abreArquivo(string nome) +{ + + ifstream arquivo; + string token; + QStringList list1; + int nPal=0; + arquivo.open(nome.c_str(), ios::in); + + this->normal.clear(); + this->canonica.clear(); + list1.clear(); + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); + string codigo2 = list1[1].toLower().toStdString(); + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + canonica[codigo2]=canonica[codigo2]+1; + nPal++; + + } + + arquivo.close(); + this->palDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + return true; +} + + +bool trataTexto::abreArquivoTAG(string nome, QStringList tags) +{ + ifstream arquivo; + string token; + QStringList list1; + int nPalFuncional=0; + int nPalContenido=0; + arquivo.open(nome.c_str(), ios::in); + this->vocabulario.clear(); + + while (!arquivo.eof()) // && (nPalFuncional + nPalContenido <= 4000)) + { + int i=0; + getline(arquivo, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo = list1[2].toStdString(); + if (list1[2][0] == 'F') + continue; + string codigo2 = list1[1].toLower().toStdString(); + + for (i=0;ivocabulario.insert(pair(codigo2,vocabulario[codigo2]+1)); + vocabulario[codigo2]=vocabulario[codigo2]+1; + nPalContenido++; + break; + } + } + + if (i==tags.size()) + nPalFuncional++; + } + arquivo.close(); + this->palVocabularioT = this->vocabulario.size(); + this->palFuncionalT=nPalFuncional; + this->palContenidoT=nPalContenido; + return true; +} + + + +bool trataTexto::abreArquivoCRE(string nome, QStringList tags,int passo, string arqCre) +{ + ifstream arquivo; + ofstream arquivoCre,arqResum; + int totCanAn=0; + int totNorAn=0; + map::iterator it; + string token; + vector tagContNov; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + + tagContNov.assign(tags.size(),0); + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + canonica[codigo2]=canonica[codigo2]+1; + for (int i=0;i=tamanho) + { + arquivoCre << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;i=55000) // resumo a 55000 palavras + { + arqResum.open("resum55k.dat",ios::app); + arqResum << arqCre << "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;inormal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resum.dat",ios::app); + arqResum << arqCre << "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagContNov; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + canonica[codigo2]=canonica[codigo2]+1; + for (int i=0;i=tamanho) + { + arquivoCre << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;i=55000) // resumo a 55000 palavras + { + arqResum.open("resumPal55k.dat",ios::app); + arqResum << arqCre<< "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;inormal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumPal.dat",ios::app); + arqResum << arqCre<< "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + QStringList endSentences,endPauses; + endSentences << "Fat" << "Fp" << "Fs" << "Fit"; + endPauses << "Fat" << "Fd" << "Fp" << "Fs" << "Fx" <<"Fit" << "Fg" <<"Fe" <<"Fc"; + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + bool fS,fP; + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + + fS=endSentences.contains(list1[2],Qt::CaseInsensitive); + fP=endPauses.contains(list1[2],Qt::CaseInsensitive); + + if (fS|| fP) + { + if(fS)nSent++; + if(fP)nPaus++; + nDWPS+=this->canonica.size(); + this->canonica.clear(); + this->normal.clear(); + if(nPal>=tamanho) + { + arquivoCre << nPal <<"\t" << nSent << "\t" << + (double)nPal/nSent<<"\t"<< + (double)nDWPS/nSent<<"\t" << + (double) nPal/nPaus<<"\t"; + for(unsigned int i=0;i=55000) // resumo a 50000 palavras + { + arqResum.open("resumSentences55k.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t" << (double) nPal/nPaus<<"\t"; + for(unsigned int i=0;inormal.insert(pair(codigo1,normal[codigo1]+1)); + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + for (int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumSentences.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"<<(double) nPal/nPaus<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + { + nSent++; + nDWPS+=this->canonica.size(); + this->canonica.clear(); + this->normal.clear(); + if(nPal>=tamanho) + { + arquivoCre << nPal <<"\t" << nSent << "\t" << + (double)nPal/nSent<<"\t"<< + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;i=55000) // resumo a 50000 palavras + { + arqResum.open("resumUtt55k.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;inormal.insert(pair(codigo1,normal[codigo1]+1)); + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + for (int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumUtt.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;ifiltroGram.push_back(QString(maxLinha.c_str())); + } + + while(!arqPl.eof()) + { + getline(arqPl,maxLinha); + this->filtroPala.push_back(QString(maxLinha.c_str())); + } + arqGr.close(); + arqPl.close(); +} + +bool trataTexto::filtro(QStringList token) +{ + bool flagGr,flagPl; + flagGr=flagPl=false; + for(int i=0;ifiltroGram.size();i++) + { + int tam=this->filtroGram[i].size(); + if(tam==0) continue; + if(this->filtroGram[i].toUpper()==token[2].left(tam).toUpper()) + { + flagGr=true; + break; + } + } + for(int i=0;ifiltroPala.size();i++) + { + int tam=this->filtroPala[i].size(); + if(tam==0) continue; + if(this->filtroPala[i].toUpper()==token[1].toUpper()) + { + flagPl=true; + break; + } + } + return flagGr||flagPl; +} + + + +bool trataTexto::abreArquivoFRQ(string nome) +{ + ifstream arquivo; + string token; + QStringList list1; + list frequencia; + list ::iterator it; + arquivo.open(nome.c_str(), ios::in); + + getline(arquivo, token); + + while (!arquivo.eof()) + { + getline(arquivo, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split("\t"); + if (list1.size() < 2) + return false; + int freq = list1[1].toInt(); + frequencia.push_back(freq); + } + arquivo.close(); + + frequencia.sort(); + int soma=0; + int somaFreq=0; + int i=0; + for (it=frequencia.begin();it!=frequencia.end();++it) + { + soma+= (*it*(frequencia.size()-i)); + somaFreq+= *it; + i++; + + } + + this->zipfpromedio=(double)soma/somaFreq; + + return true; +} + + +bool trataTexto::abreArquivoTAGFFR(string nome, QStringList tags, string arqFfr) +{ + ifstream arqEntrada; + ofstream arqSaida; + string token; + QStringList list1; + vector totPalavras; + vector palContenido; + vector tamFrase; + vector qtdFrases; + arqEntrada.open(nome.c_str(), ios::in); + arqSaida.open(arqFfr.c_str(), ios::out); + + totPalavras.push_back(0); + palContenido.push_back(0); + int maxTamSent=0; + int is=0; + + while (!arqEntrada.eof()) + { + int i=0; + getline(arqEntrada, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo = list1[2].toStdString(); + if (!list1[2].compare("Fat") || !list1[2].compare("Fd") || !list1[2].compare("Fit") + || !list1[2].compare("Fp") || !list1[2].compare("Fs") || !list1[2].compare("Fx")) + { + totPalavras.push_back(0); + palContenido.push_back(0); + maxTamSent = (totPalavras[is]>maxTamSent)?totPalavras[is]:maxTamSent; + is++; + continue; + } + + if (list1[2][0] == 'F' && list1[2].compare("Fw")) + continue; + + totPalavras[is]++; + + for (i=0;i \"" + baseName + ".tag\""); + system(comando2.toStdString().c_str()); + } + + QString comando("analyze -f " + arqConfig + ".cfg tagged <\"" + + baseName + ".pre\"> \"" + baseName + ".tag\""); + system(comando.toStdString().c_str()); +} + + +bool trataTexto::redePalavras (string infArq, int janela) +{ + + + vector janPalNormal, janPalCanonica; + QStringList list1; + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + this->canonicaArestas.clear(); + + list1.clear(); + + for (int i=0;inormal.insert(pair(codigo1,normal.size())); + this->canonica.insert(pair(codigo2,canonica.size())); + + janPalNormal.push_back(codigo1); + janPalCanonica.push_back(codigo2); + } + + while(!in.atEnd()) + { + for (unsigned i=0;inormalArestas.insert(pair(codigo1,normalArestas[codigo1]++)); + this->normalArestas.insert(pair(codigo3,normalArestas[codigo3]++)); + this->canonicaArestas.insert(pair(codigo2,canonicaArestas[codigo2]++)); + this->canonicaArestas.insert(pair(codigo4,canonicaArestas[codigo4]++)); + } + } + janPalNormal.erase(janPalNormal.begin()); + janPalCanonica.erase(janPalCanonica.begin()); + + LER: + QString qtoken = in.readLine(1000); + if(qtoken.size() == 0) + { + if (in.atEnd()) continue; + else goto LER; + } + list1 = qtoken.split(" "); + + //QString qtoken(token.c_str()); + //list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + + if(filtro(list1)) // desconsidera as palavras e formas canonicas preselecionadas. + { + if (in.atEnd()) continue; + else goto LER; + } + if (list1[2][0] == 'F') + { + if (in.atEnd()) continue; + else goto LER; + } + string codigo1 = list1[0].toLower().toStdString(); + string codigo2 = list1[1].toLower().toStdString(); + janPalNormal.push_back(codigo1); + janPalCanonica.push_back(codigo2); + this->normal.insert(pair(codigo1,normal.size())); + this->canonica.insert(pair(codigo2,canonica.size())); + } + arquivo.close(); + + this->imprimeNet(infArq); + + return true; + +} + +string trataTexto:: GetStdoutFromCommand(string cmd) { + + string data; + FILE * stream; + const int max_buffer = 256; + char buffer[max_buffer]; + cmd.append(" 2>&1"); + + stream = popen(cmd.c_str(), "r"); + if (stream) { + while (!feof(stream)) + if (fgets(buffer, max_buffer, stream) != NULL) data.append(buffer); + pclose(stream); +} +return data; +} + + +void trataTexto::lowerWord (QString nomeArqE, QString nomeArqS) +{ + //fstream arquivoEntrada; + //fstream arquivoSaida; + //arquivoEntrada.open(nomeArqE); + //arquivoSaida.open(nomeArqS+".pre"); + QFile arqEntrada(nomeArqE); + QFile arqSaida(nomeArqS + ".pre"); + char maxLinha[50]; + arqEntrada.open(QIODevice::ReadOnly); + arqSaida.open(QIODevice::WriteOnly); + while(arqEntrada.readLine( maxLinha,sizeof(maxLinha))>0) + { + int cont=0; + string aux; + while(maxLinha[cont]!='\0'){ + if(maxLinha[cont]=='\0'){ + break; + } + if((maxLinha[cont]>='A'&& maxLinha[cont]<='Z')){ + maxLinha[cont]=maxLinha[cont]+32; + } + /* switch (maxLinha[cont]) { + case 'É':maxLinha[cont]='é'; + break; + case 'Ó': maxLinha[cont]='ó'; + break; + case 'Ç': maxLinha[cont]='ç'; + break; + case 'Á': maxLinha[cont]='á'; + break; + case 'À': maxLinha[cont]='à'; + break; + case 'Ô': maxLinha[cont]='ô'; + break; + case 'Ê': maxLinha[cont]='ê'; + break; + case 'Í': maxLinha[cont]='í'; + break; + case 'Ã': maxLinha[cont]='ã'; + break; + case 'Õ': maxLinha[cont]='õ'; + break; + case 'Ẽ': maxLinha[cont]='ẽ'; + default: + break; + }*/ + cont++; + } + + QTextStream outStream(&arqSaida); + outStream<0) + { + QString qlinha = maxLinha; + for(unsigned i=0;iit = this->canonica.begin(); this->it != this->canonica.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + //arqSaidaCan.write(qlinha.toStdString().c_str()); + this->it->second = i++; //Neste ponto aqui ele atribui a canonica.second o valor de i++ + } + i = 1; + + for (this->it = this->normal.begin(); this->it != this->normal.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + // arqSaidaNormal.write(qlinha.toStdString().c_str()); + this->it->second = i++; //atribui a normal.second o valor de i++ + } + + //imprime "*edges" nos dois arquivos para simbolizar que a partir dali começa a parte das arestas + // arqSaidaCan.write("*edges\n"); + //arqSaidaNormal.write("*edges\n"); + + //laço responsavel por imprimir as arestas entre os vertices e seus pesos + for (this->it = this->canonicaArestas.begin(); this->it != this->canonicaArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); //atribui a variavel palavra o valor de canonicaArestas.first que são pares de palavras em redeSentenças e individuais em rede de Palavras + QStringList lPalavra = palavra.split(" ");//Caso sejam pares de palavras ele divide ela quando achar um espaço + QString pal1=QString::number(this->canonica[lPalavra[0].toStdString()]);//pega o indice(rotulo do vertice) da primeira palavra + QString pal2=QString::number(this->canonica[lPalavra[1].toStdString()]);//pega o indice(rotulo do vertice) da segunda palavra + // QString qlinha = pal1 + " " + pal2 + " " + + QString qlinha =lPalavra[0]+" "+lPalavra[1]+ " " + + QString::number(this->it->second)+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str());//imprime no arquivo o formato "tagVetice1 tagVertice2 Peso" + } +//Neste laço o mesmo processo é refeito, no entanto com as palavras normais + for (this->it = this->normalArestas.begin(); this->it != this->normalArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); + QStringList lPalavra = palavra.split(" "); + QString pal1=QString::number(this->normal[lPalavra[0].toStdString()]); + QString pal2=QString::number(this->normal[lPalavra[1].toStdString()]); + // QString qlinha = pal1 + " " + pal2 + " " + + QString qlinha= lPalavra[0]+" "+ lPalavra[1]+" "+ + QString::number(this->it->second)+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + } + arqSaidaCan.close(); + arqSaidaNormal.close(); +} + +bool trataTexto::redeSentencas (string infArq) +{ + string token;//Não foi utilizado para nada + vector sentPalNormal, sentPalCanonica;//guarda as palavras da sentença + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + //in.setCodec("LATIN"); + + + map > grafo;//Armazena em cada substantivo a sua fila de sentencas + queue substantivos; + set subConj; + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + this->canonicaArestas.clear(); + this->redeSentencasImpressaoCanonica.clear(); + this->redeSentencasImpressaoNormal.clear(); + + int quantidadeDeVertices = 0; + while(!in.atEnd()) + { + sentPalNormal.clear(); + sentPalCanonica.clear(); + + string sentenca; + int quantidadeDeSubstantivos = 0; + bool tem_substantivo = false; // cond + bool terminou = false; + + + while(!in.atEnd()) + { + QString linha = in.readLine(1000); + if(linha.isEmpty()) + continue; + + QStringList tokens = linha.split(" "); + if (tokens.size() < 4) + return false; + + if(filtro(tokens)) // desconsidera as palavras e formas canonicas preselecionadas. + continue; + + string tag = tokens[2].toStdString(); + string palavra = tokens[0].toLower().toStdString(); + string canonica = tokens[1].toLower().toStdString(); + if(tag[0]!='F' && !filtro(tokens)){ + if(sentenca.size()>0) + sentenca = sentenca + "_" + canonica; + else + sentenca=sentenca+canonica; + + } + if (/*eFimSentenca(tokens[2]) ||*/ tag[0] == 'F' && (tag[1]=='a' ||(tag[1]=='i')|| tag[1]=='p'||tag[1]=='s')) + { + terminou = true; + + if (tem_substantivo) + { + quantidadeDeVertices = quantidadeDeVertices+1; + for (int r = 0; r < quantidadeDeSubstantivos; ++r) + { + grafo[substantivos.front()].push(sentenca); + substantivos.pop(); + } + } + break; + + } + else + { + if (tag[0]=='N'){ + tem_substantivo=true; + ++quantidadeDeSubstantivos; + //aux.push(primeiraPalavraDaLinha); + subConj.insert(canonica); + substantivos.push(canonica); + + } + } + } + } + + arquivo.close(); + + nomeArquivo = infArq + ".redeSent.csv"; + QFile arquivoNet(nomeArquivo.c_str()); + + if (!arquivoNet.open(QIODevice::WriteOnly)) + cerr << "erro ao abrir!\n"; + + + + + int contar=0,anteriorCond=0; + hash calcula_hash; + + //QString qlinha = "*vertices " + QString::number(quantidadeDeVertices) + "\n"; + // arquivoNet.write(qlinha.toUtf8()); + + int quantidadeDePares=0; + //queue > arestas; //Armazena a lista com os pares de arestas para liga-los no grafo + queue > arestas; + for(set::iterator it= subConj.begin();it!=subConj.end();it++){ + ++contar; + bool primeiro = true; + //size_t anterior = 0; + string sentAnterior; + while (!grafo[*it].empty()) + { + size_t id = calcula_hash(grafo[*it].front()); + //arquivoNet.write(QString::number(id).toUtf8()); + //arquivoNet.write(" \""); + //arquivoNet.write(grafo[*it].front().c_str()); + // arquivoNet.write("\""); + + if(anteriorCond==contar-1 && !primeiro) + { + ++quantidadeDePares; + arestas.push(make_pair(sentAnterior,grafo[*it].front())); + } + //arquivoNet.write("\n"); + anteriorCond=contar; + // anterior=id; + string zera; + sentAnterior=zera; + sentAnterior=sentAnterior+grafo[*it].front(); + if (!grafo[*it].empty()) + grafo[*it].pop(); + primeiro=false; + ++contar; + } + } + + arquivoNet.write("source target\n"); + while(!arestas.empty()){ + //pair front = arestas.front(); + arquivoNet.write((arestas.front().first).c_str()); + arquivoNet.write(" "); + arquivoNet.write((arestas.front().second).c_str()); + arquivoNet.write("\n"); + //cout<pontuacao.count(classe.toStdString()) == 1; +} + +void trataTexto::frequenciaPalavra(QString nomeArq) +{ + map::iterator it; + ofstream arquivo; + arquivo.open(nomeArq.toStdString().c_str(), ios::out); + + arquivo <<"Palavras" <<"Frequencia" <canonica.begin();it!=this->canonica.end();it++) + { + arquivo <<(*it).first <<"\t" <<(*it).second < sentPalNormal, sentPalCanonica;//guarda as palavras da sentença + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + //in.setCodec("LATIN"); + + + map > grafo;//Armazena em cada substantivo a sua fila de sentencas + queue substantivos; + set subConj; + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + this->canonicaArestas.clear(); + this->redeSentencasImpressaoCanonica.clear(); + this->redeSentencasImpressaoNormal.clear(); + + int quantidadeDeVertices = 0; + while(!in.atEnd()) + { + sentPalNormal.clear(); + sentPalCanonica.clear(); + + string sentenca; + int quantidadeDeSubstantivos = 0; + bool tem_substantivo = false; // cond + bool terminou = false; + + + while(!in.atEnd()) + { + QString linha = in.readLine(1000); + if(linha.isEmpty()) + continue; + + QStringList tokens = linha.split(" "); + if (tokens.size() < 4) + return false; + + if(filtro(tokens)) // desconsidera as palavras e formas canonicas preselecionadas. + continue; + + string tag = tokens[2].toStdString(); + string palavra = tokens[0].toLower().toStdString(); + string canonica = tokens[1].toLower().toStdString(); + if(tag[0]!='F' && !filtro(tokens)){ + if(sentenca.size()>0){ + sentenca = sentenca + "_" + canonica; + cout< calcula_hash; + + //QString qlinha = "*vertices " + QString::number(quantidadeDeVertices) + "\n"; + // arquivoNet.write(qlinha.toUtf8()); + + int quantidadeDePares=0; + //queue > arestas; //Armazena a lista com os pares de arestas para liga-los no grafo + queue > arestas; + for(set::iterator it= subConj.begin();it!=subConj.end();it++){ + ++contar; + bool primeiro = true; + //size_t anterior = 0; + string sentAnterior; + while (!grafo[*it].empty()) + { + size_t id = calcula_hash(grafo[*it].front()); + //arquivoNet.write(QString::number(id).toUtf8()); + //arquivoNet.write(" \""); + //arquivoNet.write(grafo[*it].front().c_str()); + // arquivoNet.write("\""); + + if(anteriorCond==contar-1 && !primeiro) + { + ++quantidadeDePares; + arestas.push(make_pair(sentAnterior,grafo[*it].front())); + } + //arquivoNet.write("\n"); + anteriorCond=contar; + // anterior=id; + string zera; + sentAnterior=zera; + sentAnterior=sentAnterior+grafo[*it].front(); + if (!grafo[*it].empty()) + grafo[*it].pop(); + primeiro=false; + ++contar; + } + } + + arquivoNet.write("source target\n"); + while(!arestas.empty()){ + //pair front = arestas.front(); + arquivoNet.write((arestas.front().first).c_str()); + arquivoNet.write(" "); + arquivoNet.write((arestas.front().second).c_str()); + arquivoNet.write("\n"); + //cout<redeSentencasImpressaoCanonica.size())+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str()); + qlinha="*vertices " + QString::number(this->redeSentencasImpressaoNormal.size())+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + //aqui ele esta imprimindo o numero da palavra e a palavra + i = 1; + for (this->it = this->redeSentencasImpressaoCanonica.begin(); this->it != this->redeSentencasImpressaoCanonica.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + arqSaidaCan.write(qlinha.toStdString().c_str()); + this->it->second = i++; //Neste ponto aqui ele atribui a canonica.second o valor de i++ + } + i = 1; + + for (this->it = this->redeSentencasImpressaoNormal.begin(); this->it != this->redeSentencasImpressaoNormal.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + arqSaidaNormal.write(qlinha.toStdString().c_str()); + this->it->second = i++; //atribui a normal.second o valor de i++ + } + + //imprime "*edges" nos dois arquivos para simbolizar que a partir dali começa a parte das arestas + arqSaidaCan.write("*edges\n"); + arqSaidaNormal.write("*edges\n"); + + //laço responsavel por imprimir as arestas entre os vertices e seus pesos + for (this->it = this->canonicaArestas.begin(); this->it != this->canonicaArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); //atribui a variavel palavra o valor de canonicaArestas.first que são pares de palavras em redeSentenças e individuais em rede de Palavras + QStringList lPalavra = palavra.split(" ");//Caso sejam pares de palavras ele divide ela quando achar um espaço + QString pal1=QString::number(this->redeSentencasImpressaoCanonica[palavra]);//pega o indice(rotulo do vertice) da primeira palavra + QString pal2=QString::number(this->redeSentencasImpressaoCanonica[palavra]);//pega o indice(rotulo do vertice) da segunda palavra + QString qlinha = pal1 + " " + pal2 + " " + + QString::number(this->it->second)+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str());//imprime no arquivo o formato "tagVetice1 tagVertice2 Peso" + } + //Neste laço o mesmo processo é refeito, no entanto com as palavras normais + for (this->it = this->normalArestas.begin(); this->it != this->normalArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); + QStringList lPalavra = palavra.split(" "); + QString pal1=QString::number(this->normal[lPalavra[0].toStdString()]); + QString pal2=QString::number(this->normal[lPalavra[1].toStdString()]); + QString qlinha = pal1 + " " + pal2 + " " + + QString::number(this->it->second)+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + } + arqSaidaCan.close(); + arqSaidaNormal.close(); + +} +*/ diff --git a/tratatexto.h b/tratatexto.h new file mode 100755 index 0000000..3c01837 --- /dev/null +++ b/tratatexto.h @@ -0,0 +1,69 @@ +#ifndef TRATATEXTO_H +#define TRATATEXTO_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +typedef struct +{ +string palavra1; +string palavra2;// +string classe; +double prob; +} linha; + + +class trataTexto +{ +public: + trataTexto(); + int palTot, palDiffN, palDiffC; + map normal; + map canonica; + map vocabulario; + map normalArestas; + map canonicaArestas; + map::iterator it; + map pontuacao; + mapredeSentencasImpressaoCanonica; + mapredeSentencasImpressaoNormal; + bool abreArquivo(string); + bool abreArquivoTAG(string arquivo, QStringList tags); + bool abreArquivoFRQ(string arquivo); + bool abreArquivoCRE(string nome, QStringList tags,int passo,string arqCre); + bool abreArquivoCREP(string nome, QStringList tags,int passo,string arqCre); + bool abreArquivoCRES(string nome, QStringList tags,int passo,string arqCre); + bool abreArquivoCRED(string nome, QStringList tags,int passo,string arqCre); + void carregaFiltros(); + bool filtro(QStringList token); + + string GetStdoutFromCommand(string cmd); + void imprimeREdeSentenca(string nomeArq); + bool abreArquivoTAGFFR(string arquivo, QStringList tags, string arqFfr); + void preanalyze(QString base, char idioma, char tipoAnalise); + void estatisticaPalavras(QString); + bool redePalavras (string infArq, int janela); + void imprimeNet (string nomeArq); + bool redeSentencas (string infArq); + bool redeFrases (string infArq); + bool eFimSentenca(QString); + void lowerWord (QString nomeArqE, QString nomeArqS); + int palFuncionalT; + int palVocabularioT; + int palContenidoT; + double zipfpromedio; + void frequenciaPalavra (QString nomeArq); + void simpleSubs (QString nomeArqE, QString nomeArqS, QStringList regAlvo, QStringList regSubs); + QStringList filtroGram,filtroPala; +}; + +#endif // TRATATEXTO_H