diff --git a/analyzer.py b/analyzer.py
new file mode 100755
index 0000000..de28aab
--- /dev/null
+++ b/analyzer.py
@@ -0,0 +1,57 @@
+import networkx as nx
+import sys
+
+
+def estatisticas(endereco, arq):
+    """Write per-node network statistics to a CSV file.
+
+    Reads the edge list ``<endereco>/<arq>Convertido.redeFras.csv`` and
+    writes ``<endereco>/<arq>_estatisticas.csv``: a header line followed by
+    one ' ; '-separated row per node with degree, betweenness, degree
+    centrality, closeness, eigenvector centrality and clustering
+    coefficient. The graph density is printed to stdout.
+
+    endereco -- directory holding the input file; the output goes there too
+    arq -- base name from which both file names are built
+    """
+    entrada = endereco + '/' + arq + 'Convertido.redeFras.csv'
+    saida = endereco + '/' + arq + '_estatisticas.csv'
+
+    # NOTE(review): this used to call nx.to_directed(graph) and discard the
+    # result, so every metric was computed on the graph exactly as
+    # read_edgelist() returns it. That behaviour is kept; only the dead call
+    # was removed.
+    graph = nx.read_edgelist(entrada)
+
+    clust_coeficients = nx.clustering(graph)
+    dict_betweenes = nx.betweenness_centrality(graph)
+    closeness = nx.closeness_centrality(graph)
+    eigenvector_centrality = nx.eigenvector_centrality(graph)
+    centralidade_grau = nx.degree_centrality(graph)
+    grau = nx.degree(graph)
+    densidade = nx.density(graph)
+    # Parenthesised form behaves the same under Python 2 and Python 3.
+    print(densidade)
+
+    # 'with' closes the CSV even when a lookup below raises.
+    with open(saida, 'w') as arquivo:
+        arquivo.write("No ; Grau ; Betweennes ; Centralidade_Grau ; Centralidade(Closeness) ; Eigenvector_Centrality ; Clustering\n")
+        for key in dict_betweenes:
+            colunas = [
+                key,
+                str(grau[key]),
+                str(dict_betweenes[key]),
+                str(centralidade_grau[key]),
+                str(closeness[key]),
+                str(eigenvector_centrality[key]),
+                # Last column is the clustering coefficient, matching the
+                # header; it must not repeat the betweenness value.
+                str(clust_coeficients[key]),
+            ]
+            arquivo.write(' ; '.join(colunas) + '\n')
+
+
+if __name__ == '__main__':
+    ende = str(sys.argv[1])
+    arq = str(sys.argv[2])
+    estatisticas(ende, arq)
diff --git a/config.cpp b/config.cpp new file mode 100755 index 0000000..9118ad6 --- /dev/null +++ b/config.cpp @@ -0,0 +1,48 @@ +#include "config.h" +#include "ui_config.h" +#include +#include +using namespace std; + +config::config(QWidget *parent) : + QDialog(parent), + ui(new Ui::config) +{ + ui->setupUi(this); + + string maxLinha; + + fstream arqGr,arqPl; + arqGr.open("gramat.ftl",ios_base::in); + if(!arqGr.is_open()) + arqGr.open("gramat.ftl",ios_base::out); + arqPl.open("palav.ftl",ios_base::in); + if(!arqPl.is_open()) + arqPl.open("palav.ftl",ios_base::out); + while(!arqGr.eof()) + { + 
getline(arqGr,maxLinha); + this->ui->gramat->append(QString(maxLinha.c_str())); + } + + while(!arqPl.eof()) + { + getline(arqPl,maxLinha); + this->ui->palavr->append(QString(maxLinha.c_str())); + } + arqGr.close(); + arqPl.close(); +} + +config::~config() +{ + ofstream arqGr,arqPl; + arqGr.open("gramat.ftl"); + arqGr << this->ui->gramat->toPlainText().toStdString(); + arqGr.close(); + arqPl.open("palav.ftl"); + arqPl << this->ui->palavr->toPlainText().toStdString(); + arqPl.close(); + + delete ui; +} diff --git a/config.h b/config.h new file mode 100755 index 0000000..ae315bb --- /dev/null +++ b/config.h @@ -0,0 +1,22 @@ +#ifndef CONFIG_H +#define CONFIG_H + +#include + +namespace Ui { +class config; +} + +class config : public QDialog +{ + Q_OBJECT + +public: + explicit config(QWidget *parent = 0); + ~config(); + +private: + Ui::config *ui; +}; + +#endif // CONFIG_H diff --git a/config.ui b/config.ui new file mode 100755 index 0000000..263ace4 --- /dev/null +++ b/config.ui @@ -0,0 +1,98 @@ + + + config + + + + 0 + 0 + 400 + 300 + + + + Dialog + + + + + + FILTROS A SEREM ELIMINADOS NA CONSTRUÇÃO DAS REDES + + + + + + + 1 + + + + Classes Gramaticais + + + + + + + + + + Palavras + + + + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + buttonBox + accepted() + config + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + config + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/contapalavras.cpp b/contapalavras.cpp new file mode 100755 index 0000000..1828a75 --- /dev/null +++ b/contapalavras.cpp @@ -0,0 +1,7 @@ +#include "contapalavras.h" +#include + +contaPalavras::contaPalavras() +{ +} + diff --git a/contapalavras.h b/contapalavras.h new file mode 100755 index 0000000..0bef089 --- /dev/null +++ b/contapalavras.h @@ -0,0 +1,10 @@ +#ifndef CONTAPALAVRAS_H +#define CONTAPALAVRAS_H + +class contaPalavras +{ +public: + contaPalavras(); +}; + +#endif // CONTAPALAVRAS_H diff --git 
a/desktop.ini b/desktop.ini new file mode 100755 index 0000000..13ff7ac --- /dev/null +++ b/desktop.ini @@ -0,0 +1,5 @@ +[.ShellClassInfo] +InfoTip=Esta pasta está compartilhada on-line. +IconFile=C:\Program Files (x86)\Google\Drive\googledrivesync.exe +IconIndex=16 + \ No newline at end of file diff --git a/dialogcrece.cpp b/dialogcrece.cpp new file mode 100755 index 0000000..fd49cd6 --- /dev/null +++ b/dialogcrece.cpp @@ -0,0 +1,23 @@ +#include "dialogcrece.h" +#include "ui_dialogcrece.h" + +DialogCrece::DialogCrece(QWidget *parent) : + QDialog(parent), + ui(new Ui::DialogCrece) +{ + ui->setupUi(this); +} + +DialogCrece::~DialogCrece() +{ + delete ui; +} + +QString DialogCrece::getClasses() +{ + return ui->ClassesLineEdit->text(); +} +int DialogCrece::getPasso() +{ + return this->ui->PassoSpinBox->value(); +} diff --git a/dialogcrece.h b/dialogcrece.h new file mode 100755 index 0000000..736add3 --- /dev/null +++ b/dialogcrece.h @@ -0,0 +1,24 @@ +#ifndef DIALOGCRECE_H +#define DIALOGCRECE_H + +#include + +namespace Ui { + class DialogCrece; +} + +class DialogCrece : public QDialog +{ + Q_OBJECT + +public: + explicit DialogCrece(QWidget *parent = 0); + QString getClasses(); + int getPasso(); + ~DialogCrece(); + +private: + Ui::DialogCrece *ui; +}; + +#endif // DIALOGCRECE_H diff --git a/dialogcrece.ui b/dialogcrece.ui new file mode 100755 index 0000000..8d28fe5 --- /dev/null +++ b/dialogcrece.ui @@ -0,0 +1,113 @@ + + + DialogCrece + + + + 0 + 0 + 611 + 138 + + + + Dialog + + + + + + + + Passo, em quantidade de palavras + + + + + + + 999999999 + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + + + + Classes separadas por vírgula + + + + + + + + + + + + + + Qt::Horizontal + + + QDialogButtonBox::Cancel|QDialogButtonBox::Ok + + + + + + + + + + + buttonBox + accepted() + DialogCrece + accept() + + + 248 + 254 + + + 157 + 274 + + + + + buttonBox + rejected() + DialogCrece + reject() + + + 316 + 260 + + + 286 + 274 + + + + + diff --git a/dialogjanela.cpp 
b/dialogjanela.cpp new file mode 100755 index 0000000..c7ceffb --- /dev/null +++ b/dialogjanela.cpp @@ -0,0 +1,68 @@ +#include "dialogjanela.h" +#include "ui_dialogjanela.h" + +DialogJanela::DialogJanela(QWidget *parent) : + QDialog(parent), + ui(new Ui::DialogJanela) +{ + ui->setupUi(this); +} + +DialogJanela::~DialogJanela() +{ + delete ui; +} + +void DialogJanela::changeEvent(QEvent *e) +{ + QDialog::changeEvent(e); + switch (e->type()) { + case QEvent::LanguageChange: + ui->retranslateUi(this); + break; + default: + break; + } +} + +char DialogJanela::getIdioma() +{ + if (this->ui->ingles->isChecked()) + return 1; + + if (this->ui->portugues->isChecked()) + return 2; + + return 0; +} + +char DialogJanela::getAnalise() +{ + if (this->ui->normal->isChecked()) + return 1; + + if (this->ui->canonico->isChecked()) + return 2; + + if (this->ui->completo->isChecked()) + return 3; + + return 0; +} + + + +void DialogJanela::on_cancelar_clicked() +{ + this->done(0); +} + +void DialogJanela::on_inicio_clicked() +{ + opIdioma = getIdioma(); + opAnalise = getAnalise(); + if (opIdioma == 0 || opAnalise == 0) + this->done(0); + else + this->done(1); +} diff --git a/dialogjanela.h b/dialogjanela.h new file mode 100755 index 0000000..da03bbe --- /dev/null +++ b/dialogjanela.h @@ -0,0 +1,31 @@ +#ifndef DIALOGJANELA_H +#define DIALOGJANELA_H + +#include + +namespace Ui { + class DialogJanela; +} + +class DialogJanela : public QDialog { + Q_OBJECT +public: + DialogJanela(QWidget *parent = 0); + ~DialogJanela(); + char getIdioma(); + char getAnalise(); + char opIdioma, opAnalise; + + +protected: + void changeEvent(QEvent *e); + +private: + Ui::DialogJanela *ui; + +private slots: + void on_cancelar_clicked(); + void on_inicio_clicked(); +}; + +#endif // DIALOGJANELA_H diff --git a/dialogjanela.ui b/dialogjanela.ui new file mode 100755 index 0000000..9ceb4a4 --- /dev/null +++ b/dialogjanela.ui @@ -0,0 +1,148 @@ + + + DialogJanela + + + + 0 + 0 + 280 + 216 + + + + Dialog + + + + + 0 + 
10 + 251 + 80 + + + + Escolha tipo de prétratamento: + + + + false + + + + 10 + 20 + 111 + 17 + + + + Normal + + + + + false + + + + 10 + 40 + 121 + 17 + + + + Canônico + + + true + + + + + + 10 + 60 + 161 + 17 + + + + Completo + + + + + + + 10 + 90 + 120 + 61 + + + + Escolha Idioma: + + + + + 10 + 20 + 91 + 17 + + + + Inglês + + + + + true + + + + 10 + 40 + 111 + 17 + + + + Português + + + + + + + 170 + 180 + 75 + 23 + + + + Cancelar + + + + + + 60 + 180 + 75 + 23 + + + + Iniciar + + + + + + diff --git a/guipalabras.cpp b/guipalabras.cpp new file mode 100755 index 0000000..87e4950 --- /dev/null +++ b/guipalabras.cpp @@ -0,0 +1,740 @@ +#include "guipalabras.h" +#include "ui_guipalabras.h" +#include +#include "tratatexto.h" +#include +#include +#include +#include +#include +#include "dialogcrece.h" +#include "config.h" +#include +using namespace std; + +guiPalabras::guiPalabras(QWidget *parent) : + QMainWindow(parent), + ui(new Ui::guiPalabras) +{ + ui->setupUi(this); + this->ui->menuAnalises->setEnabled(true); + this->ui->labelStatus->setText("Nenhum arquivo aberto"); +} + +guiPalabras::~guiPalabras() +{ + delete ui; +} + +void guiPalabras::changeEvent(QEvent *e) +{ + QMainWindow::changeEvent(e); + switch (e->type()) { + case QEvent::LanguageChange: + ui->retranslateUi(this); + break; + default: + break; + } +} + +void guiPalabras::on_actionAbrir_triggered() +{ + + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) para abrir"), + QString::null, + QString::null); + if(this->listaArquivosEntrada.size()>0) + { + this->ui->labelStatus->clear(); + this->ui->menuAnalises->setEnabled(true); + } + else + { + this->ui->menuAnalises->setEnabled(false); + } +} + +void guiPalabras::on_actionPre_tratamento_triggered() +{ + /* Fiz diversas modificações nesta funçao com a inteção de resolver o problema da codificação de letras latinas + as modificações estão marcadas com comentarios. 
+*/ + + //Quando a caixa de dialogo abre ele marca 0 caso não seja selecionado nada + int meleca = opEscolha.exec(); + if (meleca == 0) + return; + //Recebem as opçes de escolha de idioma e o tipo de analise + char idioma = opEscolha.opIdioma; + char tipoanalise = opEscolha.opAnalise; + + // + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Tratando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + progress.setValue(i++); + QFileInfo arqInfo(path); + QString nomePrincipal = arqInfo.path()+"/"+arqInfo.baseName(); + string localArquivo=arqInfo.path().toStdString(); + string nomeArquivo=arqInfo.baseName().toStdString(); + + if(idioma==1){ + system(("cd "+localArquivo +"&& cp "+nomeArquivo+".txt "+nomeArquivo+".txtConvertido.txt").c_str()); + arqIndividual.lowerWord(path+"Convertido.txt", nomePrincipal+"Convertido"); + arqIndividual.preanalyze(nomePrincipal+"Convertido", idioma, tipoanalise); + }else{ + + string tipoDoArquivo= arqIndividual.GetStdoutFromCommand(("cd "+arqInfo.path().toStdString()+" && file -i "+arqInfo.baseName().toStdString()+".txt").c_str());// esta linha recupera o tipo de arquivo de texto está sendo tratado + bool v=false; + QString auxiliar; + string s; + //Este for está buscando apenas a codificação do arquivo, que vem precedida de algumas outras informações inuteis para o nosso interesse + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arquivoSaida.open("resultados.txt", ios::out); + arquivoSaida << "Arquivo\tWordTot\tWordDiffN\tWordDiffCan\n"; + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqTag = arqInfo.baseName() + ".tag"; + QString arqFrq = arqInfo.baseName() + ".frq"; + if (!arqIndividual.abreArquivo(arqTag.toStdString())) + { + QMessageBox msgBox; + 
msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + // saving contents: + arqIndividual.frequenciaPalavra(arqFrq); + arquivoSaida << arqTag.toStdString().c_str() << "\t" + << arqIndividual.palTot << "\t" << arqIndividual.palDiffN << "\t" + << arqIndividual.palDiffC << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + + +void guiPalabras::on_actionPalavras_2_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + if (!arqIndividual.redePalavras((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString(), 2)) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + +void guiPalabras::on_actionSentencas_2_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arqIndividual.carregaFiltros(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + + if (!arqIndividual.redeSentencas((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + +void guiPalabras::on_actionVisualizacao_triggered() +{ + system ("wine 
~/.wine/drive_c/pajek/Pajek/PAJEK.exe"); +} + +void guiPalabras::on_actionCalculos_triggered() +{ + QString path; + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + //int i=0; + progress.show(); + string comand/*="netall > resultadosNetAll.txt"*/; + string co2; + //system(comand.c_str()); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString nomeArqSai = arqInfo.baseName(); + QString nomeArqEnt = arqInfo.baseName(); + QString endereco= arqInfo.path(); + cout<> resultadosNetAll.txt";*/ + comand="python analyzer.py "+endereco.toStdString()+ + " "+nomeArqEnt.toStdString(); + co2="nano config.ui"; + system(comand.c_str()); + } +} + +void guiPalabras::on_actionMinusculas_triggered() +{ + trataTexto arqIndividual; + QString path; + QProgressDialog progress("Tratando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + progress.setValue(i++); + QFileInfo arqInfo(path); + //QString nomePrincipal = arqInfo.baseName(); + + arqIndividual.lowerWord(path, arqInfo.baseName()); + //arqIndividual.preanalyze(nomePrincipal, idioma, tipoanalise); + if (progress.wasCanceled()) + break; + } +} +//Esta função procura no arquivo +void guiPalabras::on_actionFuncionais_triggered() +{ + trataTexto arqIndividual; + ofstream arquivoSaida; + QString path; + QString tipoNFunc = "AQ,R,N,VM,JJ,RB,VB,WRB";//Tinha um FW como Tag + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + 
progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arquivoSaida.open("estatisticasfuncionais.txt", ios::out); + arquivoSaida << "Arquivo\tpalContenido\tpalFuncionais\tpalVocabulario\n"; + + foreach(path, this->listaArquivosEntrada) + { + //QFileInfo arqInfo(path); + //QString arqTag = arqInfo.baseName() + ".tag"; + if (!arqIndividual.abreArquivoTAG(path.toStdString(),specialFunc)) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + // saving contents: + arquivoSaida << path.toStdString().c_str() << "\t" + << arqIndividual.palFuncionalT << "\t" << arqIndividual.palContenidoT << + "\t" << arqIndividual.palVocabularioT << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + +void guiPalabras::on_actionZipfPromedio_triggered() +{ + trataTexto arqIndividual; + ofstream arquivoSaida; + QString path; + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) FRQ para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + arquivoSaida.open("zipfpromedio.txt", ios::out);// Acrescentei isso arqInfo.path()+"/"+ + arquivoSaida << "Arquivo\tzipfpromedio\n"; + + foreach(path, this->listaArquivosEntrada) + { + if (!arqIndividual.abreArquivoFRQ(path.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + // saving contents: + arquivoSaida << path.toStdString().c_str() << "\t" + << arqIndividual.zipfpromedio << endl; + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } + arquivoSaida.close(); + +} + + +void 
guiPalabras::on_actionTamSentencas_triggered() +{ + trataTexto arqIndividual; + QString path; + QString tipoNFunc = "AQ,R,N,VM,FW,JJ,RB,VB,WRB"; + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + QProgressDialog progress("Calculando...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqFfr = arqInfo.baseName() + ".ffr"; + if (!arqIndividual.abreArquivoTAGFFR(path.toStdString(),specialFunc,arqFfr.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} + + +void guiPalabras::on_actionTags_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resum.dat",ios::out); + arqResum50.open("resum50k.dat",ios::out); + arqResum << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + arqResum50 << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".cret"; + 
progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRE(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionCrescPalabras_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + + arqResum.open("resumPal.dat",ios::out); + arqResum50.open("resumPal55k.dat",ios::out); + arqResum << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + arqResum50 << "arquivo\tTamanho\tWDN\tWDC\tNPN\tNPC\t"; + + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".crep"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCREP(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionCresSentencas_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = 
QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resumSentences.dat",ios::out); + arqResum50.open("resumSentences55k.dat",ios::out); + arqResum << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + arqResum50 << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".creS"; + progress.setValue(i++); + if (progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRES(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + break; + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionDeclara_es_utterances_triggered() +{ + DialogCrece para; + trataTexto arqIndividual; + ofstream arquivoCre,arqResum,arqResum50; + QString path; + if(para.exec()==0) return; + QString tipoNFunc = para.getClasses(); + int passo=para.getPasso(); + QStringList specialFunc = tipoNFunc.split(","); + this->listaArquivosEntrada = QFileDialog::getOpenFileNames( + this, + tr("Escolha arquivo(s) TAG para abrir"), + QString::null, + QString::null); + + if(this->listaArquivosEntrada.size()==0) + return; + arqResum.open("resumUtt.dat",ios::out); + arqResum50.open("resumUtt55k.dat",ios::out); + arqResum << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + arqResum50 << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + for(int i=0;ilistaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + QString arqCre = arqInfo.baseName() + ".creD"; + progress.setValue(i++); + if 
(progress.wasCanceled()) + break; + if (!arqIndividual.abreArquivoCRED(path.toStdString(),specialFunc,passo,arqCre.toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + path); + msgBox.exec(); + break; + + } + arquivoCre.close(); + } +} + +void guiPalabras::on_actionSubs_Simples_triggered() +{ + trataTexto arqIndividual; + QString path; + QString nomeArqSubs; + string alvo, subs; + QStringList regrasAlvo; + QStringList regrasSubs; + ifstream arqSubs; + nomeArqSubs = QFileDialog::getOpenFileName( + this, + tr("Escolha arquivo(s) de regras para abrir"), + QString::null, + QString::null); + arqSubs.open(nomeArqSubs.toStdString().c_str()); + do{ + arqSubs >> alvo >> subs; + QString Qalvo=alvo.c_str(); + QString Qsubs=subs.c_str(); + Qsubs.replace("\\s"," "); + Qsubs.replace("\\d",""); + + regrasAlvo.push_back(Qalvo); + regrasSubs.push_back(Qsubs); + }while(!arqSubs.eof()); + + + + QProgressDialog progress("Aplicando Regras...Aguarde", "Cancelar", 0, + this->listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + foreach(path, this->listaArquivosEntrada) + { + cout<<"passei aqui"<listaArquivosEntrada.size(), this); + progress.setWindowModality(Qt::WindowModal); + int i=0; + progress.show(); + arqIndividual.carregaFiltros(); + + foreach(path, this->listaArquivosEntrada) + { + QFileInfo arqInfo(path); + + if (!arqIndividual.redeFrases((arqInfo.path()+"/"+arqInfo.baseName()+"Convertido").toStdString())) + { + QMessageBox msgBox; + msgBox.setIcon(QMessageBox::Critical); + msgBox.setText("Existe um erro em seu arquivo" + arqInfo.baseName()); + msgBox.exec(); + + } + + progress.setValue(i++); + if (progress.wasCanceled()) + break; + } +} +void guiPalabras::on_actionOp_es_de_rede_triggered() +{ + config op; + op.exec(); + +} +void guiPalabras:: on_actionAjuda_triggered(){ + +} diff --git a/guipalabras.h b/guipalabras.h new file mode 100755 
index 0000000..391a611 --- /dev/null +++ b/guipalabras.h @@ -0,0 +1,56 @@ +#ifndef GUIPALABRAS_H +#define GUIPALABRAS_H + +#include +#include +#include +#include +#include "dialogjanela.h" +#include "dialogjanela.h" +#include "ui_dialogjanela.h" + +namespace Ui { + class guiPalabras; +} + +class guiPalabras : public QMainWindow { + Q_OBJECT +public: + guiPalabras(QWidget *parent = 0); + ~guiPalabras(); + +protected: + void changeEvent(QEvent *e); + + +private: + Ui::guiPalabras *ui; + QStringList listaArquivosEntrada; + DialogJanela opEscolha; + +private slots: + + +private slots: + void on_actionTamSentencas_triggered(); + void on_actionZipfPromedio_triggered(); + void on_actionFuncionais_triggered(); + void on_actionMinusculas_triggered(); + void on_actionCalculos_triggered(); + void on_actionVisualizacao_triggered(); + void on_actionSentencas_2_triggered(); + void on_actionPalavras_2_triggered(); + void on_actionPalavras_triggered(); + void on_actionPre_tratamento_triggered(); + void on_actionAbrir_triggered(); + void on_actionTags_triggered(); + void on_actionCrescPalabras_triggered(); + void on_actionCresSentencas_triggered(); + void on_actionDeclara_es_utterances_triggered(); + void on_actionSubs_Simples_triggered(); + void on_actionOp_es_de_rede_triggered(); + void on_actionFrases_triggered(); + void on_actionAjuda_triggered(); +}; + +#endif // GUIPALABRAS_H diff --git a/guipalabras.ui b/guipalabras.ui new file mode 100755 index 0000000..a60b526 --- /dev/null +++ b/guipalabras.ui @@ -0,0 +1,323 @@ + + + guiPalabras + + + + 0 + 0 + 497 + 400 + + + + Palabras + + + + + false + + + + 9 + 9 + 471 + 311 + + + + + 0 + 5 + + + + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd"> +<html><head><meta name="qrichtext" content="1" /><style type="text/css"> +p, li { white-space: pre-wrap; } +</style></head><body style=" font-family:'Sans Serif'; font-size:9pt; font-weight:400; font-style:normal;"> +<p align="justify" style=" 
margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" font-weight:600;">Para utilizar este programa no linux é necessário baixar uma biblioteca escrita em C++ chamada de Freeling. Ela é responsável por analizar sintaticamente o texto e é parte fundamental de todos os processos feitos por esse programa. Por favor baixe o </span><a href="https://github.com/TALP-UPC/FreeLing/releases"><span style=" font-weight:600; text-decoration: underline; color:#0000ff;">Freeling .</span></a></p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">Cuidados que se deve tomar com relação ao texto a ser analizado:</p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">-O texto deve estar com a terminação .txt </p> +<p align="justify" style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;">-Se o nome estiver como no exemplo &quot;nome do arquivo.txt&quot; transformar para &quot;nome_do_arquivo.txt&quot;<br /></p></body></html> + + + + + false + + + + 10 + 321 + 281 + 20 + + + + : + + + labelStatus + textEditResult + + + + + 0 + 0 + 497 + 22 + + + + + Arquivo + + + + + + + + Analises + + + + Redes + + + + + + + + + Crescimento + + + + + + + + + Regras de substituiçao + + + + + + + + + + + + + + + + + + + true + + + Resultados + + + + + + + Ajuda + + + + + + + + + + TopToolBarArea + + + false + + + + + + Abrir o(s) Aquivo(s) + + + + + Fechar + + + + + Sair do Programa + + + true + + + false + + + + + Totais + + + + + Diferentes + + + + + Sentencas + + + false + + + false + + + + + Pre-tratamento + + + + + false + + + Palavras + + + false + + + + + Palavras + + + + + Sentencas + + + + + Calculos + + + + + Visualizacao + + + true + + + + + false + + + Minusculas + + + false + + + false + + 
+ + + Funcionais + + + + + ZipfPromedio + + + + + tamSentencas + + + + + Tags + + + + + Palabras + + + + + Sentenças + + + + + Declarações (utterances) + + + + + Subs Simples + + + + + Opções de rede + + + + + Frases + + + + + teste + + + + + + + + actionSalvar + triggered() + actionFechar + trigger() + + + -1 + -1 + + + -1 + -1 + + + + + diff --git a/main.cpp b/main.cpp new file mode 100755 index 0000000..b7f3019 --- /dev/null +++ b/main.cpp @@ -0,0 +1,10 @@ +#include +#include "guipalabras.h" + +int main(int argc, char *argv[]) +{ + QApplication a(argc, argv); + guiPalabras w; + w.show(); + return a.exec(); +} diff --git a/palabras.pro b/palabras.pro new file mode 100755 index 0000000..1ccaaf2 --- /dev/null +++ b/palabras.pro @@ -0,0 +1,34 @@ +# ------------------------------------------------- +# Project created by QtCreator 2010-06-24T17:25:40 +# ------------------------------------------------- + +QT += core gui + +greaterThan(QT_MAJOR_VERSION, 4): QT += widgets + +TARGET = palabras +TEMPLATE = app +SOURCES += main.cpp \ + guipalabras.cpp \ + contapalavras.cpp \ + tratatexto.cpp \ + dialogjanela.cpp \ + dialogcrece.cpp \ + config.cpp +HEADERS += guipalabras.h \ + contapalavras.h \ + tratatexto.h \ + dialogjanela.h \ + tratatexto.h \ + dialogcrece.h \ + config.h +FORMS += guipalabras.ui \ + dialogjanela.ui \ + dialogcrece.ui \ + config.ui + +DISTFILES += \ + analyzer.py + + + diff --git a/palabras.pro.user b/palabras.pro.user new file mode 100755 index 0000000..6fa27e3 --- /dev/null +++ b/palabras.pro.user @@ -0,0 +1,336 @@ + + + + + + EnvironmentId + {c3d3a9ab-44b1-402b-b49b-8c4de2988f2b} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + 
ProjectExplorer.Project.Target.0 + + Desktop Qt 5.10.1 GCC 64bit + Desktop Qt 5.10.1 GCC 64bit + qt.qt5.5101.gcc_64_kit + 0 + 0 + 0 + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 
8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:/home/lucas/Desktop/IC/QT projeto/palablasSilvia/palabras.pro + true + + palabras.pro + false + + /home/lucas/Desktop/IC/QT projeto/build-palabras-Desktop_Qt_5_10_1_GCC_64bit-Debug + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/palabras.pro.user.2.1pre1 b/palabras.pro.user.2.1pre1 new file mode 100755 index 0000000..c1b2c50 --- /dev/null +++ b/palabras.pro.user.2.1pre1 @@ -0,0 +1,113 @@ + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + System + + + + ProjectExplorer.Project.Target.0 + + Desktop + Qt4ProjectManager.Target.DesktopTarget + 0 + 0 + + + qmake + QtProjectManager.QMakeBuildStep + + + + Make + Qt4ProjectManager.MakeStep + false + + + + 2 + + Make + Qt4ProjectManager.MakeStep + true + + clean + + + + 1 + false + + Debug + Qt4ProjectManager.Qt4BuildConfiguration + 2 + /home/scaldeira/Escritorio/palabras-build-desktop + 2 + 0 + true + + + + qmake + QtProjectManager.QMakeBuildStep + + + + Make + Qt4ProjectManager.MakeStep + false + + + + 2 + + Make + Qt4ProjectManager.MakeStep + true + + clean + + + + 1 + false + + Release + Qt4ProjectManager.Qt4BuildConfiguration + 0 + /home/scaldeira/Escritorio/palabras-build-desktop + 2 + 0 + true + + 2 + + palabras + Qt4ProjectManager.Qt4RunConfiguration + 2 + + palabras.pro + false + false + + false + false + + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 4 + + diff --git a/palabras.pro.user.2.7pre1 b/palabras.pro.user.2.7pre1 new file mode 100755 index 0000000..0e9be3c --- /dev/null +++ b/palabras.pro.user.2.7pre1 @@ -0,0 +1,243 @@ + + + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + 
QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + true + 1 + true + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.0.2 MinGW 32bit + Desktop Qt 5.0.2 MinGW 32bit + qt.502.win32_mingw47.essentials_kit + 0 + 0 + 0 + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia/debug + true + + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia/release + true + + 2 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + true + + false + false + false + false + true + 0.01 + 10 + true + 25 + + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/JoséGarcia/Documents/My Box Files/QT/palablasSilvia/palabras.pro + 2 + + palabras.pro + false + false + + + 3768 + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.EnvironmentId + {8ac8d706-7235-4bcb-9480-b125e4bca84c} + + + ProjectExplorer.Project.Updater.FileVersion + 12 + + diff --git 
a/palabras.pro.user.3.0-pre1 b/palabras.pro.user.3.0-pre1 new file mode 100755 index 0000000..76e2f84 --- /dev/null +++ b/palabras.pro.user.3.0-pre1 @@ -0,0 +1,245 @@ + + + + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + true + 1 + true + 0 + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.1.1 MinGW 32bit + Desktop Qt 5.1.1 MinGW 32bit + qt.511.win32_mingw48.essentials_kit + 0 + 0 + 0 + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia + true + + + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + true + + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + C:/Users/JoséGarcia/Documents/Programas/QT/PalabrasSilvia + true + + 2 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + true + + false + false + false + false + true + 0.01 + 10 + true + 25 + + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/JoséGarcia/Documents/My Box 
Files/QT/palablasSilvia/palabras.pro + + palabras.pro + false + false + + 3768 + true + false + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.EnvironmentId + {8ac8d706-7235-4bcb-9480-b125e4bca84c} + + + ProjectExplorer.Project.Updater.FileVersion + 14 + + diff --git a/palabras.pro.user.35f2e92 b/palabras.pro.user.35f2e92 new file mode 100755 index 0000000..40da7d2 --- /dev/null +++ b/palabras.pro.user.35f2e92 @@ -0,0 +1,318 @@ + + + + + + EnvironmentId + {35f2e920-c6c4-43b7-af26-efdc52411479} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.8.0 MinGW 32bit + Desktop Qt 5.8.0 MinGW 32bit + qt.58.win32_mingw53_kit + 0 + 0 + 0 + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + 
Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + Qt4ProjectManager.Qt4RunConfiguration:C:/Users/vivas/Google Drive/QT/palablasSilvia/palabras.pro + true + + palabras.pro + false + + C:/Users/vivas/Google Drive/QT/build-palabras-Desktop_Qt_5_8_0_MinGW_32bit-Debug + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/palabras.pro.user.4b39ca2 b/palabras.pro.user.4b39ca2 new file mode 100755 index 0000000..16c36c2 --- /dev/null +++ b/palabras.pro.user.4b39ca2 @@ -0,0 +1,336 @@ + + + + + + EnvironmentId + {4b39ca23-9a9c-4cbb-856f-0a9e7c1918a4} + + + ProjectExplorer.Project.ActiveTarget + 0 + + + ProjectExplorer.Project.EditorSettings + + true + false + true + + Cpp + + CppGlobal + + + + QmlJS + + QmlJSGlobal + + + 2 + UTF-8 + false + 4 + false + 80 + true + true + 1 + true + false + 0 + true + true + 0 + 8 + true + 1 + true + true + true + false + + + + ProjectExplorer.Project.PluginSettings + + + + ProjectExplorer.Project.Target.0 + + Desktop Qt 5.9.0 GCC 64bit + Desktop Qt 5.9.0 GCC 
64bit + qt.59.gcc_64_kit + 1 + 0 + 0 + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Debug + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Debug + + Qt4ProjectManager.Qt4BuildConfiguration + 2 + true + + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Release + + + true + qmake + + QtProjectManager.QMakeBuildStep + false + + false + false + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Release + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Profile + + + true + qmake + + QtProjectManager.QMakeBuildStep + true + + false + true + false + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + false + -DCMAKE_BUILD_TYPE=Debug + + + 2 + Build + + ProjectExplorer.BuildSteps.Build + + + + true + Make + + Qt4ProjectManager.MakeStep + + -w + -r + + true + clean + + + 1 + Clean + + ProjectExplorer.BuildSteps.Clean + + 2 + false + + Profile + + Qt4ProjectManager.Qt4BuildConfiguration + 0 + true + + 3 + + + 0 + Deploy + + ProjectExplorer.BuildSteps.Deploy + + 1 + Deploy locally + + ProjectExplorer.DefaultDeployConfiguration + + 1 + + + false + false + 1000 + + true + + false + false + false + false + true + 0.01 + 10 + true + 1 + 25 + + 1 + true + false + true + valgrind + + 0 + 1 + 2 + 3 + 4 + 5 + 6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + + 2 + + palabras + + 
Qt4ProjectManager.Qt4RunConfiguration:/home/lucas/Desktop/IC/Softwares/palablasSilvia/palabras.pro + true + + palabras.pro + false + + /home/lucas/Desktop/IC/Softwares/build-palabras-Desktop_Qt_5_9_0_GCC_64bit-Release + 3768 + false + true + false + false + true + + 1 + + + + ProjectExplorer.Project.TargetCount + 1 + + + ProjectExplorer.Project.Updater.FileVersion + 18 + + + Version + 18 + + diff --git a/tratatexto.cpp b/tratatexto.cpp new file mode 100755 index 0000000..4a84400 --- /dev/null +++ b/tratatexto.cpp @@ -0,0 +1,1539 @@ +#include "tratatexto.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +trataTexto::trataTexto() +{ + QString tipoPunct = "Fp,Fs,Fd,Fx,Fg,Fit,Fat"; + QStringList specialPunct = tipoPunct.split(","); + for (int i=0; ipontuacao.insert(pair(specialPunct[i].toStdString(),0)); + carregaFiltros(); +} + +bool trataTexto::abreArquivo(string nome) +{ + + ifstream arquivo; + string token; + QStringList list1; + int nPal=0; + arquivo.open(nome.c_str(), ios::in); + + this->normal.clear(); + this->canonica.clear(); + list1.clear(); + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); + string codigo2 = list1[1].toLower().toStdString(); + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + canonica[codigo2]=canonica[codigo2]+1; + nPal++; + + } + + arquivo.close(); + this->palDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + return true; +} + + +bool trataTexto::abreArquivoTAG(string nome, QStringList tags) +{ + ifstream arquivo; + string token; + 
QStringList list1; + int nPalFuncional=0; + int nPalContenido=0; + arquivo.open(nome.c_str(), ios::in); + this->vocabulario.clear(); + + while (!arquivo.eof()) // && (nPalFuncional + nPalContenido <= 4000)) + { + int i=0; + getline(arquivo, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo = list1[2].toStdString(); + if (list1[2][0] == 'F') + continue; + string codigo2 = list1[1].toLower().toStdString(); + + for (i=0;ivocabulario.insert(pair(codigo2,vocabulario[codigo2]+1)); + vocabulario[codigo2]=vocabulario[codigo2]+1; + nPalContenido++; + break; + } + } + + if (i==tags.size()) + nPalFuncional++; + } + arquivo.close(); + this->palVocabularioT = this->vocabulario.size(); + this->palFuncionalT=nPalFuncional; + this->palContenidoT=nPalContenido; + return true; +} + + + +bool trataTexto::abreArquivoCRE(string nome, QStringList tags,int passo, string arqCre) +{ + ifstream arquivo; + ofstream arquivoCre,arqResum; + int totCanAn=0; + int totNorAn=0; + map::iterator it; + string token; + vector tagContNov; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. 
+ + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + + tagContNov.assign(tags.size(),0); + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + canonica[codigo2]=canonica[codigo2]+1; + for (int i=0;i=tamanho) + { + arquivoCre << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;i=55000) // resumo a 55000 palavras + { + arqResum.open("resum55k.dat",ios::app); + arqResum << arqCre << "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;inormal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resum.dat",ios::app); + arqResum << arqCre << "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + 
this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagContNov; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tWDN\tWDC\tNPN\tNPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + continue; + + this->normal.insert(pair(codigo1,normal[codigo1]+1)); + normal[codigo1]=normal[codigo1]+1; + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + canonica[codigo2]=canonica[codigo2]+1; + for (int i=0;i=tamanho) + { + arquivoCre << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + for(unsigned int i=0;i=55000) // resumo a 55000 palavras + { + arqResum.open("resumPal55k.dat",ios::app); + arqResum << arqCre<< "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;inormal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + totNorAn=normal.size(); + totCanAn=canonica.size(); + 
for(unsigned int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumPal.dat",ios::app); + arqResum << arqCre<< "\t" << nPal <<"\t" << this->normal.size() << "\t" << + this->canonica.size()<<"\t"<< + this->normal.size()-totNorAn<<"\t"<< + this->canonica.size()-totCanAn<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tNum_Sent\tWPS\tDWPC\tPPS\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + QStringList endSentences,endPauses; + endSentences << "Fat" << "Fp" << "Fs" << "Fit"; + endPauses << "Fat" << "Fd" << "Fp" << "Fs" << "Fx" <<"Fit" << "Fg" <<"Fe" <<"Fc"; + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + bool fS,fP; + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + + fS=endSentences.contains(list1[2],Qt::CaseInsensitive); + fP=endPauses.contains(list1[2],Qt::CaseInsensitive); + + if (fS|| fP) + { + if(fS)nSent++; + if(fP)nPaus++; + nDWPS+=this->canonica.size(); + this->canonica.clear(); + this->normal.clear(); + if(nPal>=tamanho) + { + arquivoCre << nPal <<"\t" << nSent << "\t" << + (double)nPal/nSent<<"\t"<< + (double)nDWPS/nSent<<"\t" << + (double) nPal/nPaus<<"\t"; + for(unsigned int i=0;i=55000) // resumo a 50000 palavras + { + arqResum.open("resumSentences55k.dat",ios::app); + arqResum << arqCre << "\t" << nSent 
<<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t" << (double) nPal/nPaus<<"\t"; + for(unsigned int i=0;inormal.insert(pair(codigo1,normal[codigo1]+1)); + this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + for (int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumSentences.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"<<(double) nPal/nPaus<<"\t"; + for(unsigned int i=0;i::iterator it; + string token; + vector tagCont; + + QStringList list1; + int tamanho=passo; + int nPal=0; + + if(passo==0) tamanho=99999999; // garante que apenas o ultimo valor é impresso. + + arquivo.open(nome.c_str(), ios::in); + + arquivoCre.open(arqCre.c_str(),ios::out); + arquivoCre << "Tamanho\tNum_Sent\tWPS\tDWPC\t"; + for(int i=0;inormal.clear(); + this->canonica.clear(); + list1.clear(); + + tagCont.assign(tags.size(),0); + + while (!arquivo.eof()) + { + getline(arquivo, token); + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo1 = list1[0].toLower().toStdString(); // nomal + string codigo2 = list1[1].toLower().toStdString(); // canonica + string codigo3 = list1[2].toStdString(); // tag + if (list1[2][0] == 'F') + { + nSent++; + nDWPS+=this->canonica.size(); + this->canonica.clear(); + this->normal.clear(); + if(nPal>=tamanho) + { + arquivoCre << nPal <<"\t" << nSent << "\t" << + (double)nPal/nSent<<"\t"<< + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;i=55000) // resumo a 50000 palavras + { + arqResum.open("resumUtt55k.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;inormal.insert(pair(codigo1,normal[codigo1]+1)); + 
this->canonica.insert(pair(codigo2,canonica[codigo2]+1)); + + for (int i=0;ipalDiffN = this->normal.size(); + this->palDiffC = this->canonica.size(); + this->palTot = nPal; + arqResum.open("resumUtt.dat",ios::app); + arqResum << arqCre << "\t" << nSent <<"\t" << (double)nPal/nSent << "\t" << + (double)nDWPS/nSent<<"\t"; + for(unsigned int i=0;ifiltroGram.push_back(QString(maxLinha.c_str())); + } + + while(!arqPl.eof()) + { + getline(arqPl,maxLinha); + this->filtroPala.push_back(QString(maxLinha.c_str())); + } + arqGr.close(); + arqPl.close(); +} + +bool trataTexto::filtro(QStringList token) +{ + bool flagGr,flagPl; + flagGr=flagPl=false; + for(int i=0;ifiltroGram.size();i++) + { + int tam=this->filtroGram[i].size(); + if(tam==0) continue; + if(this->filtroGram[i].toUpper()==token[2].left(tam).toUpper()) + { + flagGr=true; + break; + } + } + for(int i=0;ifiltroPala.size();i++) + { + int tam=this->filtroPala[i].size(); + if(tam==0) continue; + if(this->filtroPala[i].toUpper()==token[1].toUpper()) + { + flagPl=true; + break; + } + } + return flagGr||flagPl; +} + + + +bool trataTexto::abreArquivoFRQ(string nome) +{ + ifstream arquivo; + string token; + QStringList list1; + list frequencia; + list ::iterator it; + arquivo.open(nome.c_str(), ios::in); + + getline(arquivo, token); + + while (!arquivo.eof()) + { + getline(arquivo, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split("\t"); + if (list1.size() < 2) + return false; + int freq = list1[1].toInt(); + frequencia.push_back(freq); + } + arquivo.close(); + + frequencia.sort(); + int soma=0; + int somaFreq=0; + int i=0; + for (it=frequencia.begin();it!=frequencia.end();++it) + { + soma+= (*it*(frequencia.size()-i)); + somaFreq+= *it; + i++; + + } + + this->zipfpromedio=(double)soma/somaFreq; + + return true; +} + + +bool trataTexto::abreArquivoTAGFFR(string nome, QStringList tags, string arqFfr) +{ + ifstream 
arqEntrada; + ofstream arqSaida; + string token; + QStringList list1; + vector totPalavras; + vector palContenido; + vector tamFrase; + vector qtdFrases; + arqEntrada.open(nome.c_str(), ios::in); + arqSaida.open(arqFfr.c_str(), ios::out); + + totPalavras.push_back(0); + palContenido.push_back(0); + int maxTamSent=0; + int is=0; + + while (!arqEntrada.eof()) + { + int i=0; + getline(arqEntrada, token); + + if(token.size() == 0) + continue; + QString qtoken; + QTextStream meleca(token.c_str()); + qtoken = meleca.readLine(1000); + list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + string codigo = list1[2].toStdString(); + if (!list1[2].compare("Fat") || !list1[2].compare("Fd") || !list1[2].compare("Fit") + || !list1[2].compare("Fp") || !list1[2].compare("Fs") || !list1[2].compare("Fx")) + { + totPalavras.push_back(0); + palContenido.push_back(0); + maxTamSent = (totPalavras[is]>maxTamSent)?totPalavras[is]:maxTamSent; + is++; + continue; + } + + if (list1[2][0] == 'F' && list1[2].compare("Fw")) + continue; + + totPalavras[is]++; + + for (i=0;i \"" + baseName + ".tag\""); + system(comando2.toStdString().c_str()); + } + + QString comando("analyze -f " + arqConfig + ".cfg tagged <\"" + + baseName + ".pre\"> \"" + baseName + ".tag\""); + system(comando.toStdString().c_str()); +} + + +bool trataTexto::redePalavras (string infArq, int janela) +{ + + + vector janPalNormal, janPalCanonica; + QStringList list1; + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + this->canonicaArestas.clear(); + + list1.clear(); + + for (int i=0;inormal.insert(pair(codigo1,normal.size())); + this->canonica.insert(pair(codigo2,canonica.size())); + + janPalNormal.push_back(codigo1); + janPalCanonica.push_back(codigo2); + } + + while(!in.atEnd()) + { + for (unsigned 
i=0;inormalArestas.insert(pair(codigo1,normalArestas[codigo1]++)); + this->normalArestas.insert(pair(codigo3,normalArestas[codigo3]++)); + this->canonicaArestas.insert(pair(codigo2,canonicaArestas[codigo2]++)); + this->canonicaArestas.insert(pair(codigo4,canonicaArestas[codigo4]++)); + } + } + janPalNormal.erase(janPalNormal.begin()); + janPalCanonica.erase(janPalCanonica.begin()); + + LER: + QString qtoken = in.readLine(1000); + if(qtoken.size() == 0) + { + if (in.atEnd()) continue; + else goto LER; + } + list1 = qtoken.split(" "); + + //QString qtoken(token.c_str()); + //list1 = qtoken.split(" "); + if (list1.size() < 4) + return false; + + if(filtro(list1)) // desconsidera as palavras e formas canonicas preselecionadas. + { + if (in.atEnd()) continue; + else goto LER; + } + if (list1[2][0] == 'F') + { + if (in.atEnd()) continue; + else goto LER; + } + string codigo1 = list1[0].toLower().toStdString(); + string codigo2 = list1[1].toLower().toStdString(); + janPalNormal.push_back(codigo1); + janPalCanonica.push_back(codigo2); + this->normal.insert(pair(codigo1,normal.size())); + this->canonica.insert(pair(codigo2,canonica.size())); + } + arquivo.close(); + + this->imprimeNet(infArq); + + return true; + +} + +string trataTexto:: GetStdoutFromCommand(string cmd) { + + string data; + FILE * stream; + const int max_buffer = 256; + char buffer[max_buffer]; + cmd.append(" 2>&1"); + + stream = popen(cmd.c_str(), "r"); + if (stream) { + while (!feof(stream)) + if (fgets(buffer, max_buffer, stream) != NULL) data.append(buffer); + pclose(stream); +} +return data; +} + + +void trataTexto::lowerWord (QString nomeArqE, QString nomeArqS) +{ + //fstream arquivoEntrada; + //fstream arquivoSaida; + //arquivoEntrada.open(nomeArqE); + //arquivoSaida.open(nomeArqS+".pre"); + QFile arqEntrada(nomeArqE); + QFile arqSaida(nomeArqS + ".pre"); + char maxLinha[50]; + arqEntrada.open(QIODevice::ReadOnly); + arqSaida.open(QIODevice::WriteOnly); + while(arqEntrada.readLine( 
maxLinha,sizeof(maxLinha))>0) + { + int cont=0; + string aux; + while(maxLinha[cont]!='\0'){ + if(maxLinha[cont]=='\0'){ + break; + } + if((maxLinha[cont]>='A'&& maxLinha[cont]<='Z')){ + maxLinha[cont]=maxLinha[cont]+32; + } + /* switch (maxLinha[cont]) { + case 'É':maxLinha[cont]='é'; + break; + case 'Ó': maxLinha[cont]='ó'; + break; + case 'Ç': maxLinha[cont]='ç'; + break; + case 'Á': maxLinha[cont]='á'; + break; + case 'À': maxLinha[cont]='à'; + break; + case 'Ô': maxLinha[cont]='ô'; + break; + case 'Ê': maxLinha[cont]='ê'; + break; + case 'Í': maxLinha[cont]='í'; + break; + case 'Ã': maxLinha[cont]='ã'; + break; + case 'Õ': maxLinha[cont]='õ'; + break; + case 'Ẽ': maxLinha[cont]='ẽ'; + default: + break; + }*/ + cont++; + } + + QTextStream outStream(&arqSaida); + outStream<0) + { + QString qlinha = maxLinha; + for(unsigned i=0;iit = this->canonica.begin(); this->it != this->canonica.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + //arqSaidaCan.write(qlinha.toStdString().c_str()); + this->it->second = i++; //Neste ponto aqui ele atribui a canonica.second o valor de i++ + } + i = 1; + + for (this->it = this->normal.begin(); this->it != this->normal.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + // arqSaidaNormal.write(qlinha.toStdString().c_str()); + this->it->second = i++; //atribui a normal.second o valor de i++ + } + + //imprime "*edges" nos dois arquivos para simbolizar que a partir dali começa a parte das arestas + // arqSaidaCan.write("*edges\n"); + //arqSaidaNormal.write("*edges\n"); + + //laço responsavel por imprimir as arestas entre os vertices e seus pesos + for (this->it = this->canonicaArestas.begin(); this->it != this->canonicaArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); //atribui a variavel 
palavra o valor de canonicaArestas.first que são pares de palavras em redeSentenças e individuais em rede de Palavras + QStringList lPalavra = palavra.split(" ");//Caso sejam pares de palavras ele divide ela quando achar um espaço + QString pal1=QString::number(this->canonica[lPalavra[0].toStdString()]);//pega o indice(rotulo do vertice) da primeira palavra + QString pal2=QString::number(this->canonica[lPalavra[1].toStdString()]);//pega o indice(rotulo do vertice) da segunda palavra + // QString qlinha = pal1 + " " + pal2 + " " + + QString qlinha =lPalavra[0]+" "+lPalavra[1]+ " " + + QString::number(this->it->second)+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str());//imprime no arquivo o formato "tagVetice1 tagVertice2 Peso" + } +//Neste laço o mesmo processo é refeito, no entanto com as palavras normais + for (this->it = this->normalArestas.begin(); this->it != this->normalArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); + QStringList lPalavra = palavra.split(" "); + QString pal1=QString::number(this->normal[lPalavra[0].toStdString()]); + QString pal2=QString::number(this->normal[lPalavra[1].toStdString()]); + // QString qlinha = pal1 + " " + pal2 + " " + + QString qlinha= lPalavra[0]+" "+ lPalavra[1]+" "+ + QString::number(this->it->second)+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + } + arqSaidaCan.close(); + arqSaidaNormal.close(); +} + +bool trataTexto::redeSentencas (string infArq) +{ + string token;//Não foi utilizado para nada + vector sentPalNormal, sentPalCanonica;//guarda as palavras da sentença + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + //in.setCodec("LATIN"); + + + map > grafo;//Armazena em cada substantivo a sua fila de sentencas + queue substantivos; + set subConj; + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + 
this->canonicaArestas.clear(); + this->redeSentencasImpressaoCanonica.clear(); + this->redeSentencasImpressaoNormal.clear(); + + int quantidadeDeVertices = 0; + while(!in.atEnd()) + { + sentPalNormal.clear(); + sentPalCanonica.clear(); + + string sentenca; + int quantidadeDeSubstantivos = 0; + bool tem_substantivo = false; // cond + bool terminou = false; + + + while(!in.atEnd()) + { + QString linha = in.readLine(1000); + if(linha.isEmpty()) + continue; + + QStringList tokens = linha.split(" "); + if (tokens.size() < 4) + return false; + + if(filtro(tokens)) // desconsidera as palavras e formas canonicas preselecionadas. + continue; + + string tag = tokens[2].toStdString(); + string palavra = tokens[0].toLower().toStdString(); + string canonica = tokens[1].toLower().toStdString(); + if(tag[0]!='F' && !filtro(tokens)){ + if(sentenca.size()>0) + sentenca = sentenca + "_" + canonica; + else + sentenca=sentenca+canonica; + + } + if (/*eFimSentenca(tokens[2]) ||*/ tag[0] == 'F' && (tag[1]=='a' ||(tag[1]=='i')|| tag[1]=='p'||tag[1]=='s')) + { + terminou = true; + + if (tem_substantivo) + { + quantidadeDeVertices = quantidadeDeVertices+1; + for (int r = 0; r < quantidadeDeSubstantivos; ++r) + { + grafo[substantivos.front()].push(sentenca); + substantivos.pop(); + } + } + break; + + } + else + { + if (tag[0]=='N'){ + tem_substantivo=true; + ++quantidadeDeSubstantivos; + //aux.push(primeiraPalavraDaLinha); + subConj.insert(canonica); + substantivos.push(canonica); + + } + } + } + } + + arquivo.close(); + + nomeArquivo = infArq + ".redeSent.csv"; + QFile arquivoNet(nomeArquivo.c_str()); + + if (!arquivoNet.open(QIODevice::WriteOnly)) + cerr << "erro ao abrir!\n"; + + + + + int contar=0,anteriorCond=0; + hash calcula_hash; + + //QString qlinha = "*vertices " + QString::number(quantidadeDeVertices) + "\n"; + // arquivoNet.write(qlinha.toUtf8()); + + int quantidadeDePares=0; + //queue > arestas; //Armazena a lista com os pares de arestas para liga-los no grafo + queue > 
arestas; + for(set::iterator it= subConj.begin();it!=subConj.end();it++){ + ++contar; + bool primeiro = true; + //size_t anterior = 0; + string sentAnterior; + while (!grafo[*it].empty()) + { + size_t id = calcula_hash(grafo[*it].front()); + //arquivoNet.write(QString::number(id).toUtf8()); + //arquivoNet.write(" \""); + //arquivoNet.write(grafo[*it].front().c_str()); + // arquivoNet.write("\""); + + if(anteriorCond==contar-1 && !primeiro) + { + ++quantidadeDePares; + arestas.push(make_pair(sentAnterior,grafo[*it].front())); + } + //arquivoNet.write("\n"); + anteriorCond=contar; + // anterior=id; + string zera; + sentAnterior=zera; + sentAnterior=sentAnterior+grafo[*it].front(); + if (!grafo[*it].empty()) + grafo[*it].pop(); + primeiro=false; + ++contar; + } + } + + arquivoNet.write("source target\n"); + while(!arestas.empty()){ + //pair front = arestas.front(); + arquivoNet.write((arestas.front().first).c_str()); + arquivoNet.write(" "); + arquivoNet.write((arestas.front().second).c_str()); + arquivoNet.write("\n"); + //cout<pontuacao.count(classe.toStdString()) == 1; +} + +void trataTexto::frequenciaPalavra(QString nomeArq) +{ + map::iterator it; + ofstream arquivo; + arquivo.open(nomeArq.toStdString().c_str(), ios::out); + + arquivo <<"Palavras" <<"Frequencia" <canonica.begin();it!=this->canonica.end();it++) + { + arquivo <<(*it).first <<"\t" <<(*it).second < sentPalNormal, sentPalCanonica;//guarda as palavras da sentença + string nomeArquivo = infArq + ".tag"; + QFile arquivo(nomeArquivo.c_str()); + arquivo.open(QIODevice::ReadOnly); + QTextStream in(&arquivo); + in.setCodec("UTF-8"); + //in.setCodec("LATIN"); + + + map > grafo;//Armazena em cada substantivo a sua fila de sentencas + queue substantivos; + set subConj; + + + this->normal.clear(); + this->canonica.clear(); + this->normalArestas.clear(); + this->canonicaArestas.clear(); + this->redeSentencasImpressaoCanonica.clear(); + this->redeSentencasImpressaoNormal.clear(); + + int quantidadeDeVertices = 0; + 
while(!in.atEnd()) + { + sentPalNormal.clear(); + sentPalCanonica.clear(); + + string sentenca; + int quantidadeDeSubstantivos = 0; + bool tem_substantivo = false; // cond + bool terminou = false; + + + while(!in.atEnd()) + { + QString linha = in.readLine(1000); + if(linha.isEmpty()) + continue; + + QStringList tokens = linha.split(" "); + if (tokens.size() < 4) + return false; + + if(filtro(tokens)) // desconsidera as palavras e formas canonicas preselecionadas. + continue; + + string tag = tokens[2].toStdString(); + string palavra = tokens[0].toLower().toStdString(); + string canonica = tokens[1].toLower().toStdString(); + if(tag[0]!='F' && !filtro(tokens)){ + if(sentenca.size()>0){ + sentenca = sentenca + "_" + canonica; + cout< calcula_hash; + + //QString qlinha = "*vertices " + QString::number(quantidadeDeVertices) + "\n"; + // arquivoNet.write(qlinha.toUtf8()); + + int quantidadeDePares=0; + //queue > arestas; //Armazena a lista com os pares de arestas para liga-los no grafo + queue > arestas; + for(set::iterator it= subConj.begin();it!=subConj.end();it++){ + ++contar; + bool primeiro = true; + //size_t anterior = 0; + string sentAnterior; + while (!grafo[*it].empty()) + { + size_t id = calcula_hash(grafo[*it].front()); + //arquivoNet.write(QString::number(id).toUtf8()); + //arquivoNet.write(" \""); + //arquivoNet.write(grafo[*it].front().c_str()); + // arquivoNet.write("\""); + + if(anteriorCond==contar-1 && !primeiro) + { + ++quantidadeDePares; + arestas.push(make_pair(sentAnterior,grafo[*it].front())); + } + //arquivoNet.write("\n"); + anteriorCond=contar; + // anterior=id; + string zera; + sentAnterior=zera; + sentAnterior=sentAnterior+grafo[*it].front(); + if (!grafo[*it].empty()) + grafo[*it].pop(); + primeiro=false; + ++contar; + } + } + + arquivoNet.write("source target\n"); + while(!arestas.empty()){ + //pair front = arestas.front(); + arquivoNet.write((arestas.front().first).c_str()); + arquivoNet.write(" "); + 
arquivoNet.write((arestas.front().second).c_str()); + arquivoNet.write("\n"); + //cout<redeSentencasImpressaoCanonica.size())+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str()); + qlinha="*vertices " + QString::number(this->redeSentencasImpressaoNormal.size())+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + //aqui ele esta imprimindo o numero da palavra e a palavra + i = 1; + for (this->it = this->redeSentencasImpressaoCanonica.begin(); this->it != this->redeSentencasImpressaoCanonica.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + arqSaidaCan.write(qlinha.toStdString().c_str()); + this->it->second = i++; //Neste ponto aqui ele atribui a canonica.second o valor de i++ + } + i = 1; + + for (this->it = this->redeSentencasImpressaoNormal.begin(); this->it != this->redeSentencasImpressaoNormal.end(); this->it++) + { + qlinha = QString::number(i)+" \""+QString(this->it->first.c_str())+" \"\n"; + qlinha=qlinha.toLower(); + qlinha=qlinha.toLocal8Bit(); + arqSaidaNormal.write(qlinha.toStdString().c_str()); + this->it->second = i++; //atribui a normal.second o valor de i++ + } + + //imprime "*edges" nos dois arquivos para simbolizar que a partir dali começa a parte das arestas + arqSaidaCan.write("*edges\n"); + arqSaidaNormal.write("*edges\n"); + + //laço responsavel por imprimir as arestas entre os vertices e seus pesos + for (this->it = this->canonicaArestas.begin(); this->it != this->canonicaArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); //atribui a variavel palavra o valor de canonicaArestas.first que são pares de palavras em redeSentenças e individuais em rede de Palavras + QStringList lPalavra = palavra.split(" ");//Caso sejam pares de palavras ele divide ela quando achar um espaço + QString pal1=QString::number(this->redeSentencasImpressaoCanonica[palavra]);//pega o indice(rotulo do vertice) da primeira 
palavra + QString pal2=QString::number(this->redeSentencasImpressaoCanonica[palavra]);//pega o indice(rotulo do vertice) da segunda palavra + QString qlinha = pal1 + " " + pal2 + " " + + QString::number(this->it->second)+ "\n"; + arqSaidaCan.write(qlinha.toStdString().c_str());//imprime no arquivo o formato "tagVetice1 tagVertice2 Peso" + } + //Neste laço o mesmo processo é refeito, no entanto com as palavras normais + for (this->it = this->normalArestas.begin(); this->it != this->normalArestas.end(); this->it++) + { + QString palavra(this->it->first.c_str()); + QStringList lPalavra = palavra.split(" "); + QString pal1=QString::number(this->normal[lPalavra[0].toStdString()]); + QString pal2=QString::number(this->normal[lPalavra[1].toStdString()]); + QString qlinha = pal1 + " " + pal2 + " " + + QString::number(this->it->second)+ "\n"; + arqSaidaNormal.write(qlinha.toStdString().c_str()); + } + arqSaidaCan.close(); + arqSaidaNormal.close(); + +} +*/ diff --git a/tratatexto.h b/tratatexto.h new file mode 100755 index 0000000..3c01837 --- /dev/null +++ b/tratatexto.h @@ -0,0 +1,69 @@ +#ifndef TRATATEXTO_H +#define TRATATEXTO_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +typedef struct +{ +string palavra1; +string palavra2;// +string classe; +double prob; +} linha; + + +class trataTexto +{ +public: + trataTexto(); + int palTot, palDiffN, palDiffC; + map normal; + map canonica; + map vocabulario; + map normalArestas; + map canonicaArestas; + map::iterator it; + map pontuacao; + mapredeSentencasImpressaoCanonica; + mapredeSentencasImpressaoNormal; + bool abreArquivo(string); + bool abreArquivoTAG(string arquivo, QStringList tags); + bool abreArquivoFRQ(string arquivo); + bool abreArquivoCRE(string nome, QStringList tags,int passo,string arqCre); + bool abreArquivoCREP(string nome, QStringList tags,int passo,string arqCre); + bool abreArquivoCRES(string nome, QStringList tags,int passo,string 
arqCre); + bool abreArquivoCRED(string nome, QStringList tags,int passo,string arqCre); + void carregaFiltros(); + bool filtro(QStringList token); + + string GetStdoutFromCommand(string cmd); + void imprimeREdeSentenca(string nomeArq); + bool abreArquivoTAGFFR(string arquivo, QStringList tags, string arqFfr); + void preanalyze(QString base, char idioma, char tipoAnalise); + void estatisticaPalavras(QString); + bool redePalavras (string infArq, int janela); + void imprimeNet (string nomeArq); + bool redeSentencas (string infArq); + bool redeFrases (string infArq); + bool eFimSentenca(QString); + void lowerWord (QString nomeArqE, QString nomeArqS); + int palFuncionalT; + int palVocabularioT; + int palContenidoT; + double zipfpromedio; + void frequenciaPalavra (QString nomeArq); + void simpleSubs (QString nomeArqE, QString nomeArqS, QStringList regAlvo, QStringList regSubs); + QStringList filtroGram,filtroPala; +}; + +#endif // TRATATEXTO_H