From 1cad70e1fb772a95a0242eb2f61ff80419c2fd4b Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Fri, 19 May 2023 10:08:34 -0400 Subject: [PATCH 01/15] Start to standardize the dbs --- alembic/env.py | 5 +- ...a3b83_add_filename_column_to_attachment.py | 53 ---- ...8a836bedb3f_add_na_flag_to_notice_table.py | 54 ---- .../78823e9293e9_match_local_migrations.py | 167 ++++++++++++ manifest.yml | 4 +- sql/migrations/r11p0.sql | 87 +++++++ src/fbo_scraper/db/auto_db.py | 244 ++++++++++++++++++ 7 files changed, 505 insertions(+), 109 deletions(-) delete mode 100644 alembic/versions/3725519a3b83_add_filename_column_to_attachment.py delete mode 100644 alembic/versions/48a836bedb3f_add_na_flag_to_notice_table.py create mode 100644 alembic/versions/78823e9293e9_match_local_migrations.py create mode 100644 sql/migrations/r11p0.sql create mode 100644 src/fbo_scraper/db/auto_db.py diff --git a/alembic/env.py b/alembic/env.py index dfedb7b2..c4ba1f5d 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -22,7 +22,10 @@ config.set_main_option('sqlalchemy.url', get_db_url()) from fbo_scraper.db import db -target_metadata = db.Base.metadata +from fbo_scraper.db import auto_db + +#target_metadata = db.Base.metadata +target_metadata = auto_db.Base.metadata # other values from the config, defined by the needs of env.py, diff --git a/alembic/versions/3725519a3b83_add_filename_column_to_attachment.py b/alembic/versions/3725519a3b83_add_filename_column_to_attachment.py deleted file mode 100644 index b15a0310..00000000 --- a/alembic/versions/3725519a3b83_add_filename_column_to_attachment.py +++ /dev/null @@ -1,53 +0,0 @@ -"""add filename column to Attachment - -Revision ID: 3725519a3b83 -Revises: -Create Date: 2019-04-08 19:37:28.880047 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy import engine_from_config -from sqlalchemy.engine import reflection - - -def table_has_column(table, column): - """Check table has a column. Usefule before trying to add or drop the column - in an alembic migration file. - - Arguments: - table {str} -- name of the table - column {str} -- name of the column - - Returns: - [bool] -- True if the table has the column. False otherwise. - """ - config = op.get_context().config - engine = engine_from_config( - config.get_section(config.config_ini_section), prefix='sqlalchemy.') - insp = reflection.Inspector.from_engine(engine) - has_column = False - for col in insp.get_columns(table): - if column not in col['name']: - continue - has_column = True - - return has_column - - - -# revision identifiers, used by Alembic. 
-revision = '3725519a3b83' -down_revision = None -branch_labels = None -depends_on = None - - -def upgrade(): - if not alembic_helpers.table_has_column('attachment', 'filename'): - op.add_column('attachment', sa.Column('filename', sa.Text, nullable = True)) - - -def downgrade(): - if alembic_helpers.table_has_column('attachment', 'filename'): - op.drop_column('attachment', 'filename') diff --git a/alembic/versions/48a836bedb3f_add_na_flag_to_notice_table.py b/alembic/versions/48a836bedb3f_add_na_flag_to_notice_table.py deleted file mode 100644 index 40b0d4f6..00000000 --- a/alembic/versions/48a836bedb3f_add_na_flag_to_notice_table.py +++ /dev/null @@ -1,54 +0,0 @@ -"""add na flag to notice table - -Revision ID: 48a836bedb3f -Revises: 3725519a3b83 -Create Date: 2019-09-25 10:16:10.507488 - -""" -from alembic import op -import sqlalchemy as sa -from sqlalchemy import engine_from_config -from sqlalchemy.engine import reflection - - -def table_has_column(table, column): - """Check table has a column. Usefule before trying to add or drop the column - in an alembic migration file. - - Arguments: - table {str} -- name of the table - column {str} -- name of the column - - Returns: - [bool] -- True if the table has the column. False otherwise. - """ - config = op.get_context().config - engine = engine_from_config( - config.get_section(config.config_ini_section), prefix='sqlalchemy.') - insp = reflection.Inspector.from_engine(engine) - has_column = False - for col in insp.get_columns(table): - if column not in col['name']: - continue - has_column = True - - return has_column - -# revision identifiers, used by Alembic. -revision = '48a836bedb3f' -down_revision = '3725519a3b83' -branch_labels = None -depends_on = None - - -def upgrade(): - if alembic_helpers.table_has_column('notice', 'na_flag'): - op.drop_column('notice', 'na_flag') - op.add_column('notice', sa.Column('na_flag', - sa.Boolean, - default = False)) - - -def downgrade(): - op.drop_column('notice', 'na_flag') - diff --git a/alembic/versions/78823e9293e9_match_local_migrations.py b/alembic/versions/78823e9293e9_match_local_migrations.py new file mode 100644 index 00000000..79af82b7 --- /dev/null +++ b/alembic/versions/78823e9293e9_match_local_migrations.py @@ -0,0 +1,167 @@ +"""Match local migrations + +Revision ID: 78823e9293e9 +Revises: +Create Date: 2023-05-18 11:45:28.896923 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. 
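+# down_revision = None below makes this the new base revision; the two
+# hand-written migrations deleted above appear to be folded into this file.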
+revision = '78823e9293e9'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+
+    # Agencies
+    op.alter_column('Agencies', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+    # Predictions
+    op.add_column('Predictions', sa.Column('eitLikelihood', postgresql.JSONB(astext_type=sa.Text()), nullable=True))
+    op.add_column('Predictions', sa.Column('active', sa.Boolean(), server_default=sa.text('true'), nullable=True))
+    op.alter_column('Predictions', 'title',
+                    existing_type=sa.VARCHAR(),
+                    nullable=False)
+    op.alter_column('Predictions', 'solNum',
+                    existing_type=sa.VARCHAR(),
+                    nullable=False)
+    op.alter_column('Predictions', 'noticeType',
+                    existing_type=sa.VARCHAR(),
+                    nullable=False)
+    op.alter_column('Predictions', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False)
+    op.alter_column('Predictions', 'history', server_default=sa.text("'[]'::jsonb"))
+
+    op.create_unique_constraint(None, 'Predictions', ['solNum'])
+    op.drop_column('Predictions', 'feedback')
+    op.drop_column('Predictions', 'category_list')
+
+    # Surveys
+    op.alter_column('Surveys', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+
+    # Users
+    op.alter_column('Users', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+    # Agency Alias
+    op.alter_column('agency_alias', 'agency_id',
+                    existing_type=sa.INTEGER(),
+                    nullable=False)
+    op.alter_column('agency_alias', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    server_default=sa.func.now(),
+                    nullable=False)
+    op.alter_column('agency_alias', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+
+    # Notice Type
+    op.add_column('notice_type', sa.Column('createdAt', sa.DateTime(), default=sa.func.now(), nullable=False))
+    op.add_column('notice_type', sa.Column('updatedAt', sa.DateTime(), nullable=True, onupdate=sa.func.now()))
+
+    # Survey Responses
+    op.alter_column('survey_responses', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False,
+                    existing_server_default=sa.text('now()'))
+    op.create_index(op.f('ix_survey_responses_solNum'), 'survey_responses', ['solNum'], unique=False)
+
+    # Survey Responses Archive
+    op.alter_column('survey_responses_archive', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    server_default=sa.func.now(),
+                    nullable=False)
+    op.alter_column('survey_responses_archive', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(timezone=True),
+                    nullable=True)
+    op.alter_column('survey_responses_archive', 'response', server_default=sa.text("'[]'::jsonb"))
+
+    # Solicitations
+    op.create_unique_constraint(None, 'solicitations', ['solNum'])
+    op.alter_column('solicitations', 'history', server_default=sa.text("'[]'::jsonb"))
+    op.alter_column('solicitations', 'action', server_default=sa.text("'[]'::jsonb"))
+    op.alter_column('solicitations', 'predictions', server_default=sa.text("'{\"value\": \"red\", \"history\": []}'::jsonb"))
+    op.alter_column('solicitations', 'compliant', server_default=sa.text("0"))
+    op.alter_column('solicitations', 'active', server_default=sa.text("true"))
+    op.alter_column('solicitations', 'na_flag', server_default=sa.text("false"))
+    op.alter_column('solicitations', 'updatedAt', nullable=True)
+
+    # Attachment
+    op.alter_column('attachment', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    server_default=sa.func.now(),
+                    nullable=False)
+
+    # Notice
+    op.alter_column('notice', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
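+                    # server_default is applied by the database itself, so rows
+                    # inserted outside SQLAlchemy also pick up the timestamp: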
server_default=sa.func.now(), + nullable=False) + + + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column('survey_responses_archive', 'updatedAt', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=False) + op.drop_index(op.f('ix_survey_responses_solNum'), table_name='survey_responses') + op.alter_column('survey_responses', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=True, + existing_server_default=sa.text('now()')) + op.drop_constraint(None, 'solicitations', type_='unique') + op.drop_column('notice_type', 'updatedAt') + op.drop_column('notice_type', 'createdAt') + op.alter_column('attachment', 'filename', + existing_type=sa.TEXT(), + nullable=False) + op.alter_column('agency_alias', 'updatedAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + op.alter_column('agency_alias', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + op.alter_column('agency_alias', 'agency_id', + existing_type=sa.INTEGER(), + nullable=True) + op.alter_column('Users', 'updatedAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + op.alter_column('Surveys', 'updatedAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + op.add_column('Predictions', sa.Column('category_list', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True)) + op.add_column('Predictions', sa.Column('feedback', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True)) + op.drop_constraint(None, 'Predictions', type_='unique') + op.alter_column('Predictions', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=True) + op.alter_column('Predictions', 'noticeType', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('Predictions', 'solNum', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('Predictions', 'title', + existing_type=sa.VARCHAR(), + nullable=True) + op.drop_column('Predictions', 'active') + op.drop_column('Predictions', 'eitLikelihood') + op.alter_column('Agencies', 'updatedAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + # ### end Alembic commands ### diff --git a/manifest.yml b/manifest.yml index cd37a319..fd01a792 100644 --- a/manifest.yml +++ b/manifest.yml @@ -1,7 +1,9 @@ --- applications: - name: srt-fbo-scraper - memory: 2GB + memory: 1GB disk_quota: 4GB no-route: true health-check-type: process + services: + - srt-postgres-dev \ No newline at end of file diff --git a/sql/migrations/r11p0.sql b/sql/migrations/r11p0.sql new file mode 100644 index 00000000..fef8495d --- /dev/null +++ b/sql/migrations/r11p0.sql @@ -0,0 +1,87 @@ +BEGIN; + +CREATE TABLE alembic_version ( + version_num VARCHAR(32) NOT NULL, + CONSTRAINT alembic_version_pkc PRIMARY KEY (version_num) +); + +-- Running upgrade -> 78823e9293e9 + +ALTER TABLE "Agencies" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE "Predictions" ADD COLUMN "eitLikelihood" JSONB; + +ALTER TABLE "Predictions" ADD COLUMN active BOOLEAN DEFAULT true; + +ALTER TABLE "Predictions" ALTER COLUMN title SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "solNum" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "noticeType" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN history SET DEFAULT '[]'::jsonb; + +ALTER TABLE "Predictions" ADD UNIQUE ("solNum"); + +ALTER TABLE "Predictions" DROP COLUMN feedback; + +ALTER TABLE "Predictions" DROP COLUMN category_list; + +ALTER TABLE 
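+-- Mixed-case identifiers such as "updatedAt" must stay double-quoted in
+-- PostgreSQL; unquoted names would be folded to lowercase.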
"Surveys" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE "Users" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN agency_id SET NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE agency_alias ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE notice_type ADD COLUMN "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT now(); + +ALTER TABLE notice_type ADD COLUMN "updatedAt" TIMESTAMP WITHOUT TIME ZONE; + +ALTER TABLE survey_responses ALTER COLUMN "createdAt" SET NOT NULL; + +CREATE INDEX "ix_survey_responses_solNum" ON survey_responses ("solNum"); + +ALTER TABLE survey_responses_archive ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE survey_responses_archive ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE survey_responses_archive ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE survey_responses_archive ALTER COLUMN response SET DEFAULT '[]'::jsonb; + +ALTER TABLE solicitations ADD UNIQUE ("solNum"); + +ALTER TABLE solicitations ALTER COLUMN history SET DEFAULT '[]'::jsonb; + +UPDATE solicitations SET history = '[]'::jsonb WHERE history IS NULL; + +ALTER TABLE solicitations ALTER COLUMN action SET DEFAULT '[]'::jsonb; + +ALTER TABLE solicitations ALTER COLUMN predictions SET DEFAULT '{"value": "red", "history": []}'::jsonb; + +ALTER TABLE solicitations ALTER COLUMN compliant SET DEFAULT 0; + +ALTER TABLE solicitations ALTER COLUMN active SET DEFAULT true; + +ALTER TABLE solicitations ALTER COLUMN na_flag SET DEFAULT false; + +ALTER TABLE attachment ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE attachment ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE notice ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE notice ALTER COLUMN "createdAt" SET DEFAULT now(); + +INSERT INTO alembic_version (version_num) VALUES ('78823e9293e9'); + +COMMIT; + diff --git a/src/fbo_scraper/db/auto_db.py b/src/fbo_scraper/db/auto_db.py new file mode 100644 index 00000000..afac4840 --- /dev/null +++ b/src/fbo_scraper/db/auto_db.py @@ -0,0 +1,244 @@ +# coding: utf-8 +from sqlalchemy import Boolean, Column, DateTime, Float, ForeignKey, Integer, String, Table, Text, text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import relationship +from sqlalchemy.ext.declarative import declarative_base +from datetime import datetime +from sqlalchemy.sql import func + +Base = declarative_base() +metadata = Base.metadata + + +class Agency(Base): + __tablename__ = 'Agencies' + + id = Column(Integer, primary_key=True, server_default=text("nextval('\"Agencies_id_seq\"'::regclass)")) + agency = Column(String) + acronym = Column(String) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + + +class Prediction(Base): + __tablename__ = 'Predictions' + + id = Column(Integer, primary_key=True, server_default=text("nextval('\"Predictions_id_seq\"'::regclass)")) + title = Column(String, nullable=False) + url = Column(String) + agency = Column(String) + numDocs = Column(Integer) + solNum = Column(String, nullable=False, unique=True) + noticeType = Column(String, nullable=False) + date = Column(DateTime) + office = Column(String) + na_flag = Column(Boolean) + eitLikelihood = Column(JSONB(astext_type=Text())) + undetermined = Column(Boolean) + action = Column(JSONB(astext_type=Text())) + actionStatus = Column(String) + actionDate = Column(DateTime) + 
history = Column(JSONB(astext_type=Text())) + contactInfo = Column(JSONB(astext_type=Text())) + parseStatus = Column(JSONB(astext_type=Text())) + predictions = Column(JSONB(astext_type=Text())) + reviewRec = Column(String) + searchText = Column(String) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + active = Column(Boolean, server_default=text("true")) + + +class SequelizeMeta(Base): + __tablename__ = 'SequelizeMeta' + + name = Column(String(255), primary_key=True) + + +class Survey(Base): + __tablename__ = 'Surveys' + + id = Column(Integer, primary_key=True, server_default=text("nextval('\"Surveys_id_seq\"'::regclass)")) + question = Column(Text) + choices = Column(JSONB(astext_type=Text())) + section = Column(String(2000)) + type = Column(String(2000)) + answer = Column(Text) + note = Column(Text) + choicesNote = Column(JSONB(astext_type=Text())) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + + +class User(Base): + __tablename__ = 'Users' + + id = Column(Integer, primary_key=True, server_default=text("nextval('\"Users_id_seq\"'::regclass)")) + firstName = Column(String) + lastName = Column(String) + agency = Column(String) + email = Column(String) + password = Column(String) + position = Column(String) + isAccepted = Column(Boolean) + isRejected = Column(Boolean) + userRole = Column(String) + rejectionNote = Column(String) + creationDate = Column(String) + tempPassword = Column(String) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + maxId = Column(String(256)) + + +class AgencyAlias(Base): + __tablename__ = 'agency_alias' + + id = Column(Integer, primary_key=True, server_default=text("nextval('agency_alias_id_seq'::regclass)")) + agency_id = Column(Integer, nullable=False) + alias = Column(String) + createdAt = Column(DateTime) + updatedAt = Column(DateTime) + + +class Model(Base): + __tablename__ = 'model' + + id = Column(Integer, primary_key=True, server_default=text("nextval('model_id_seq'::regclass)")) + results = Column(JSONB(astext_type=Text())) + params = Column(JSONB(astext_type=Text())) + score = Column(Float(53)) + create_date = Column(DateTime, nullable=False) + + +class NoticeType(Base): + __tablename__ = 'notice_type' + + id = Column(Integer, primary_key=True, server_default=text("nextval('notice_type_id_seq'::regclass)")) + notice_type = Column(String(50), index=True) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + + +class Solicitation(Base): + __tablename__ = 'solicitations' + + id = Column(Integer, primary_key=True, server_default=text("nextval('solicitations_id_seq'::regclass)")) + solNum = Column(String, unique=True) + active = Column(Boolean, server_default=text("true")) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + title = Column(String) + url = Column(String) + agency = Column(String) + numDocs = Column(Integer) + notice_type_id = Column(Integer) + noticeType = Column(String) + date = Column(DateTime) + office = Column(String) + na_flag = Column(Boolean, server_default=text("false")) + category_list = Column(JSONB(astext_type=Text())) + undetermined = Column(Boolean) + history = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb")) + action = Column(JSONB(astext_type=Text()), 
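+                   # a database-side default keeps new rows consistent even when
+                   # the scraper inserts without supplying this field: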
server_default=text("'[]'::jsonb")) + actionDate = Column(DateTime) + actionStatus = Column(String) + contactInfo = Column(JSONB(astext_type=Text())) + parseStatus = Column(JSONB(astext_type=Text())) + predictions = Column(JSONB(astext_type=Text()), server_default=text("'{\"value\": \"red\", \"history\": []}'::jsonb")) + reviewRec = Column(String) + searchText = Column(String) + compliant = Column(Integer, server_default=text("0")) + noticeData = Column(JSONB(astext_type=Text())) + agency_id = Column(Integer) + + +t_survey_backup = Table( + 'survey_backup', metadata, + Column('id', Integer), + Column('question', Text), + Column('choices', JSONB(astext_type=Text())), + Column('section', String(2000)), + Column('type', String(2000)), + Column('answer', Text), + Column('note', Text), + Column('choicesNote', JSONB(astext_type=Text())), + Column('createdAt', DateTime), + Column('updatedAt', DateTime) +) + + +class SurveyResponse(Base): + __tablename__ = 'survey_responses' + + id = Column(Integer, primary_key=True, server_default=text("nextval('survey_responses_id_seq'::regclass)")) + solNum = Column(String, index=True) + contemporary_notice_id = Column(Integer) + response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb")) + maxId = Column(String(256)) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + + +class SurveyResponsesArchive(Base): + __tablename__ = 'survey_responses_archive' + + id = Column(Integer, primary_key=True, server_default=text("nextval('survey_responses_archive_id_seq'::regclass)")) + solNum = Column(String) + contemporary_notice_id = Column(Integer) + response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb")) + maxId = Column(String(256)) + original_created_at = Column(DateTime, server_default=text("CURRENT_TIMESTAMP")) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + + +t_winston_logs = Table( + 'winston_logs', metadata, + Column('timestamp', DateTime(True)), + Column('level', String(255)), + Column('message', Text), + Column('meta', JSONB(astext_type=Text())) +) + + +class Attachment(Base): + __tablename__ = 'attachment' + + id = Column(Integer, primary_key=True, server_default=text("nextval('attachment_id_seq'::regclass)")) + notice_id = Column(Integer) + notice_type_id = Column(ForeignKey('notice_type.id')) + machine_readable = Column(Boolean) + attachment_text = Column(Text) + prediction = Column(Integer) + decision_boundary = Column(Float(53)) + validation = Column(Integer) + attachment_url = Column(Text) + trained = Column(Boolean) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + filename = Column(Text, nullable=False) + solicitation_id = Column(ForeignKey('solicitations.id')) + + notice_type = relationship('NoticeType') + solicitation = relationship('Solicitation') + + +class Notice(Base): + __tablename__ = 'notice' + + id = Column(Integer, primary_key=True, server_default=text("nextval('notice_id_seq'::regclass)")) + notice_type_id = Column(ForeignKey('notice_type.id')) + solicitation_number = Column(String(150), index=True) + agency = Column(String(150)) + date = Column(DateTime) + notice_data = Column(JSONB(astext_type=Text())) + compliant = Column(Integer) + feedback = Column(JSONB(astext_type=Text())) + history = Column(JSONB(astext_type=Text())) + action = Column(JSONB(astext_type=Text())) + createdAt = 
Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + na_flag = Column(Boolean, default=False) + + notice_type = relationship('NoticeType') From 8270574ae9830a49a7b4cc6a3489b21eaabbf5cc Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Fri, 19 May 2023 11:36:27 -0400 Subject: [PATCH 02/15] staging pg migrations --- .../78823e9293e9_match_local_migrations.py | 0 ...c9b_staging_migration_to_match_standard.py | 232 ++++++++++++++++++ manifest.staging.yml | 9 + .../{r11p0.sql => dev/s11p0_dev.sql} | 0 sql/migrations/staging/s11p0_staging.sql | 205 ++++++++++++++++ 5 files changed, 446 insertions(+) rename alembic/{versions => dev}/78823e9293e9_match_local_migrations.py (100%) create mode 100644 alembic/staging/b0cbeeb30c9b_staging_migration_to_match_standard.py create mode 100644 manifest.staging.yml rename sql/migrations/{r11p0.sql => dev/s11p0_dev.sql} (100%) create mode 100644 sql/migrations/staging/s11p0_staging.sql diff --git a/alembic/versions/78823e9293e9_match_local_migrations.py b/alembic/dev/78823e9293e9_match_local_migrations.py similarity index 100% rename from alembic/versions/78823e9293e9_match_local_migrations.py rename to alembic/dev/78823e9293e9_match_local_migrations.py diff --git a/alembic/staging/b0cbeeb30c9b_staging_migration_to_match_standard.py b/alembic/staging/b0cbeeb30c9b_staging_migration_to_match_standard.py new file mode 100644 index 00000000..6507bbb1 --- /dev/null +++ b/alembic/staging/b0cbeeb30c9b_staging_migration_to_match_standard.py @@ -0,0 +1,232 @@ +"""Staging migration to match standard + +Revision ID: b0cbeeb30c9b +Revises: +Create Date: 2023-05-19 10:52:34.529822 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'b0cbeeb30c9b' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! 
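+    # The sequences are created explicitly because the tables below reference
+    # them by name via server_default=nextval(...); relying on plain integer
+    # autoincrement would not produce sequences with these exact names.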
### + from sqlalchemy.schema import Sequence, CreateSequence + + op.execute(CreateSequence(Sequence('Predictions_id_seq'))) + op.create_table('Predictions', + sa.Column('id', sa.Integer(), server_default=sa.text('nextval(\'"Predictions_id_seq"\'::regclass)'), nullable=False), + sa.Column('title', sa.String(), nullable=False), + sa.Column('url', sa.String(), nullable=True), + sa.Column('agency', sa.String(), nullable=True), + sa.Column('numDocs', sa.Integer(), nullable=True), + sa.Column('solNum', sa.String(), nullable=False), + sa.Column('noticeType', sa.String(), nullable=False), + sa.Column('date', sa.DateTime(), nullable=True), + sa.Column('office', sa.String(), nullable=True), + sa.Column('na_flag', sa.Boolean(), nullable=True), + sa.Column('eitLikelihood', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('undetermined', sa.Boolean(), nullable=True), + sa.Column('action', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('actionStatus', sa.String(), nullable=True), + sa.Column('actionDate', sa.DateTime(), nullable=True), + sa.Column('history', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('contactInfo', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('parseStatus', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('predictions', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('reviewRec', sa.String(), nullable=True), + sa.Column('searchText', sa.String(), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=False), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.Column('active', sa.Boolean(), server_default=sa.text('true'), nullable=True), + sa.PrimaryKeyConstraint('id'), + sa.UniqueConstraint('solNum') + ) + + + op.execute(CreateSequence(Sequence('Surveys_id_seq'))) + op.create_table('Surveys', + sa.Column('id', sa.Integer(), server_default=sa.text('nextval(\'"Surveys_id_seq"\'::regclass)'), nullable=False), + sa.Column('question', sa.Text(), nullable=True), + sa.Column('choices', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('section', sa.String(length=2000), nullable=True), + sa.Column('type', sa.String(length=2000), nullable=True), + sa.Column('answer', sa.Text(), nullable=True), + sa.Column('note', sa.Text(), nullable=True), + sa.Column('choicesNote', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=False), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + + op.execute(CreateSequence(Sequence('agency_alias_id_seq'))) + op.create_table('agency_alias', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('agency_alias_id_seq'::regclass)"), nullable=False), + sa.Column('agency_id', sa.Integer(), nullable=False), + sa.Column('alias', sa.String(), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=True), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + + op.execute(CreateSequence(Sequence('model_id_seq'))) + op.create_table('model', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('model_id_seq'::regclass)"), nullable=False), + sa.Column('results', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('params', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('score', sa.Float(precision=53), nullable=True), + sa.Column('create_date', sa.DateTime(), nullable=False), + sa.PrimaryKeyConstraint('id') + ) 
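+    # op.f() marks a constraint/index name as already final, so Alembic's
+    # naming conventions are not applied to it a second time.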
+ + op.execute(CreateSequence(Sequence('notice_type_id_seq'))) + op.create_table('notice_type', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('notice_type_id_seq'::regclass)"), nullable=False), + sa.Column('notice_type', sa.String(length=50), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=False), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_index(op.f('ix_notice_type_notice_type'), 'notice_type', ['notice_type'], unique=False) + op.create_table('survey_backup', + sa.Column('id', sa.Integer(), nullable=True, primary_key=True), + sa.Column('question', sa.Text(), nullable=True), + sa.Column('choices', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('section', sa.String(length=2000), nullable=True), + sa.Column('type', sa.String(length=2000), nullable=True), + sa.Column('answer', sa.Text(), nullable=True), + sa.Column('note', sa.Text(), nullable=True), + sa.Column('choicesNote', postgresql.JSONB(astext_type=sa.Text()), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=True), + sa.Column('updatedAt', sa.DateTime(), nullable=True) + ) + + op.execute(CreateSequence(Sequence('survey_responses_id_seq'))) + op.create_table('survey_responses', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('survey_responses_id_seq'::regclass)"), nullable=False), + sa.Column('solNum', sa.String(), nullable=True), + sa.Column('contemporary_notice_id', sa.Integer(), nullable=True), + sa.Column('response', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'[]'::jsonb"), nullable=True), + sa.Column('maxId', sa.String(length=256), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=False), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + + op.create_index(op.f('ix_survey_responses_solNum'), 'survey_responses', ['solNum'], unique=False) + + op.execute(CreateSequence(Sequence('survey_responses_archive_id_seq'))) + op.create_table('survey_responses_archive', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('survey_responses_archive_id_seq'::regclass)"), nullable=False), + sa.Column('solNum', sa.String(), nullable=True), + sa.Column('contemporary_notice_id', sa.Integer(), nullable=True), + sa.Column('response', postgresql.JSONB(astext_type=sa.Text()), server_default=sa.text("'[]'::jsonb"), nullable=True), + sa.Column('maxId', sa.String(length=256), nullable=True), + sa.Column('original_created_at', sa.DateTime(), server_default=sa.text('CURRENT_TIMESTAMP'), nullable=True), + sa.Column('createdAt', sa.DateTime(), nullable=False), + sa.Column('updatedAt', sa.DateTime(), nullable=True), + sa.PrimaryKeyConstraint('id') + ) + op.create_table('winston_logs', + sa.Column('timestamp', sa.DateTime(timezone=True), nullable=True), + sa.Column('level', sa.String(length=255), nullable=True), + sa.Column('message', sa.Text(), nullable=True), + sa.Column('meta', postgresql.JSONB(astext_type=sa.Text()), nullable=True) + ) + + op.execute(CreateSequence(Sequence('attachment_id_seq'))) + op.create_table('attachment', + sa.Column('id', sa.Integer(), server_default=sa.text("nextval('attachment_id_seq'::regclass)"), nullable=False), + sa.Column('notice_id', sa.Integer(), nullable=True), + sa.Column('notice_type_id', sa.Integer(), nullable=True), + sa.Column('machine_readable', sa.Boolean(), nullable=True), + sa.Column('attachment_text', sa.Text(), nullable=True), + sa.Column('prediction', sa.Integer(), 
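+               # prediction, decision_boundary and validation store the
+               # attachment classifier's output and its review status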
nullable=True),
+    sa.Column('decision_boundary', sa.Float(precision=53), nullable=True),
+    sa.Column('validation', sa.Integer(), nullable=True),
+    sa.Column('attachment_url', sa.Text(), nullable=True),
+    sa.Column('trained', sa.Boolean(), nullable=True),
+    sa.Column('createdAt', sa.DateTime(), nullable=False),
+    sa.Column('updatedAt', sa.DateTime(), nullable=True),
+    sa.Column('filename', sa.Text(), nullable=False),
+    sa.Column('solicitation_id', sa.Integer(), nullable=True),
+    sa.ForeignKeyConstraint(['notice_type_id'], ['notice_type.id'], ),
+    sa.ForeignKeyConstraint(['solicitation_id'], ['solicitations.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+
+    op.execute(CreateSequence(Sequence('notice_id_seq')))
+    op.create_table('notice',
+    sa.Column('id', sa.Integer(), server_default=sa.text("nextval('notice_id_seq'::regclass)"), nullable=False),
+    sa.Column('notice_type_id', sa.Integer(), nullable=True),
+    sa.Column('solicitation_number', sa.String(length=150), nullable=True),
+    sa.Column('agency', sa.String(length=150), nullable=True),
+    sa.Column('date', sa.DateTime(), nullable=True),
+    sa.Column('notice_data', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    sa.Column('compliant', sa.Integer(), nullable=True),
+    sa.Column('feedback', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    sa.Column('history', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    sa.Column('action', postgresql.JSONB(astext_type=sa.Text()), nullable=True),
+    sa.Column('createdAt', sa.DateTime(), nullable=False),
+    sa.Column('updatedAt', sa.DateTime(), nullable=True),
+    sa.Column('na_flag', sa.Boolean(), nullable=True),
+    sa.ForeignKeyConstraint(['notice_type_id'], ['notice_type.id'], ),
+    sa.PrimaryKeyConstraint('id')
+    )
+    op.create_index(op.f('ix_notice_solicitation_number'), 'notice', ['solicitation_number'], unique=False)
+    op.alter_column('Agencies', 'updatedAt',
+               existing_type=postgresql.TIMESTAMP(timezone=True),
+               nullable=True)
+    op.add_column('Users', sa.Column('maxId', sa.String(length=256), nullable=True))
+    op.alter_column('Users', 'updatedAt',
+               existing_type=postgresql.TIMESTAMP(timezone=True),
+               nullable=True)
+
+    # Solicitations
+    op.create_unique_constraint(None, 'solicitations', ['solNum'])
+    op.alter_column('solicitations', 'history', server_default=sa.text("'[]'::jsonb"))
+    op.alter_column('solicitations', 'action', server_default=sa.text("'[]'::jsonb"))
+    op.alter_column('solicitations', 'predictions', server_default=sa.text("'{\"value\": \"red\", \"history\": []}'::jsonb"))
+    op.alter_column('solicitations', 'compliant', server_default=sa.text("0"))
+    op.alter_column('solicitations', 'active', server_default=sa.text("true"))
+    op.alter_column('solicitations', 'na_flag', server_default=sa.text("false"))
+    op.alter_column('solicitations', 'updatedAt', nullable=True)
+
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust!
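+    # Tables are dropped child-first: notice and attachment hold foreign keys
+    # into notice_type and solicitations, so they go before their parents.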
### + op.drop_constraint(None, 'solicitations', type_='unique') + op.alter_column('Users', 'updatedAt', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=False) + op.drop_column('Users', 'maxId') + op.alter_column('Agencies', 'updatedAt', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=False) + op.drop_index(op.f('ix_notice_solicitation_number'), table_name='notice') + op.drop_table('notice') + op.drop_table('attachment') + op.drop_table('winston_logs') + op.drop_table('survey_responses_archive') + op.drop_index(op.f('ix_survey_responses_solNum'), table_name='survey_responses') + op.drop_table('survey_responses') + op.drop_table('survey_backup') + op.drop_index(op.f('ix_notice_type_notice_type'), table_name='notice_type') + op.drop_table('notice_type') + op.drop_table('model') + op.drop_table('agency_alias') + op.drop_table('Surveys') + op.drop_table('Predictions') + # ### end Alembic commands ### diff --git a/manifest.staging.yml b/manifest.staging.yml new file mode 100644 index 00000000..536798cb --- /dev/null +++ b/manifest.staging.yml @@ -0,0 +1,9 @@ +--- +applications: +- name: srt-fbo-scraper-staging + memory: 1GB + disk_quota: 4GB + no-route: true + health-check-type: process + services: + - srt-postgres-staging \ No newline at end of file diff --git a/sql/migrations/r11p0.sql b/sql/migrations/dev/s11p0_dev.sql similarity index 100% rename from sql/migrations/r11p0.sql rename to sql/migrations/dev/s11p0_dev.sql diff --git a/sql/migrations/staging/s11p0_staging.sql b/sql/migrations/staging/s11p0_staging.sql new file mode 100644 index 00000000..dd6cb5d9 --- /dev/null +++ b/sql/migrations/staging/s11p0_staging.sql @@ -0,0 +1,205 @@ +BEGIN; + +-- Running upgrade -> b0cbeeb30c9b + +CREATE SEQUENCE "Predictions_id_seq"; + +CREATE TABLE "Predictions" ( + id INTEGER DEFAULT nextval('"Predictions_id_seq"'::regclass) NOT NULL, + title VARCHAR NOT NULL, + url VARCHAR, + agency VARCHAR, + "numDocs" INTEGER, + "solNum" VARCHAR NOT NULL, + "noticeType" VARCHAR NOT NULL, + date TIMESTAMP WITHOUT TIME ZONE, + office VARCHAR, + na_flag BOOLEAN, + "eitLikelihood" JSONB, + undetermined BOOLEAN, + action JSONB, + "actionStatus" VARCHAR, + "actionDate" TIMESTAMP WITHOUT TIME ZONE, + history JSONB, + "contactInfo" JSONB, + "parseStatus" JSONB, + predictions JSONB, + "reviewRec" VARCHAR, + "searchText" VARCHAR, + "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL, + "updatedAt" TIMESTAMP WITHOUT TIME ZONE, + active BOOLEAN DEFAULT true, + PRIMARY KEY (id), + UNIQUE ("solNum") +); + +CREATE SEQUENCE "Surveys_id_seq"; + +CREATE TABLE "Surveys" ( + id INTEGER DEFAULT nextval('"Surveys_id_seq"'::regclass) NOT NULL, + question TEXT, + choices JSONB, + section VARCHAR(2000), + type VARCHAR(2000), + answer TEXT, + note TEXT, + "choicesNote" JSONB, + "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL, + "updatedAt" TIMESTAMP WITHOUT TIME ZONE, + PRIMARY KEY (id) +); + +CREATE SEQUENCE agency_alias_id_seq; + +CREATE TABLE agency_alias ( + id INTEGER DEFAULT nextval('agency_alias_id_seq'::regclass) NOT NULL, + agency_id INTEGER NOT NULL, + alias VARCHAR, + "createdAt" TIMESTAMP WITHOUT TIME ZONE, + "updatedAt" TIMESTAMP WITHOUT TIME ZONE, + PRIMARY KEY (id) +); + +CREATE SEQUENCE model_id_seq; + +CREATE TABLE model ( + id INTEGER DEFAULT nextval('model_id_seq'::regclass) NOT NULL, + results JSONB, + params JSONB, + score FLOAT(53), + create_date TIMESTAMP WITHOUT TIME ZONE NOT NULL, + PRIMARY KEY (id) +); + +CREATE SEQUENCE notice_type_id_seq; + +CREATE TABLE notice_type ( + id INTEGER 
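+-- the default draws ids from the sequence created just above; similar to
+-- SERIAL, but with an explicitly created, named sequence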
DEFAULT nextval('notice_type_id_seq'::regclass) NOT NULL,
+    notice_type VARCHAR(50),
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    PRIMARY KEY (id)
+);
+
+CREATE INDEX ix_notice_type_notice_type ON notice_type (notice_type);
+
+CREATE TABLE survey_backup (
+    id SERIAL,
+    question TEXT,
+    choices JSONB,
+    section VARCHAR(2000),
+    type VARCHAR(2000),
+    answer TEXT,
+    note TEXT,
+    "choicesNote" JSONB,
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    PRIMARY KEY (id)
+);
+
+CREATE SEQUENCE survey_responses_id_seq;
+
+CREATE TABLE survey_responses (
+    id INTEGER DEFAULT nextval('survey_responses_id_seq'::regclass) NOT NULL,
+    "solNum" VARCHAR,
+    contemporary_notice_id INTEGER,
+    response JSONB DEFAULT '[]'::jsonb,
+    "maxId" VARCHAR(256),
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    PRIMARY KEY (id)
+);
+
+CREATE INDEX "ix_survey_responses_solNum" ON survey_responses ("solNum");
+
+CREATE SEQUENCE survey_responses_archive_id_seq;
+
+CREATE TABLE survey_responses_archive (
+    id INTEGER DEFAULT nextval('survey_responses_archive_id_seq'::regclass) NOT NULL,
+    "solNum" VARCHAR,
+    contemporary_notice_id INTEGER,
+    response JSONB DEFAULT '[]'::jsonb,
+    "maxId" VARCHAR(256),
+    original_created_at TIMESTAMP WITHOUT TIME ZONE DEFAULT CURRENT_TIMESTAMP,
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    PRIMARY KEY (id)
+);
+
+CREATE TABLE winston_logs (
+    timestamp TIMESTAMP WITH TIME ZONE,
+    level VARCHAR(255),
+    message TEXT,
+    meta JSONB
+);
+
+CREATE SEQUENCE attachment_id_seq;
+
+CREATE TABLE attachment (
+    id INTEGER DEFAULT nextval('attachment_id_seq'::regclass) NOT NULL,
+    notice_id INTEGER,
+    notice_type_id INTEGER,
+    machine_readable BOOLEAN,
+    attachment_text TEXT,
+    prediction INTEGER,
+    decision_boundary FLOAT(53),
+    validation INTEGER,
+    attachment_url TEXT,
+    trained BOOLEAN,
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    filename TEXT NOT NULL,
+    solicitation_id INTEGER,
+    PRIMARY KEY (id),
+    FOREIGN KEY(notice_type_id) REFERENCES notice_type (id),
+    FOREIGN KEY(solicitation_id) REFERENCES solicitations (id)
+);
+
+CREATE SEQUENCE notice_id_seq;
+
+CREATE TABLE notice (
+    id INTEGER DEFAULT nextval('notice_id_seq'::regclass) NOT NULL,
+    notice_type_id INTEGER,
+    solicitation_number VARCHAR(150),
+    agency VARCHAR(150),
+    date TIMESTAMP WITHOUT TIME ZONE,
+    notice_data JSONB,
+    compliant INTEGER,
+    feedback JSONB,
+    history JSONB,
+    action JSONB,
+    "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL,
+    "updatedAt" TIMESTAMP WITHOUT TIME ZONE,
+    na_flag BOOLEAN,
+    PRIMARY KEY (id),
+    FOREIGN KEY(notice_type_id) REFERENCES notice_type (id)
+);
+
+CREATE INDEX ix_notice_solicitation_number ON notice (solicitation_number);
+
+ALTER TABLE "Agencies" ALTER COLUMN "updatedAt" DROP NOT NULL;
+
+ALTER TABLE "Users" ADD COLUMN "maxId" VARCHAR(256);
+
+ALTER TABLE "Users" ALTER COLUMN "updatedAt" DROP NOT NULL;
+
+ALTER TABLE solicitations ADD UNIQUE ("solNum");
+
+ALTER TABLE solicitations ALTER COLUMN history SET DEFAULT '[]'::jsonb;
+
+ALTER TABLE solicitations ALTER COLUMN action SET DEFAULT '[]'::jsonb;
+
+ALTER TABLE solicitations ALTER COLUMN predictions SET DEFAULT '{"value": "red", "history": []}'::jsonb;
+
+ALTER TABLE solicitations ALTER COLUMN compliant SET DEFAULT 0;
+
+ALTER TABLE solicitations ALTER
COLUMN active SET DEFAULT true; + +ALTER TABLE solicitations ALTER COLUMN na_flag SET DEFAULT false; + +ALTER TABLE solicitations ALTER COLUMN "updatedAt" DROP NOT NULL; + +COMMIT; + From c845e775569a2b9a57f60fdc8af67b253d908750 Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Fri, 19 May 2023 13:24:45 -0400 Subject: [PATCH 03/15] Needed to alter a few columns to match dev. --- sql/migrations/staging/s11p1_defaults.sql | 96 +++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 sql/migrations/staging/s11p1_defaults.sql diff --git a/sql/migrations/staging/s11p1_defaults.sql b/sql/migrations/staging/s11p1_defaults.sql new file mode 100644 index 00000000..6b4cef59 --- /dev/null +++ b/sql/migrations/staging/s11p1_defaults.sql @@ -0,0 +1,96 @@ +ALTER TABLE "Agencies" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE "Predictions" ADD COLUMN "eitLikelihood" JSONB; + +ALTER TABLE "Predictions" ADD COLUMN active BOOLEAN DEFAULT true; + +ALTER TABLE "Predictions" ALTER COLUMN title SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "solNum" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "noticeType" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE "Predictions" ALTER COLUMN history SET DEFAULT '[]'::jsonb; + +ALTER TABLE "Predictions" ADD UNIQUE ("solNum"); + +ALTER TABLE "Predictions" DROP COLUMN feedback; + +ALTER TABLE "Predictions" DROP COLUMN category_list; + +ALTER TABLE "Surveys" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE "Users" ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN agency_id SET NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE agency_alias ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE agency_alias ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE notice_type ADD COLUMN "createdAt" TIMESTAMP WITHOUT TIME ZONE NOT NULL DEFAULT now(); + +ALTER TABLE notice_type ADD COLUMN "updatedAt" TIMESTAMP WITHOUT TIME ZONE; + +ALTER TABLE survey_responses ALTER COLUMN "createdAt" SET NOT NULL; + +CREATE INDEX "ix_survey_responses_solNum" ON survey_responses ("solNum"); + +ALTER TABLE survey_responses_archive ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE survey_responses_archive ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE survey_responses_archive ALTER COLUMN "updatedAt" DROP NOT NULL; + +ALTER TABLE survey_responses_archive ALTER COLUMN response SET DEFAULT '[]'::jsonb; + +ALTER TABLE solicitations ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE solicitations ADD UNIQUE ("solNum"); + +ALTER TABLE solicitations ALTER COLUMN history SET DEFAULT '[]'::jsonb; + +UPDATE solicitations SET history = '[]'::jsonb WHERE history IS NULL; + +ALTER TABLE solicitations ALTER COLUMN action SET DEFAULT '[]'::jsonb; + +ALTER TABLE solicitations ALTER COLUMN predictions SET DEFAULT '{"value": "red", "history": []}'::jsonb; + +ALTER TABLE solicitations ALTER COLUMN compliant SET DEFAULT 0; + +ALTER TABLE solicitations ALTER COLUMN active SET DEFAULT true; + +ALTER TABLE solicitations ALTER COLUMN na_flag SET DEFAULT false; + +ALTER TABLE solicitations ALTER COLUMN title type character varying; + +ALTER TABLE solicitations ALTER COLUMN url type character varying; + +ALTER TABLE solicitations ALTER COLUMN agency type character varying; + +ALTER TABLE solicitations ALTER COLUMN "noticeType" type character varying; + +ALTER TABLE solicitations ALTER COLUMN office type character varying; + 
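+-- The TYPE changes below move these columns to unbounded character varying so
+-- they line up with the dev model's plain String columns.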
+ALTER TABLE solicitations ALTER COLUMN "actionStatus" type character varying; + +ALTER TABLE solicitations ALTER COLUMN "reviewRec" type character varying; + +ALTER TABLE solicitations ALTER COLUMN "searchText" type character varying; + +ALTER TABLE attachment ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE attachment ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE notice ALTER COLUMN "createdAt" SET NOT NULL; + +ALTER TABLE notice ALTER COLUMN "createdAt" SET DEFAULT now(); + +ALTER TABLE notice_type ALTER COLUMN "createdAt" SET DEFAULT now(); + + + From 24d3b7e1ce49e6bb3586e54878990e63d9f787b1 Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Mon, 22 May 2023 14:20:07 -0400 Subject: [PATCH 04/15] Updating the db.py to be inline with srt db --- alembic/env.py | 4 +- src/fbo_scraper/db/db.py | 131 +++++++++++++++++++++++++++------------ 2 files changed, 94 insertions(+), 41 deletions(-) diff --git a/alembic/env.py b/alembic/env.py index c4ba1f5d..69408561 100644 --- a/alembic/env.py +++ b/alembic/env.py @@ -22,10 +22,8 @@ config.set_main_option('sqlalchemy.url', get_db_url()) from fbo_scraper.db import db -from fbo_scraper.db import auto_db -#target_metadata = db.Base.metadata -target_metadata = auto_db.Base.metadata +target_metadata = db.Base.metadata # other values from the config, defined by the needs of env.py, diff --git a/src/fbo_scraper/db/db.py b/src/fbo_scraper/db/db.py index ecccb9b0..276e4f15 100644 --- a/src/fbo_scraper/db/db.py +++ b/src/fbo_scraper/db/db.py @@ -2,10 +2,10 @@ from sqlalchemy.ext.declarative import declarative_base from sqlalchemy import Column, Integer, String, ForeignKey, Table, Text, \ - DateTime, Boolean, Float, MetaData, inspect + DateTime, Boolean, Float, MetaData, inspect, text from sqlalchemy.orm import relationship from sqlalchemy.dialects.postgresql import JSONB, ARRAY - +from sqlalchemy.sql import func def now_minus_two(): @@ -39,14 +39,16 @@ class Notice(Base): feedback = Column(JSONB) history = Column(JSONB) action = Column(JSONB) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = True) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, nullable = True, onupdate=func.now()) na_flag = Column(Boolean, default = False) class NoticeType(Base): __tablename__ = 'notice_type' id = Column(Integer, primary_key = True) notice_type = Column(String(50), index = True) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) class Attachment(Base): __tablename__ = 'attachment' @@ -55,16 +57,18 @@ class Attachment(Base): notice_type_id = Column(Integer, ForeignKey('notice_type.id')) filename = Column(Text, nullable = False) machine_readable = Column(Boolean) - attachment_text = Column(Text, nullable = True) - prediction = Column(Integer, nullable = True) - decision_boundary = Column(Float, nullable = True) - validation = Column(Integer, nullable = True) + attachment_text = Column(Text) + prediction = Column(Integer) + decision_boundary = Column(Float(53)) + validation = Column(Integer) attachment_url = Column(Text) - trained = Column(Boolean, nullable = True) - createdAt = Column(DateTime, nullable = False, default=datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = True) + trained = Column(Boolean) + createdAt = Column(DateTime, nullable = False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) solicitation_id = 
Column(Integer, ForeignKey('solicitations.id')) - solicitaiton = relationship("Solicitation", back_populates = "attachments") + + notice_type = relationship('NoticeType') + solicitation = relationship("Solicitation", back_populates = "attachments") class Model(Base): __tablename__ = 'model' @@ -89,25 +93,26 @@ class Users(Base): rejectionNote = Column(String) creationDate = Column(String) tempPassword = Column(String) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = True) - maxId = Column(String) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + maxId = Column(String(256)) class Agencies(Base): __tablename__ = 'Agencies' id = Column(Integer, primary_key = True) agency = Column(String) acronym = Column(String) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = False) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) class AgencyAlias(Base): __tablename__ = "agency_alias" id = Column(Integer, primary_key = True) agency_id = Column(Integer) alias = Column(String) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = False) + createdAt = Column(DateTime, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + class Surveys(Base): __tablename__ = 'Surveys' @@ -119,65 +124,115 @@ class Surveys(Base): answer = Column(Text) note = Column(Text) choicesNote = Column(JSONB) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = True) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) class Predictions(Base): __tablename__ = 'Predictions' id = Column(Integer, primary_key=True) - title = Column(String) + title = Column(String, nullable=False) url = Column(String) agency = Column(String) numDocs = Column(Integer) - solNum = Column(String) + solNum = Column(String, nullable=False, unique=True) noticeType = Column(String) date = Column(DateTime) office = Column(String) na_flag = Column(Boolean) - category_list = Column(JSONB) + eitLikelihood = Column(JSONB) undetermined = Column(Boolean) action = Column(JSONB) actionStatus= Column(String) actionDate = Column(DateTime) - feedback = Column(JSONB) history = Column(JSONB) contactInfo = Column(JSONB) parseStatus = Column(JSONB) predictions = Column(JSONB) reviewRec = Column(String) searchText = Column(String) - createdAt = Column(DateTime) - updatedAt = Column(DateTime) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + active = Column(Boolean, server_default=text("true")) class Solicitation(Base): __tablename__ = 'solicitations' id = Column(Integer, primary_key=True) - solNum = Column(String) # TODO: nullable = False?? 
- active = Column(Boolean) - createdAt = Column(DateTime, nullable = False, default = datetime.datetime.utcnow) - updatedAt = Column(DateTime, nullable = True) + solNum = Column(String, nullable=False, unique=True) + active = Column(Boolean, server_default=text("true")) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) title = Column(String) url = Column(String) agency = Column(String) agency_id = Column(Integer) numDocs = Column(Integer) - noticeData = Column(JSONB) notice_type_id = Column(Integer) noticeType = Column(String) date = Column(DateTime) office = Column(String) - predictions = Column(JSONB) - na_flag = Column(Boolean) + na_flag = Column(Boolean, server_default=text("false")) category_list = Column(JSONB) undetermined = Column(Boolean) - history = Column(JSONB) - action = Column(JSONB) + history = Column(JSONB, server_default=text("'[]'::jsonb")) + action = Column(JSONB, server_default=text("'[]'::jsonb")) actionStatus = Column(String) actionDate = Column(DateTime) contactInfo= Column(JSONB) parseStatus = Column(JSONB) + predictions = Column(JSONB, server_default=text("'{\"value\": \"red\", \"history\": []}'::jsonb")) reviewRec = Column(String) searchText = Column(String) - compliant = Column (Integer) - attachments = relationship("Attachment", back_populates="solicitaiton", cascade="all, delete-orphan"); + compliant = Column(Integer, server_default=text("0")) + noticeData = Column(JSONB) + + attachments = relationship("Attachment", back_populates="solicitation", cascade="all, delete-orphan"); +class SurveyResponse(Base): + __tablename__ = 'survey_responses' + + id = Column(Integer, primary_key=True) + solNum = Column(String, index=True) + contemporary_notice_id = Column(Integer) + response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb")) + maxId = Column(String(256)) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + +class SurveyResponsesArchive(Base): + __tablename__ = 'survey_responses_archive' + + id = Column(Integer, primary_key=True) + solNum = Column(String) + contemporary_notice_id = Column(Integer) + response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb")) + maxId = Column(String(256)) + original_created_at = Column(DateTime, server_default=text("CURRENT_TIMESTAMP")) + createdAt = Column(DateTime, nullable=False, default=func.now()) + updatedAt = Column(DateTime, onupdate=func.now()) + +t_survey_backup = Table( + 'survey_backup', meta, + Column('id', Integer), + Column('question', Text), + Column('choices', JSONB(astext_type=Text())), + Column('section', String(2000)), + Column('type', String(2000)), + Column('answer', Text), + Column('note', Text), + Column('choicesNote', JSONB(astext_type=Text())), + Column('createdAt', DateTime), + Column('updatedAt', DateTime) +) + +t_winston_logs = Table( + 'winston_logs', meta, + Column('timestamp', DateTime(True)), + Column('level', String(255)), + Column('message', Text), + Column('meta', JSONB(astext_type=Text())) +) + +t_sequelize_meta = Table( + 'SequelizeMeta', meta, + Column('name', String(255), primary_key=True) + ) \ No newline at end of file From 58a99dc1168f705daab33b30c208e913426ac5b4 Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Tue, 23 May 2023 10:22:33 -0400 Subject: [PATCH 05/15] prod migrations --- ...06c9149baecd_prod_db_migration_matching.py | 143 ++++++++++++++++++ sql/migrations/prod/s11_p0_prod.sql | 42 +++++ 2 files 
changed, 185 insertions(+) create mode 100644 alembic/prod/06c9149baecd_prod_db_migration_matching.py create mode 100644 sql/migrations/prod/s11_p0_prod.sql diff --git a/alembic/prod/06c9149baecd_prod_db_migration_matching.py b/alembic/prod/06c9149baecd_prod_db_migration_matching.py new file mode 100644 index 00000000..d655168c --- /dev/null +++ b/alembic/prod/06c9149baecd_prod_db_migration_matching.py @@ -0,0 +1,143 @@ +"""prod db migration matching + +Revision ID: 06c9149baecd +Revises: +Create Date: 2023-05-22 15:30:25.932722 + +""" +from alembic import op +import sqlalchemy as sa +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = '06c9149baecd' +down_revision = None +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('solicitations_pre_dla_update_oct_2021') + op.alter_column('Agencies', 'updatedAt', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=True) + op.alter_column('Predictions', 'noticeType', + existing_type=sa.VARCHAR(), + nullable=True) + op.alter_column('Predictions', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False) + op.drop_constraint('uniqueSolNum', 'Predictions', type_='unique') + op.create_unique_constraint(op.f('uq_Predictions_solNum'), 'Predictions', ['solNum']) + op.alter_column('Surveys', 'updatedAt', + existing_type=postgresql.TIMESTAMP(), + nullable=True) + op.alter_column('Users', 'updatedAt', + existing_type=postgresql.TIMESTAMP(timezone=True), + nullable=True) + op.alter_column('agency_alias', 'agency_id', + existing_type=sa.INTEGER(), + nullable=True) + op.alter_column('notice_type', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False, + existing_server_default=sa.text('now()')) + op.alter_column('solicitations', 'solNum', + existing_type=sa.VARCHAR(), + nullable=False) + op.alter_column('solicitations', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False, + existing_server_default=sa.text('now()')) + op.drop_constraint('solicitations_solNum_key', 'solicitations', type_='unique') + op.create_unique_constraint(op.f('uq_solicitations_solNum'), 'solicitations', ['solNum']) + op.alter_column('survey_responses', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP')) + op.drop_index('ix_feedback_solNum', table_name='survey_responses') + op.create_index(op.f('ix_survey_responses_solNum'), 'survey_responses', ['solNum'], unique=False) + op.alter_column('survey_responses_archive', 'createdAt', + existing_type=postgresql.TIMESTAMP(), + nullable=False, + existing_server_default=sa.text('CURRENT_TIMESTAMP')) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! 
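+    # The downgrade recreates solicitations_pre_dla_update_oct_2021 with its
+    # original shape, but the rows removed by DROP TABLE in upgrade() are gone.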
From 58a99dc1168f705daab33b30c208e913426ac5b4 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 10:22:33 -0400
Subject: [PATCH 05/15] prod migrations

---
 ...06c9149baecd_prod_db_migration_matching.py | 143 ++++++++++++++++++
 sql/migrations/prod/s11_p0_prod.sql           |  42 +++++
 2 files changed, 185 insertions(+)
 create mode 100644 alembic/prod/06c9149baecd_prod_db_migration_matching.py
 create mode 100644 sql/migrations/prod/s11_p0_prod.sql

diff --git a/alembic/prod/06c9149baecd_prod_db_migration_matching.py b/alembic/prod/06c9149baecd_prod_db_migration_matching.py
new file mode 100644
index 00000000..d655168c
--- /dev/null
+++ b/alembic/prod/06c9149baecd_prod_db_migration_matching.py
@@ -0,0 +1,143 @@
+"""prod db migration matching
+
+Revision ID: 06c9149baecd
+Revises:
+Create Date: 2023-05-22 15:30:25.932722
+
+"""
+from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '06c9149baecd'
+down_revision = None
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('solicitations_pre_dla_update_oct_2021')
+    op.alter_column('Agencies', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(timezone=True),
+                    nullable=True)
+    op.alter_column('Predictions', 'noticeType',
+                    existing_type=sa.VARCHAR(),
+                    nullable=True)
+    op.alter_column('Predictions', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False)
+    op.drop_constraint('uniqueSolNum', 'Predictions', type_='unique')
+    op.create_unique_constraint(op.f('uq_Predictions_solNum'), 'Predictions', ['solNum'])
+    op.alter_column('Surveys', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+    op.alter_column('Users', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(timezone=True),
+                    nullable=True)
+    op.alter_column('agency_alias', 'agency_id',
+                    existing_type=sa.INTEGER(),
+                    nullable=True)
+    op.alter_column('notice_type', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False,
+                    existing_server_default=sa.text('now()'))
+    op.alter_column('solicitations', 'solNum',
+                    existing_type=sa.VARCHAR(),
+                    nullable=False)
+    op.alter_column('solicitations', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False,
+                    existing_server_default=sa.text('now()'))
+    op.drop_constraint('solicitations_solNum_key', 'solicitations', type_='unique')
+    op.create_unique_constraint(op.f('uq_solicitations_solNum'), 'solicitations', ['solNum'])
+    op.alter_column('survey_responses', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False,
+                    existing_server_default=sa.text('CURRENT_TIMESTAMP'))
+    op.drop_index('ix_feedback_solNum', table_name='survey_responses')
+    op.create_index(op.f('ix_survey_responses_solNum'), 'survey_responses', ['solNum'], unique=False)
+    op.alter_column('survey_responses_archive', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False,
+                    existing_server_default=sa.text('CURRENT_TIMESTAMP'))
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.alter_column('survey_responses_archive', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True,
+                    existing_server_default=sa.text('CURRENT_TIMESTAMP'))
+    op.drop_index(op.f('ix_survey_responses_solNum'), table_name='survey_responses')
+    op.create_index('ix_feedback_solNum', 'survey_responses', ['solNum'], unique=False)
+    op.alter_column('survey_responses', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True,
+                    existing_server_default=sa.text('CURRENT_TIMESTAMP'))
+    op.drop_constraint(op.f('uq_solicitations_solNum'), 'solicitations', type_='unique')
+    op.create_unique_constraint('solicitations_solNum_key', 'solicitations', ['solNum'])
+    op.alter_column('solicitations', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True,
+                    existing_server_default=sa.text('now()'))
+    op.alter_column('solicitations', 'solNum',
+                    existing_type=sa.VARCHAR(),
+                    nullable=True)
+    op.alter_column('notice_type', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True,
+                    existing_server_default=sa.text('now()'))
+    op.alter_column('agency_alias', 'agency_id',
+                    existing_type=sa.INTEGER(),
+                    nullable=False)
+    op.alter_column('Users', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(timezone=True),
+                    nullable=False)
+    op.alter_column('Surveys', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=False)
+    op.drop_constraint(op.f('uq_Predictions_solNum'), 'Predictions', type_='unique')
+    op.create_unique_constraint('uniqueSolNum', 'Predictions', ['solNum'])
+    op.alter_column('Predictions', 'createdAt',
+                    existing_type=postgresql.TIMESTAMP(),
+                    nullable=True)
+    op.alter_column('Predictions', 'noticeType',
+                    existing_type=sa.VARCHAR(),
+                    nullable=False)
+    op.alter_column('Agencies', 'updatedAt',
+                    existing_type=postgresql.TIMESTAMP(timezone=True),
+                    nullable=False)
+    op.create_table('solicitations_pre_dla_update_oct_2021',
+        sa.Column('id', sa.INTEGER(), autoincrement=False, nullable=True),
+        sa.Column('solNum', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('active', sa.BOOLEAN(), autoincrement=False, nullable=True),
+        sa.Column('updatedAt', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
+        sa.Column('createdAt', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
+        sa.Column('title', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('url', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('agency', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('numDocs', sa.INTEGER(), autoincrement=False, nullable=True),
+        sa.Column('notice_type_id', sa.INTEGER(), autoincrement=False, nullable=True),
+        sa.Column('noticeType', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('date', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
+        sa.Column('office', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('na_flag', sa.BOOLEAN(), autoincrement=False, nullable=True),
+        sa.Column('category_list', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('undetermined', sa.BOOLEAN(), autoincrement=False, nullable=True),
+        sa.Column('history', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('action', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('actionDate', postgresql.TIMESTAMP(), autoincrement=False, nullable=True),
+        sa.Column('actionStatus', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('contactInfo', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('parseStatus', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('predictions', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('reviewRec', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('searchText', sa.VARCHAR(), autoincrement=False, nullable=True),
+        sa.Column('compliant', sa.INTEGER(), autoincrement=False, nullable=True),
+        sa.Column('noticeData', postgresql.JSONB(astext_type=sa.Text()), autoincrement=False, nullable=True),
+        sa.Column('agency_id', sa.INTEGER(), autoincrement=False, nullable=True)
+    )
+    # ### end Alembic commands ###

diff --git a/sql/migrations/prod/s11_p0_prod.sql b/sql/migrations/prod/s11_p0_prod.sql
new file mode 100644
index 00000000..2b6c1083
--- /dev/null
+++ b/sql/migrations/prod/s11_p0_prod.sql
@@ -0,0 +1,42 @@
+BEGIN;
+
+-- Running upgrade -> 06c9149baecd
+
+DROP TABLE solicitations_pre_dla_update_oct_2021;
+
+ALTER TABLE "Agencies" ALTER COLUMN "updatedAt" DROP NOT NULL;
+
+ALTER TABLE "Predictions" ALTER COLUMN "noticeType" DROP NOT NULL;
+
+ALTER TABLE "Predictions" ALTER COLUMN "createdAt" SET NOT NULL;
+
+ALTER TABLE "Predictions" DROP CONSTRAINT "uniqueSolNum";
+
+ALTER TABLE "Predictions" ADD CONSTRAINT "uq_Predictions_solNum" UNIQUE ("solNum");
+
+ALTER TABLE "Surveys" ALTER COLUMN "updatedAt" DROP NOT NULL;
+
+ALTER TABLE "Users" ALTER COLUMN "updatedAt" DROP NOT NULL;
+
+ALTER TABLE agency_alias ALTER COLUMN agency_id DROP NOT NULL;
+
+ALTER TABLE notice_type ALTER COLUMN "createdAt" SET NOT NULL;
+
+ALTER TABLE solicitations ALTER COLUMN "solNum" SET NOT NULL;
+
+ALTER TABLE solicitations ALTER COLUMN "createdAt" SET NOT NULL;
+
+ALTER TABLE solicitations DROP CONSTRAINT "solicitations_solNum_key";
+
+ALTER TABLE solicitations ADD CONSTRAINT "uq_solicitations_solNum" UNIQUE ("solNum");
+
+ALTER TABLE survey_responses ALTER COLUMN "createdAt" SET NOT NULL;
+
+DROP INDEX "ix_feedback_solNum";
+
+CREATE INDEX "ix_survey_responses_solNum" ON survey_responses ("solNum");
+
+ALTER TABLE survey_responses_archive ALTER COLUMN "createdAt" SET NOT NULL;
+
+COMMIT;
+
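The checked-in s11_p0_prod.sql mirrors what Alembic's offline mode emits for this revision, which lets a reviewer (or a DBA without pipeline credentials) inspect and run the DDL directly. A sketch of regenerating it through Alembic's Python API; the alembic.ini path is an assumption about the repo layout:

from alembic import command
from alembic.config import Config

cfg = Config("alembic.ini")  # assumed to point at the prod migration environment
command.upgrade(cfg, "head", sql=True)  # offline mode: prints SQL to stdout, executes nothing

Redirecting that output to sql/migrations/prod/s11_p0_prod.sql would reproduce a file like the one above.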
From c62501599056b725edeeb680dc95d070ba8a1389 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 10:23:18 -0400
Subject: [PATCH 06/15] Add error handling for file name issues

---
 .gitignore                  |  2 +-
 src/fbo_scraper/get_opps.py | 28 +++++++++++++++++++++++++++-
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index b9ca5cdd..61a7c65d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -84,7 +84,7 @@ celerybeat-schedule

 # Environments
 .env
-.venv
+.venv*
 env/
 venv/
 ENV/
diff --git a/src/fbo_scraper/get_opps.py b/src/fbo_scraper/get_opps.py
index 87c36eaa..f84084b1 100644
--- a/src/fbo_scraper/get_opps.py
+++ b/src/fbo_scraper/get_opps.py
@@ -7,6 +7,8 @@
 import shutil
 import hashlib
 import urllib
+import errno
+from pathlib import Path

 sys.path.append( os.path.dirname( os.path.dirname( os.path.abspath(__file__) ) ) )
 from fbo_scraper.get_doc_text import get_doc_text
@@ -176,12 +178,36 @@ def get_docs(opp, out_path):
         if match and len(match.groups()) > 0:
             real_filename = urllib.parse.unquote(match.group(1)).replace("+", " ") # have to replace + with space because parse doesn't do that
             real_filename_with_path = os.path.join(out_path, real_filename)
-            os.rename(filename, real_filename_with_path)
+            try:
+                os.rename(filename, real_filename_with_path)
+            except OSError as e:
+                if e.errno == errno.ENAMETOOLONG:
+                    logger.warning(f"Filename {real_filename_with_path} is too long. Skipping.")
+                    real_filename_with_path = handle_file_too_long(real_filename_with_path)
+                    os.rename(filename, real_filename_with_path)
+                else:
+                    raise
             logger.info("Downloaded file {}".format(real_filename_with_path))
             filelist.append( (real_filename_with_path, file_url) )
     http.clear()
     return filelist

+def handle_file_too_long(filepath):
+    path_f = Path(filepath)
+    stem = path_f.stem
+    suffix = path_f.suffix
+    path = path_f.parent
+
+    # if the filename is too long, try to shorten it by removing the middle
+    # of the filename this should preserve the beginning and end of the filename
+
+    new_stem = stem[:int(len(stem)/2)]+ '--' + stem[-20:]
+    new_filename = new_stem + suffix
+
+    return Path(path, new_filename)
+
+
+
 def get_attachment_data(file_name, url):
     text = get_doc_text(file_name)
     fn = os.path.basename(file_name)
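For context on the new except branch: most Linux filesystems limit a single name component to NAME_MAX (commonly 255 bytes), and os.rename to a longer name raises OSError with errno set to ENAMETOOLONG. A self-contained demonstration of the failure being caught here (the file names are made up):

import errno
import os
import tempfile

with tempfile.TemporaryDirectory() as d:
    src = os.path.join(d, "short.bin")
    open(src, "wb").close()
    try:
        os.rename(src, os.path.join(d, "x" * 300 + ".bin"))  # 300 chars > NAME_MAX
    except OSError as e:
        print(e.errno == errno.ENAMETOOLONG)  # True on typical Linux filesystems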
Skipping.") + real_filename_with_path = handle_file_too_long(real_filename_with_path) + os.rename(filename, real_filename_with_path) + else: + raise logger.info("Downloaded file {}".format(real_filename_with_path)) filelist.append( (real_filename_with_path, file_url) ) http.clear() return filelist +def handle_file_too_long(filepath): + path_f = Path(filepath) + stem = path_f.stem + suffix = path_f.suffix + path = path_f.parent + + # if the filename is too long, try to shorten it by removing the middle + # of the filename this should preserve the beginning and end of the filename + + new_stem = stem[:int(len(stem)/2)]+ '--' + stem[-20:] + new_filename = new_stem + suffix + + return Path(path, new_filename) + + + def get_attachment_data(file_name, url): text = get_doc_text(file_name) fn = os.path.basename(file_name) From 644a826958f0f3a6c87ad34514ef82aad5771e85 Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Tue, 23 May 2023 13:59:46 -0400 Subject: [PATCH 08/15] Fix the logging, and add the smartie-log to be rotated --- src/fbo_scraper/json_log_formatter.py | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/fbo_scraper/json_log_formatter.py b/src/fbo_scraper/json_log_formatter.py index 091a7a88..d807beba 100644 --- a/src/fbo_scraper/json_log_formatter.py +++ b/src/fbo_scraper/json_log_formatter.py @@ -3,7 +3,7 @@ from datetime import datetime from dateutil import parser from sys import stdout - +from logging.handlers import TimedRotatingFileHandler import re class CustomJsonFormatter(jsonlogger.JsonFormatter): @@ -50,16 +50,6 @@ def process_log_record(self, log_record): for key in to_be_removed: del log_record[key] - # TODO: we *should* be able to get this to work as a dict, but cloud.gov doesn't do it for me. - extra = "" - for key in log_record['meta']: - extra = "{} {}:{} |".format(extra, key, log_record['meta'][key]) - if not extra == "": - extra = " [{} ]".format(extra) - log_record['message'] = "{}{}".format(log_record['message'], extra) - - del log_record['meta'] - return log_record @@ -72,13 +62,13 @@ def configureLogger(logger, log_file_level = logging.INFO, stdout_level = 11): # json output setup logHandler = logging.StreamHandler(stdout) - formatter = CustomJsonFormatter('(timestamp) (level) (message) (filename) (lineno)') # jsonlogger.JsonFormatter() + formatter = CustomJsonFormatter('%(timestamp)s %(level)s %(message)s %(filename)s %(lineno)s') # jsonlogger.JsonFormatter() logHandler.setFormatter(formatter) logHandler.setLevel(stdout_level) logger.addHandler(logHandler) # file handler - fh = logging.FileHandler(r'smartie-logger.log') + fh = TimedRotatingFileHandler(r'smartie-logger.log', when='midnight', backupCount=14) fh.setFormatter( logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')) fh.setLevel(log_file_level) logger.addHandler(fh) From f00705bdd18ba79be6812e566b4d366de977b803 Mon Sep 17 00:00:00 2001 From: BuckinghamAJ Date: Tue, 23 May 2023 14:13:12 -0400 Subject: [PATCH 09/15] Moved logs to specific directory --- logs/__placeholder.txt | 0 src/fbo_scraper/json_log_formatter.py | 3 ++- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 logs/__placeholder.txt diff --git a/logs/__placeholder.txt b/logs/__placeholder.txt new file mode 100644 index 00000000..e69de29b diff --git a/src/fbo_scraper/json_log_formatter.py b/src/fbo_scraper/json_log_formatter.py index d807beba..300d9b47 100644 --- a/src/fbo_scraper/json_log_formatter.py +++ b/src/fbo_scraper/json_log_formatter.py @@ -5,6 +5,7 @@ from sys 
From f00705bdd18ba79be6812e566b4d366de977b803 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 14:13:12 -0400
Subject: [PATCH 09/15] Moved logs to specific directory

---
 logs/__placeholder.txt                | 0
 src/fbo_scraper/json_log_formatter.py | 3 ++-
 2 files changed, 2 insertions(+), 1 deletion(-)
 create mode 100644 logs/__placeholder.txt

diff --git a/logs/__placeholder.txt b/logs/__placeholder.txt
new file mode 100644
index 00000000..e69de29b
diff --git a/src/fbo_scraper/json_log_formatter.py b/src/fbo_scraper/json_log_formatter.py
index d807beba..300d9b47 100644
--- a/src/fbo_scraper/json_log_formatter.py
+++ b/src/fbo_scraper/json_log_formatter.py
@@ -5,6 +5,7 @@
 from sys import stdout
 from logging.handlers import TimedRotatingFileHandler
 import re
+from pathlib import Path

 class CustomJsonFormatter(jsonlogger.JsonFormatter):
@@ -68,7 +69,7 @@ def configureLogger(logger, log_file_level = logging.INFO, stdout_level = 11):
     logger.addHandler(logHandler)

     # file handler
-    fh = TimedRotatingFileHandler(r'smartie-logger.log', when='midnight', backupCount=14)
+    fh = TimedRotatingFileHandler(Path('logs', 'smartie-logger.log'), when='midnight', backupCount=14)
     fh.setFormatter( logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
     fh.setLevel(log_file_level)
     logger.addHandler(fh)

From 017581f7b9dec98d1300cb208351c05dab31bb15 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 14:18:08 -0400
Subject: [PATCH 10/15] Creating a prod specific manifest file

---
 .dockerignore                     | 2 +-
 manifest.yml => manifest.prod.yml | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename manifest.yml => manifest.prod.yml (67%)

diff --git a/.dockerignore b/.dockerignore
index c39f40e9..e0d839d7 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -13,6 +13,6 @@ docs/
 *.md
 env/
 venv/
-.venv/
+.venv*
 crontab-test
 .vscode/
\ No newline at end of file
diff --git a/manifest.yml b/manifest.prod.yml
similarity index 67%
rename from manifest.yml
rename to manifest.prod.yml
index fd01a792..82919b4e 100644
--- a/manifest.yml
+++ b/manifest.prod.yml
@@ -1,9 +1,9 @@
 ---
 applications:
-- name: srt-fbo-scraper
+- name: srt-fbo-scraper-prod
   memory: 1GB
   disk_quota: 4GB
   no-route: true
   health-check-type: process
   services:
-    - srt-postgres-dev
\ No newline at end of file
+    - srt-postgres-prod
\ No newline at end of file
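The empty logs/__placeholder.txt exists because git does not track empty directories; without it, a fresh checkout would have no logs/ directory and the rotating file handler could not open its target. A more defensive variant (an alternative sketch, not what the patch does) would create the directory at startup instead:

from pathlib import Path
from logging.handlers import TimedRotatingFileHandler

log_dir = Path("logs")
log_dir.mkdir(exist_ok=True)  # guard against a missing directory at runtime
fh = TimedRotatingFileHandler(log_dir / "smartie-logger.log", when="midnight", backupCount=14)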
From 9b928fdcac21b3f16da6f9705d1979563e0960b7 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 14:19:34 -0400
Subject: [PATCH 11/15] Migrate the changes to db.py

---
 src/fbo_scraper/db/auto_db.py | 244 ----------------------------------
 1 file changed, 244 deletions(-)
 delete mode 100644 src/fbo_scraper/db/auto_db.py

diff --git a/src/fbo_scraper/db/auto_db.py b/src/fbo_scraper/db/auto_db.py
deleted file mode 100644
index afac4840..00000000
--- a/src/fbo_scraper/db/auto_db.py
+++ /dev/null
@@ -1,244 +0,0 @@
-# coding: utf-8
-from sqlalchemy import Boolean, Column, DateTime, Float, ForeignKey, Integer, String, Table, Text, text
-from sqlalchemy.dialects.postgresql import JSONB
-from sqlalchemy.orm import relationship
-from sqlalchemy.ext.declarative import declarative_base
-from datetime import datetime
-from sqlalchemy.sql import func
-
-Base = declarative_base()
-metadata = Base.metadata
-
-
-class Agency(Base):
-    __tablename__ = 'Agencies'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('\"Agencies_id_seq\"'::regclass)"))
-    agency = Column(String)
-    acronym = Column(String)
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-
-
-class Prediction(Base):
-    __tablename__ = 'Predictions'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('\"Predictions_id_seq\"'::regclass)"))
-    title = Column(String, nullable=False)
-    url = Column(String)
-    agency = Column(String)
-    numDocs = Column(Integer)
-    solNum = Column(String, nullable=False, unique=True)
-    noticeType = Column(String, nullable=False)
-    date = Column(DateTime)
-    office = Column(String)
-    na_flag = Column(Boolean)
-    eitLikelihood = Column(JSONB(astext_type=Text()))
-    undetermined = Column(Boolean)
-    action = Column(JSONB(astext_type=Text()))
-    actionStatus = Column(String)
-    actionDate = Column(DateTime)
-    history = Column(JSONB(astext_type=Text()))
-    contactInfo = Column(JSONB(astext_type=Text()))
-    parseStatus = Column(JSONB(astext_type=Text()))
-    predictions = Column(JSONB(astext_type=Text()))
-    reviewRec = Column(String)
-    searchText = Column(String)
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-    active = Column(Boolean, server_default=text("true"))
-
-
-class SequelizeMeta(Base):
-    __tablename__ = 'SequelizeMeta'
-
-    name = Column(String(255), primary_key=True)
-
-
-class Survey(Base):
-    __tablename__ = 'Surveys'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('\"Surveys_id_seq\"'::regclass)"))
-    question = Column(Text)
-    choices = Column(JSONB(astext_type=Text()))
-    section = Column(String(2000))
-    type = Column(String(2000))
-    answer = Column(Text)
-    note = Column(Text)
-    choicesNote = Column(JSONB(astext_type=Text()))
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-
-
-class User(Base):
-    __tablename__ = 'Users'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('\"Users_id_seq\"'::regclass)"))
-    firstName = Column(String)
-    lastName = Column(String)
-    agency = Column(String)
-    email = Column(String)
-    password = Column(String)
-    position = Column(String)
-    isAccepted = Column(Boolean)
-    isRejected = Column(Boolean)
-    userRole = Column(String)
-    rejectionNote = Column(String)
-    creationDate = Column(String)
-    tempPassword = Column(String)
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-    maxId = Column(String(256))
-
-
-class AgencyAlias(Base):
-    __tablename__ = 'agency_alias'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('agency_alias_id_seq'::regclass)"))
-    agency_id = Column(Integer, nullable=False)
-    alias = Column(String)
-    createdAt = Column(DateTime)
-    updatedAt = Column(DateTime)
-
-
-class Model(Base):
-    __tablename__ = 'model'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('model_id_seq'::regclass)"))
-    results = Column(JSONB(astext_type=Text()))
-    params = Column(JSONB(astext_type=Text()))
-    score = Column(Float(53))
-    create_date = Column(DateTime, nullable=False)
-
-
-class NoticeType(Base):
-    __tablename__ = 'notice_type'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('notice_type_id_seq'::regclass)"))
-    notice_type = Column(String(50), index=True)
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-
-
-class Solicitation(Base):
-    __tablename__ = 'solicitations'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('solicitations_id_seq'::regclass)"))
-    solNum = Column(String, unique=True)
-    active = Column(Boolean, server_default=text("true"))
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-    title = Column(String)
-    url = Column(String)
-    agency = Column(String)
-    numDocs = Column(Integer)
-    notice_type_id = Column(Integer)
-    noticeType = Column(String)
-    date = Column(DateTime)
-    office = Column(String)
-    na_flag = Column(Boolean, server_default=text("false"))
-    category_list = Column(JSONB(astext_type=Text()))
-    undetermined = Column(Boolean)
-    history = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb"))
-    action = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb"))
-    actionDate = Column(DateTime)
-    actionStatus = Column(String)
-    contactInfo = Column(JSONB(astext_type=Text()))
-    parseStatus = Column(JSONB(astext_type=Text()))
-    predictions = Column(JSONB(astext_type=Text()), server_default=text("'{\"value\": \"red\", \"history\": []}'::jsonb"))
-    reviewRec = Column(String)
-    searchText = Column(String)
-    compliant = Column(Integer, server_default=text("0"))
-    noticeData = Column(JSONB(astext_type=Text()))
-    agency_id = Column(Integer)
-
-
-t_survey_backup = Table(
-    'survey_backup', metadata,
-    Column('id', Integer),
-    Column('question', Text),
-    Column('choices', JSONB(astext_type=Text())),
-    Column('section', String(2000)),
-    Column('type', String(2000)),
-    Column('answer', Text),
-    Column('note', Text),
-    Column('choicesNote', JSONB(astext_type=Text())),
-    Column('createdAt', DateTime),
-    Column('updatedAt', DateTime)
-)
-
-
-class SurveyResponse(Base):
-    __tablename__ = 'survey_responses'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('survey_responses_id_seq'::regclass)"))
-    solNum = Column(String, index=True)
-    contemporary_notice_id = Column(Integer)
-    response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb"))
-    maxId = Column(String(256))
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-
-
-class SurveyResponsesArchive(Base):
-    __tablename__ = 'survey_responses_archive'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('survey_responses_archive_id_seq'::regclass)"))
-    solNum = Column(String)
-    contemporary_notice_id = Column(Integer)
-    response = Column(JSONB(astext_type=Text()), server_default=text("'[]'::jsonb"))
-    maxId = Column(String(256))
-    original_created_at = Column(DateTime, server_default=text("CURRENT_TIMESTAMP"))
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-
-
-t_winston_logs = Table(
-    'winston_logs', metadata,
-    Column('timestamp', DateTime(True)),
-    Column('level', String(255)),
-    Column('message', Text),
-    Column('meta', JSONB(astext_type=Text()))
-)
-
-
-class Attachment(Base):
-    __tablename__ = 'attachment'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('attachment_id_seq'::regclass)"))
-    notice_id = Column(Integer)
-    notice_type_id = Column(ForeignKey('notice_type.id'))
-    machine_readable = Column(Boolean)
-    attachment_text = Column(Text)
-    prediction = Column(Integer)
-    decision_boundary = Column(Float(53))
-    validation = Column(Integer)
-    attachment_url = Column(Text)
-    trained = Column(Boolean)
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-    filename = Column(Text, nullable=False)
-    solicitation_id = Column(ForeignKey('solicitations.id'))
-
-    notice_type = relationship('NoticeType')
-    solicitation = relationship('Solicitation')
-
-
-class Notice(Base):
-    __tablename__ = 'notice'
-
-    id = Column(Integer, primary_key=True, server_default=text("nextval('notice_id_seq'::regclass)"))
-    notice_type_id = Column(ForeignKey('notice_type.id'))
-    solicitation_number = Column(String(150), index=True)
-    agency = Column(String(150))
-    date = Column(DateTime)
-    notice_data = Column(JSONB(astext_type=Text()))
-    compliant = Column(Integer)
-    feedback = Column(JSONB(astext_type=Text()))
-    history = Column(JSONB(astext_type=Text()))
-    action = Column(JSONB(astext_type=Text()))
-    createdAt = Column(DateTime, nullable=False, default=func.now())
-    updatedAt = Column(DateTime, onupdate=func.now())
-    na_flag = Column(Boolean, default=False)
-
-    notice_type = relationship('NoticeType')
From 0e9c019f19748083db7348dade58287f24cd4175 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 14:22:00 -0400
Subject: [PATCH 12/15] Better log message

---
 src/fbo_scraper/get_opps.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fbo_scraper/get_opps.py b/src/fbo_scraper/get_opps.py
index f84084b1..cb5794d6 100644
--- a/src/fbo_scraper/get_opps.py
+++ b/src/fbo_scraper/get_opps.py
@@ -182,7 +182,7 @@ def get_docs(opp, out_path):
                 os.rename(filename, real_filename_with_path)
             except OSError as e:
                 if e.errno == errno.ENAMETOOLONG:
-                    logger.warning(f"Filename {real_filename_with_path} is too long. Skipping.")
+                    logger.warning(f"Filename {real_filename_with_path} is too long. Shortening name.")
                     real_filename_with_path = handle_file_too_long(real_filename_with_path)
                     os.rename(filename, real_filename_with_path)
                 else:
                     raise

From 834a50765f9181952f9cdd3a92eb13d8d24dc1dd Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 14:40:05 -0400
Subject: [PATCH 13/15] Add documentation for file length handling function

---
 src/fbo_scraper/get_opps.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/fbo_scraper/get_opps.py b/src/fbo_scraper/get_opps.py
index cb5794d6..3f9e861c 100644
--- a/src/fbo_scraper/get_opps.py
+++ b/src/fbo_scraper/get_opps.py
@@ -192,16 +192,18 @@ def get_docs(opp, out_path):
     http.clear()
     return filelist

-def handle_file_too_long(filepath):
+def handle_file_too_long(filepath: os.PathLike) -> Path:
+    """
+    If the filepath is too long, shorten it by removing the middle of the filename.
+    This should preserve the beginning and end of the filename.
+    :param filepath: the filepath to shorten
+    """
     path_f = Path(filepath)
     stem = path_f.stem
     suffix = path_f.suffix
     path = path_f.parent

-    # if the filename is too long, try to shorten it by removing the middle
-    # of the filename this should preserve the beginning and end of the filename
-
-    new_stem = stem[:int(len(stem)/2)]+ '--' + stem[-20:]
+    new_stem = stem[:int(len(stem)/2)]+ '...' + stem[-20:]
     new_filename = new_stem + suffix

     return Path(path, new_filename)
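To illustrate what the documented helper now produces (the input value is made up): halving the stem and keeping its last 20 characters shortens names aggressively, but the result is not strictly bounded, so a pathological input could in principle still exceed NAME_MAX; truncating to a hard byte budget would be the stricter alternative.

stem = "a" * 300 + "_final_report_v2"  # pretend this came from a Content-Disposition header
new_stem = stem[:int(len(stem) / 2)] + '...' + stem[-20:]
print(len(stem), "->", len(new_stem))  # 316 -> 181: much shorter, but not guaranteed < 255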
From 6419e2720621abf580806f0c17eeb0c2959c9dee Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 15:01:35 -0400
Subject: [PATCH 14/15] 2GB memory for prod scraper

---
 manifest.prod.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/manifest.prod.yml b/manifest.prod.yml
index 82919b4e..ee5bb44e 100644
--- a/manifest.prod.yml
+++ b/manifest.prod.yml
@@ -1,7 +1,7 @@
 ---
 applications:
 - name: srt-fbo-scraper-prod
-  memory: 1GB
+  memory: 2GB
   disk_quota: 4GB
   no-route: true
   health-check-type: process

From c7cb2ff64eb0ae57b093a2dcf7b227c3ab8e6910 Mon Sep 17 00:00:00 2001
From: BuckinghamAJ
Date: Tue, 23 May 2023 15:09:53 -0400
Subject: [PATCH 15/15] Remove the auto-generated Float(53) precision specifier

---
 src/fbo_scraper/db/db.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/fbo_scraper/db/db.py b/src/fbo_scraper/db/db.py
index 276e4f15..19b65c67 100644
--- a/src/fbo_scraper/db/db.py
+++ b/src/fbo_scraper/db/db.py
@@ -59,7 +59,7 @@ class Attachment(Base):
     machine_readable = Column(Boolean)
     attachment_text = Column(Text)
     prediction = Column(Integer)
-    decision_boundary = Column(Float(53))
+    decision_boundary = Column(Float)
     validation = Column(Integer)
     attachment_url = Column(Text)
     trained = Column(Boolean)
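A closing note on the Float change: in PostgreSQL, FLOAT with no precision already means double precision, so Column(Float) and the reflected Column(Float(53)) describe the same column; dropping the precision mainly stops Alembic autogenerate from reporting a spurious diff on every run. The emitted DDL types can be checked directly:

from sqlalchemy import Float
from sqlalchemy.dialects import postgresql

print(Float().compile(dialect=postgresql.dialect()))    # FLOAT
print(Float(53).compile(dialect=postgresql.dialect()))  # FLOAT(53), i.e. double precision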