Commit

updated more config lists, and various code updates
tylerprogramming committed May 4, 2024
1 parent 87740b4 commit 69c5c35
Showing 12 changed files with 202 additions and 28 deletions.
26 changes: 13 additions & 13 deletions ai_agency_01_workout/config.py
@@ -6,20 +6,20 @@

SEED = 41

-# config_list = [
-#     {
-#         "model": os.getenv("model_gpt_4"),
-#         "api_key": os.getenv("OPENAI_API_KEY"),
-#     }
-# ]

config_list = [
-    {
-        "model": "NULL",
-        "base_url": os.getenv("base_url"),
-        "api_key": "NULL",
-    },
-]
+    {
+        "model": os.getenv("model_gpt_4"),
+        "api_key": os.getenv("OPENAI_API_KEY"),
+    }
+]

+# config_list = [
+#     {
+#         "model": "NULL",
+#         "base_url": os.getenv("base_url"),
+#         "api_key": "NULL",
+#     },
+# ]

llm_config = {
    "config_list": config_list,
2 changes: 1 addition & 1 deletion ai_agency_01_workout/main.py
@@ -37,4 +37,4 @@ def index():


if __name__ == '__main__':
-    app.run(debug=True)
+    app.run(debug=True, port=5001)
31 changes: 31 additions & 0 deletions ai_agency_05_pdf/pdfsummary.py
@@ -0,0 +1,31 @@
import os

os.environ["OPENAI_API_KEY"] = "sk-1111"
os.environ["base_url"] = "http://localhost:1234/v1"

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

if __name__ == "__main__":
print("starting")
pdf_path = "/Users/tylerreed/PycharmProjects/ai/ai_agency_05_pdf/ancient_rome.pdf"
loader = PyMuPDFLoader(file_path=pdf_path)
documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
docs = text_splitter.split_documents(documents)

embeddings = OpenAIEmbeddings()

# stored in ram in local machine
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
vectorstore.save_local("faiss_index_react")
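    # NOTE (assumption, not part of this commit): recent langchain-community releases may
    # require allow_dangerous_deserialization=True in the FAISS.load_local call below.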

    my_vectorstore = FAISS.load_local("faiss_index_react", embeddings)
    qa = RetrievalQA.from_chain_type(llm=OpenAI(), retriever=my_vectorstore.as_retriever(), chain_type="map_reduce")
    response = qa.invoke("Summarize Julius Caesar")
    print(response)
    print("done")
20 changes: 10 additions & 10 deletions autogen_graphs/test.md
@@ -1,12 +1,12 @@
| Domain | Title | Authors | Summary | Link |
| --- | --- | --- | --- | --- |
| Computer Science | Connecting NeRFs, Images, and Text | Francesco Ballerini, Pierluigi Zama Ramirez, Roberto Mirabella, Samuele Salti, Luigi Di Stefano | The paper explores a novel research direction that aims to connect the NeRF modality with other modalities, similar to established methodologies for images and text. | [Link](http://arxiv.org/abs/2404.07993v1) |
| Computer Science | OpenBias: Open-set Bias Detection in Text-to-Image Generative Models | Moreno D'Incà, Elia Peruzzo, Massimiliano Mancini, Dejia Xu, Vidit Goel, Xingqian Xu, Zhangyang Wang, Humphrey Shi, Nicu Sebe | The paper presents OpenBias, a new pipeline that identifies and quantifies the severity of biases agnostically, without access to any precompiled set. | [Link](http://arxiv.org/abs/2404.07990v1) |
| Computer Science | View Selection for 3D Captioning via Diffusion Ranking | Tiange Luo, Justin Johnson, Honglak Lee | The paper presents DiffuRank, a method that leverages a pre-trained text-to-3D model to assess the alignment between 3D objects and their 2D rendered views. | [Link](http://arxiv.org/abs/2404.07984v1) |
| Computer Science | Two Effects, One Trigger: On the Modality Gap, Object Bias, and Information Imbalance in Contrastive Vision-Language Representation Learning | Simon Schrodi, David T. Hoffmann, Max Argus, Volker Fischer, Thomas Brox | The paper investigates the modality gap and object bias in contrastive vision-language models like CLIP. | [Link](http://arxiv.org/abs/2404.07983v1) |
| Computer Science | Manipulating Large Language Models to Increase Product Visibility | Aounon Kumar, Himabindu Lakkaraju | The paper investigates whether recommendations from LLMs can be manipulated to enhance a product's visibility. | [Link](http://arxiv.org/abs/2404.07981v1) |
| Computer Science | LLoCO: Learning Long Contexts Offline | Sijun Tan, Xiuyu Li, Shishir Patil, Ziyang Wu, Tianjun Zhang, Kurt Keutzer, Joseph E. Gonzalez, Raluca Ada Popa | The paper introduces LLoCO, a technique that combines context compression, retrieval, and parameter-efficient finetuning using LoRA. | [Link](http://arxiv.org/abs/2404.07979v1) |
| Physics | On average output entropy of a quantum channel | M. E. Shirokov | The paper describes analytical properties of the average output entropy of a quantum channel as a function of a pair (channel, input ensemble). | [Link](http://arxiv.org/abs/2404.07978v1) |
| Computer Science | Gaga: Group Any Gaussians via 3D-aware Memory Bank | Weijie Lyu, Xueting Li, Abhijit Kundu, Yi-Hsuan Tsai, Ming-Hsuan Yang | The paper introduces Gaga, a framework that reconstructs and segments open-world 3D scenes by leveraging inconsistent 2D masks predicted by zero-shot segmentation models. | [Link](http://arxiv.org/abs/2404.07977v1) |
| Computer Science | OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments | Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu | The paper introduces OSWorld, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems. | [Link](http://arxiv.org/abs/2404.07972v1) |
| Computer Science | Ferret-v2: An Improved Baseline for Referring and Grounding with Large Language Models | Haotian Zhang, Haoxuan You, Philipp Dufter, Bowen Zhang, Chen Chen, Hong-You Chen, Tsu-Jui Fu, William Yang Wang, Shih-Fu Chang, Zhe Gan, Yinfei Yang | The paper unveils Ferret-v2, a significant upgrade to Ferret, with three key designs: any resolution grounding and referring, multi-granularity visual encoding, and a three-stage training paradigm. | [Link](http://arxiv.org/abs/2404.07973v1) |
| Computer Vision | OpenBias: Open-set Bias Detection in Text-to-Image Generative Models | Moreno D'Incà, Elia Peruzzo, Massimiliano Mancini, Dejia Xu, Vidit Goel, Xingqian Xu, Zhangyang Wang, Humphrey Shi, Nicu Sebe | The paper presents OpenBias, a pipeline for open-set bias detection in text-to-image generative models. It uses a Large Language Model (LLM) to propose biases given a set of captions, generates images using the same captions, and uses a Vision Question Answering model to recognize the biases. | [Link](http://arxiv.org/abs/2404.07990v1) |
| AI & Marketing | Manipulating Large Language Models to Increase Product Visibility | Aounon Kumar, Himabindu Lakkaraju | The authors investigate whether recommendations from LLMs can be manipulated to enhance a product's visibility. They demonstrate that adding a strategic text sequence to a product's information page can significantly increase its likelihood of being listed as the LLM's top recommendation. | [Link](http://arxiv.org/abs/2404.07981v1) |
| AI & Data Processing | LLoCO: Learning Long Contexts Offline | Sijun Tan, Xiuyu Li, Shishir Patil, Ziyang Wu, Tianjun Zhang, Kurt Keutzer, Joseph E. Gonzalez, Raluca Ada Popa | The paper presents LLoCO, a technique that combines context compression, retrieval, and parameter-efficient finetuning using LoRA. It extends the effective context window of a 4k token LLaMA2-7B model to handle up to 128k tokens. | [Link](http://arxiv.org/abs/2404.07979v1) |
| AI & HCI | OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments | Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu | The authors introduce OSWorld, a scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems. | [Link](http://arxiv.org/abs/2404.07972v1) |
| AI & Computer Vision | Ferret-v2: An Improved Baseline for Referring and Grounding with Large Language Models | Haotian Zhang, Haoxuan You, Philipp Dufter, Bowen Zhang, Chen Chen, Hong-You Chen, Tsu-Jui Fu, William Yang Wang, Shih-Fu Chang, Zhe Gan, Yinfei Yang | The paper presents Ferret-v2, an upgrade to Ferret, with three key designs: any resolution grounding and referring, multi-granularity visual encoding, and a three-stage training paradigm. | [Link](http://arxiv.org/abs/2404.07973v1) |
| AI & Online Safety | Leveraging Large Language Models (LLMs) to Support Collaborative Human-AI Online Risk Data Annotation | Jinkyung Park, Pamela Wisniewski, Vivek Singh | The authors discuss the potential for leveraging LLMs as interactive research tools to facilitate collaboration between human coders and AI to effectively annotate online risk data at scale. | [Link](http://arxiv.org/abs/2404.07926v1) |
| AI & Language Processing | LaVy: Vietnamese Multimodal Large Language Model | Chi Tran, Huong Le Thanh | The authors introduce LaVy, a state-of-the-art Vietnamese Multimodal Large Language Model, and LaVy-Bench, a benchmark for evaluating MLLMs's understanding on Vietnamese visual language tasks. | [Link](http://arxiv.org/abs/2404.07922v1) |
| AI & Security | AmpleGCG: Learning a Universal and Transferable Generative Model of Adversarial Suffixes for Jailbreaking Both Open and Closed LLMs | Zeyi Liao, Huan Sun | The authors present AmpleGCG, a high-dimensional representation of human value distributions in LLMs, orthogonal to model architecture and training data. | [Link](http://arxiv.org/abs/2404.07921v1) |
| AI & Ethics | High-Dimension Human Value Representation in Large Language Models | Samuel Cahyawijaya, Delong Chen, Yejin Bang, Leila Khalatbari, Bryan Wilie, Ziwei Ji, Etsuko Ishii, Pascale Fung | The authors propose UniVaR, a high-dimensional representation of human value distributions in LLMs, to understand the scope and nature of human values injected into these models before their release. | [Link](http://arxiv.org/abs/2404.07900v1) |
| AI & Translation | Guiding Large Language Models to Post-Edit Machine Translation with Error Annotations | Dayeon Ki, Marine Carpuat | The authors exploit the complementary strengths of LLMs and supervised MT by guiding LLMs to automatically post-edit MT with external feedback on its quality, derived from Multidimensional Quality Metric (MQM) annotations. | [Link](http://arxiv.org/abs/2404.07851v1) |
18 changes: 18 additions & 0 deletions autogen_lmstudio_connection/main.py
@@ -0,0 +1,18 @@
import autogen


def main():
    config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")

    # Create the agent that uses the LLM.
    assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list})

    # Create the agent that represents the user in the conversation.
    user_proxy = autogen.UserProxyAgent("user_proxy",
                                        code_execution_config={"work_dir": "coding", "use_docker": False})

    user_proxy.initiate_chat(assistant, message="Generate a function that acts like the 8 Ball with random phrases.")


if __name__ == "__main__":
    main()
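The referenced OAI_CONFIG_LIST.json is not part of this commit; for an LM Studio endpoint such as the http://localhost:1234/v1 server used elsewhere in this repository, it might look like the following (illustrative placeholder values only):

[
  {
    "model": "local-model",
    "base_url": "http://localhost:1234/v1",
    "api_key": "lm-studio"
  }
]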
55 changes: 55 additions & 0 deletions autogen_logging/main.py
@@ -0,0 +1,55 @@
import json
import autogen
import pandas as pd
import sqlite3

# setup the llm_configuration
llm_config = {
    "config_list": autogen.config_list_from_json(
        env_or_file="OAI_CONFIG_LIST.json",
    ),
    "temperature": 0,
}

# Start logging
logging_session_id = autogen.runtime_logging.start(config={"dbname": "logs.db"})
print("Started Logging session ID: " + str(logging_session_id))

# Create an agent workflow and run it
assistant = autogen.AssistantAgent(name="assistant", llm_config=llm_config)
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    code_execution_config=False,
    human_input_mode="NEVER",
    is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
)

user_proxy.initiate_chat(
    assistant, message="What is the height of the Sears Tower? Only respond with the answer and terminate"
)

# Stop logging
autogen.runtime_logging.stop()


# create function to get log
def get_log(dbname="logs.db", table="chat_completions"):
    con = sqlite3.connect(dbname)
    query = f"SELECT request, response, cost, start_time, end_time from {table}"
    cursor = con.execute(query)
    rows = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]

    data = [dict(zip(column_names, row)) for row in rows]

    con.close()

    return data


def str_to_dict(s):
    return json.loads(s)


# use pandas to get extra information and print out to terminal
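# A minimal sketch of that pandas step (assumption, not part of this commit), using the
# get_log and str_to_dict helpers defined above:
log_data = get_log()
log_data_df = pd.DataFrame(log_data)

# the request/response columns are stored as JSON strings; parse them for inspection
log_data_df["request"] = log_data_df["request"].apply(str_to_dict)
log_data_df["response"] = log_data_df["response"].apply(str_to_dict)

print(log_data_df[["cost", "start_time", "end_time"]])
print("Total cost:", log_data_df["cost"].sum())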

38 changes: 38 additions & 0 deletions autogen_math_solving/main.py
@@ -0,0 +1,38 @@
import autogen
from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent

# configuration
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST.json",
)

llm_config = {
    "timeout": 600,
    "seed": 42,
    "config_list": config_list,
}

# create assistants
assistant = autogen.AssistantAgent(
    name="assistant",
    system_message="You are a helpful assistant.",
    llm_config=llm_config
)


math_proxy_agent = MathUserProxyAgent(
    name="math_proxy_agent",
    human_input_mode="NEVER",
    code_execution_config={"use_docker": False},
)

# the math problem and initiate chat
# math_problem = (
# """
# Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$.
# Express your answer in interval notation.
# """
# )

math_problem = "Problem: If $725x + 727y = 1500$ and $729x+ 731y = 1508$, what is the value of $x - y$ ?"
math_proxy_agent.initiate_chat(assistant, problem=math_problem)
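For reference, the expected result can be checked by hand: subtracting the first equation from the second gives $4x + 4y = 8$, so $x + y = 2$; rewriting the first equation as $726(x + y) - (x - y) = 1500$ gives $1452 - (x - y) = 1500$, hence $x - y = -48$. The agent's final answer should match this.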
4 changes: 2 additions & 2 deletions autogen_multiple_configs/config_list_from_json.py
@@ -1,7 +1,7 @@
import autogen

cheap_config_list = autogen.config_list_from_json(
-    env_or_file="OAI_CONFIG_LIST",
+    env_or_file="OAI_CONFIG_LIST.json",
    filter_dict={
        "model": {
            "gpt-3.5-turbo",
@@ -10,7 +10,7 @@
)

costly_config_list = autogen.config_list_from_json(
-    env_or_file="OAI_CONFIG_LIST",
+    env_or_file="OAI_CONFIG_LIST.json",
    filter_dict={
        "model": {
            "gpt-4",
1 change: 1 addition & 0 deletions autogen_multiple_configs/config_list_openai_aoai.py
@@ -4,6 +4,7 @@
key_file_path=".",
openai_api_key_file="/txt/key_openai.txt",
aoai_api_key_file="/txt/key_aoai.txt",
openai_api_base_file="/txt/base_openai.txt",
aoai_api_base_file="/txt/base_aoai.txt",
exclude='aoai'
)
4 changes: 2 additions & 2 deletions autogen_multiple_configs/get_config_list.py
@@ -1,13 +1,13 @@
import autogen

api_keys = ["SAMPLE_API_KEY"]
-api_bases = ["http://localhost:8001"]
+base_urls = ["http://localhost:8001"]
api_type = "openai"
api_version = "api-preview-01"

config_list = autogen.get_config_list(
    api_keys,
-    api_bases,
+    base_urls,
    api_type,
    api_version
)
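# For illustration (assumption, not part of this commit): with the values above,
# get_config_list builds one config dict per API key, roughly
# [{"api_key": "SAMPLE_API_KEY", "base_url": "http://localhost:8001",
#   "api_type": "openai", "api_version": "api-preview-01"}]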
1 change: 1 addition & 0 deletions autogen_multiple_configs/txt/base_openai.txt
@@ -0,0 +1 @@
http://localhost:1234/v1
30 changes: 30 additions & 0 deletions autogen_text_to_video/main.py
@@ -0,0 +1,30 @@
import torch
import cv2
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16")
# pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# pipe.enable_model_cpu_offload()

# prompt = "Spiderman is surfing"
# video_frames = pipe(prompt, num_inference_steps=25).frames
# video_path = export_to_video(video_frames)

pipe = DiffusionPipeline.from_pretrained("ali-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

# memory optimization
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
pipe.enable_vae_slicing()

# prompt = "spiderman surfing a wave"
# video_frames = pipe(prompt, num_frames=24).frames[0]

prompt = "a cat surfing a wave"
video_frames = pipe(prompt, num_frames=100).frames[0]

video_path = export_to_video(video_frames, fps=30, output_video_path="vid1.mp4")


# UPSCALE THIS AS WELL
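# One possible way to act on the note above (assumption, not part of this commit): refine the
# low-resolution frames with a video-to-video pass. The model name (cerspense/zeroscope_v2_XL),
# the 1024x576 target size, and the frame-format handling are assumptions that may need
# adjusting for your diffusers version.
import numpy as np
from PIL import Image
from diffusers import VideoToVideoSDPipeline

pipe_xl = VideoToVideoSDPipeline.from_pretrained("cerspense/zeroscope_v2_XL", torch_dtype=torch.float16)
pipe_xl.enable_model_cpu_offload()
pipe_xl.enable_vae_slicing()


def to_pil(frame):
    # some diffusers versions return float frames in [0, 1], others uint8 arrays
    arr = np.asarray(frame)
    if arr.dtype != np.uint8:
        arr = (arr * 255).clip(0, 255).astype(np.uint8)
    return Image.fromarray(arr).resize((1024, 576))


video = [to_pil(frame) for frame in video_frames]
upscaled_frames = pipe_xl(prompt, video=video, strength=0.6).frames[0]
export_to_video(upscaled_frames, fps=30, output_video_path="vid1_upscaled.mp4")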
