forked from tylerprogramming/ai
Commit 69c5c35 (1 parent: 87740b4)
updated more config lists, and various code updates

Showing 12 changed files with 202 additions and 28 deletions.
@@ -37,4 +37,4 @@ def index():

 if __name__ == '__main__':
-    app.run(debug=True)
+    app.run(debug=True, port=5001)
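The only change in this file is the `app.run` call; a common reason to move off port 5000 is that another local service (for example the macOS AirPlay Receiver) is already bound to it. For context, a minimal app consistent with this hunk might look like the sketch below; the body of `index()` is an assumption, since only the `app.run` lines appear in the diff.

```python
from flask import Flask

app = Flask(__name__)


@app.route("/")
def index():
    # placeholder body; the real implementation is not shown in this hunk
    return "Hello, world!"


if __name__ == '__main__':
    # run on 5001 so nothing already listening on 5000 gets in the way
    app.run(debug=True, port=5001)
```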
@@ -0,0 +1,31 @@
import os

os.environ["OPENAI_API_KEY"] = "sk-1111"
os.environ["base_url"] = "http://localhost:1234/v1"

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_openai import OpenAI

if __name__ == "__main__":
    print("starting")
    pdf_path = "/Users/tylerreed/PycharmProjects/ai/ai_agency_05_pdf/ancient_rome.pdf"
    loader = PyMuPDFLoader(file_path=pdf_path)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
    docs = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()

    # stored in RAM on the local machine
    vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)
    vectorstore.save_local("faiss_index_react")

    my_vectorstore = FAISS.load_local("faiss_index_react", embeddings)
    qa = RetrievalQA.from_chain_type(llm=OpenAI(), retriever=my_vectorstore.as_retriever(), chain_type="map_reduce")
    response = qa.invoke("Summarize Julius Caesar")
    print(response)
    print("done")
@@ -1,12 +1,12 @@
| Domain | Title | Authors | Summary | Link |
| --- | --- | --- | --- | --- |
| Computer Science | Connecting NeRFs, Images, and Text | Francesco Ballerini, Pierluigi Zama Ramirez, Roberto Mirabella, Samuele Salti, Luigi Di Stefano | The paper explores a novel research direction that aims to connect the NeRF modality with other modalities, similar to established methodologies for images and text. | [Link](http://arxiv.org/abs/2404.07993v1) |
| Computer Science | OpenBias: Open-set Bias Detection in Text-to-Image Generative Models | Moreno D'Incà, Elia Peruzzo, Massimiliano Mancini, Dejia Xu, Vidit Goel, Xingqian Xu, Zhangyang Wang, Humphrey Shi, Nicu Sebe | The paper presents OpenBias, a new pipeline that identifies and quantifies the severity of biases agnostically, without access to any precompiled set. | [Link](http://arxiv.org/abs/2404.07990v1) |
| Computer Science | View Selection for 3D Captioning via Diffusion Ranking | Tiange Luo, Justin Johnson, Honglak Lee | The paper presents DiffuRank, a method that leverages a pre-trained text-to-3D model to assess the alignment between 3D objects and their 2D rendered views. | [Link](http://arxiv.org/abs/2404.07984v1) |
| Computer Science | Two Effects, One Trigger: On the Modality Gap, Object Bias, and Information Imbalance in Contrastive Vision-Language Representation Learning | Simon Schrodi, David T. Hoffmann, Max Argus, Volker Fischer, Thomas Brox | The paper investigates the modality gap and object bias in contrastive vision-language models like CLIP. | [Link](http://arxiv.org/abs/2404.07983v1) |
| Computer Science | Manipulating Large Language Models to Increase Product Visibility | Aounon Kumar, Himabindu Lakkaraju | The paper investigates whether recommendations from LLMs can be manipulated to enhance a product's visibility. | [Link](http://arxiv.org/abs/2404.07981v1) |
| Computer Science | LLoCO: Learning Long Contexts Offline | Sijun Tan, Xiuyu Li, Shishir Patil, Ziyang Wu, Tianjun Zhang, Kurt Keutzer, Joseph E. Gonzalez, Raluca Ada Popa | The paper introduces LLoCO, a technique that combines context compression, retrieval, and parameter-efficient finetuning using LoRA. | [Link](http://arxiv.org/abs/2404.07979v1) |
| Physics | On average output entropy of a quantum channel | M. E. Shirokov | The paper describes analytical properties of the average output entropy of a quantum channel as a function of a pair (channel, input ensemble). | [Link](http://arxiv.org/abs/2404.07978v1) |
| Computer Science | Gaga: Group Any Gaussians via 3D-aware Memory Bank | Weijie Lyu, Xueting Li, Abhijit Kundu, Yi-Hsuan Tsai, Ming-Hsuan Yang | The paper introduces Gaga, a framework that reconstructs and segments open-world 3D scenes by leveraging inconsistent 2D masks predicted by zero-shot segmentation models. | [Link](http://arxiv.org/abs/2404.07977v1) |
| Computer Science | OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments | Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu | The paper introduces OSWorld, the first-of-its-kind scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems. | [Link](http://arxiv.org/abs/2404.07972v1) |
| Computer Science | Ferret-v2: An Improved Baseline for Referring and Grounding with Large Language Models | Haotian Zhang, Haoxuan You, Philipp Dufter, Bowen Zhang, Chen Chen, Hong-You Chen, Tsu-Jui Fu, William Yang Wang, Shih-Fu Chang, Zhe Gan, Yinfei Yang | The paper unveils Ferret-v2, a significant upgrade to Ferret, with three key designs: any resolution grounding and referring, multi-granularity visual encoding, and a three-stage training paradigm. | [Link](http://arxiv.org/abs/2404.07973v1) |
| Computer Vision | OpenBias: Open-set Bias Detection in Text-to-Image Generative Models | Moreno D'Incà, Elia Peruzzo, Massimiliano Mancini, Dejia Xu, Vidit Goel, Xingqian Xu, Zhangyang Wang, Humphrey Shi, Nicu Sebe | The paper presents OpenBias, a pipeline for open-set bias detection in text-to-image generative models. It uses a Large Language Model (LLM) to propose biases given a set of captions, generates images using the same captions, and uses a Vision Question Answering model to recognize the biases. | [Link](http://arxiv.org/abs/2404.07990v1) |
| AI & Marketing | Manipulating Large Language Models to Increase Product Visibility | Aounon Kumar, Himabindu Lakkaraju | The authors investigate whether recommendations from LLMs can be manipulated to enhance a product's visibility. They demonstrate that adding a strategic text sequence to a product's information page can significantly increase its likelihood of being listed as the LLM's top recommendation. | [Link](http://arxiv.org/abs/2404.07981v1) |
| AI & Data Processing | LLoCO: Learning Long Contexts Offline | Sijun Tan, Xiuyu Li, Shishir Patil, Ziyang Wu, Tianjun Zhang, Kurt Keutzer, Joseph E. Gonzalez, Raluca Ada Popa | The paper presents LLoCO, a technique that combines context compression, retrieval, and parameter-efficient finetuning using LoRA. It extends the effective context window of a 4k token LLaMA2-7B model to handle up to 128k tokens. | [Link](http://arxiv.org/abs/2404.07979v1) |
| AI & HCI | OSWorld: Benchmarking Multimodal Agents for Open-Ended Tasks in Real Computer Environments | Tianbao Xie, Danyang Zhang, Jixuan Chen, Xiaochuan Li, Siheng Zhao, Ruisheng Cao, Toh Jing Hua, Zhoujun Cheng, Dongchan Shin, Fangyu Lei, Yitao Liu, Yiheng Xu, Shuyan Zhou, Silvio Savarese, Caiming Xiong, Victor Zhong, Tao Yu | The authors introduce OSWorld, a scalable, real computer environment for multimodal agents, supporting task setup, execution-based evaluation, and interactive learning across various operating systems. | [Link](http://arxiv.org/abs/2404.07972v1) |
| AI & Computer Vision | Ferret-v2: An Improved Baseline for Referring and Grounding with Large Language Models | Haotian Zhang, Haoxuan You, Philipp Dufter, Bowen Zhang, Chen Chen, Hong-You Chen, Tsu-Jui Fu, William Yang Wang, Shih-Fu Chang, Zhe Gan, Yinfei Yang | The paper presents Ferret-v2, an upgrade to Ferret, with three key designs: any resolution grounding and referring, multi-granularity visual encoding, and a three-stage training paradigm. | [Link](http://arxiv.org/abs/2404.07973v1) |
| AI & Online Safety | Leveraging Large Language Models (LLMs) to Support Collaborative Human-AI Online Risk Data Annotation | Jinkyung Park, Pamela Wisniewski, Vivek Singh | The authors discuss the potential for leveraging LLMs as interactive research tools to facilitate collaboration between human coders and AI to effectively annotate online risk data at scale. | [Link](http://arxiv.org/abs/2404.07926v1) |
| AI & Language Processing | LaVy: Vietnamese Multimodal Large Language Model | Chi Tran, Huong Le Thanh | The authors introduce LaVy, a state-of-the-art Vietnamese Multimodal Large Language Model, and LaVy-Bench, a benchmark for evaluating MLLMs' understanding of Vietnamese visual language tasks. | [Link](http://arxiv.org/abs/2404.07922v1) |
| AI & Security | AmpleGCG: Learning a Universal and Transferable Generative Model of Adversarial Suffixes for Jailbreaking Both Open and Closed LLMs | Zeyi Liao, Huan Sun | The authors present AmpleGCG, a generative model trained on successful GCG adversarial suffixes that can rapidly produce many new suffixes for jailbreaking both open and closed LLMs. | [Link](http://arxiv.org/abs/2404.07921v1) |
| AI & Ethics | High-Dimension Human Value Representation in Large Language Models | Samuel Cahyawijaya, Delong Chen, Yejin Bang, Leila Khalatbari, Bryan Wilie, Ziwei Ji, Etsuko Ishii, Pascale Fung | The authors propose UniVaR, a high-dimensional representation of human value distributions in LLMs, to understand the scope and nature of human values injected into these models before their release. | [Link](http://arxiv.org/abs/2404.07900v1) |
| AI & Translation | Guiding Large Language Models to Post-Edit Machine Translation with Error Annotations | Dayeon Ki, Marine Carpuat | The authors exploit the complementary strengths of LLMs and supervised MT by guiding LLMs to automatically post-edit MT with external feedback on its quality, derived from Multidimensional Quality Metric (MQM) annotations. | [Link](http://arxiv.org/abs/2404.07851v1) |
@@ -0,0 +1,18 @@
import autogen


def main():
    config_list = autogen.config_list_from_json(env_or_file="OAI_CONFIG_LIST.json")

    # Create the agent that uses the LLM.
    assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list})

    # Create the agent that represents the user in the conversation.
    user_proxy = autogen.UserProxyAgent("user_proxy",
                                        code_execution_config={"work_dir": "coding", "use_docker": False})

    user_proxy.initiate_chat(assistant, message="Generate a function that acts like the 8 Ball with random phrases.")


if __name__ == "__main__":
    main()
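This script reads its model configuration from `OAI_CONFIG_LIST.json`, which is not shown in the diff. Purely as an illustration, an inline `config_list` with the same shape might look like the sketch below; the model name is a placeholder, and pointing it at `http://localhost:1234/v1` is an assumption based on the endpoint used elsewhere in this commit.

```python
# hypothetical inline equivalent of OAI_CONFIG_LIST.json; all values are placeholders
config_list = [
    {
        "model": "local-model",                  # whatever the local server serves
        "api_key": "sk-1111",                    # dummy key for a local endpoint
        "base_url": "http://localhost:1234/v1",  # OpenAI-compatible local server
    }
]

assistant = autogen.AssistantAgent("assistant", llm_config={"config_list": config_list})
```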
@@ -0,0 +1,55 @@
import json
import autogen
import pandas as pd
import sqlite3

# setup the llm_configuration
llm_config = {
    "config_list": autogen.config_list_from_json(
        env_or_file="OAI_CONFIG_LIST.json",
    ),
    "temperature": 0,
}

# Start logging
logging_session_id = autogen.runtime_logging.start(config={"dbname": "logs.db"})
print("Started Logging session ID: " + str(logging_session_id))

# Create an agent workflow and run it
assistant = autogen.AssistantAgent(name="assistant", llm_config=llm_config)
user_proxy = autogen.UserProxyAgent(
    name="user_proxy",
    code_execution_config=False,
    human_input_mode="NEVER",
    is_termination_msg=lambda msg: "TERMINATE" in msg["content"],
)

user_proxy.initiate_chat(
    assistant, message="What is the height of the Sears Tower? Only respond with the answer and terminate"
)

# Stop logging
autogen.runtime_logging.stop()


# create function to get log
def get_log(dbname="logs.db", table="chat_completions"):
    con = sqlite3.connect(dbname)
    query = f"SELECT request, response, cost, start_time, end_time from {table}"
    cursor = con.execute(query)
    rows = cursor.fetchall()
    column_names = [description[0] for description in cursor.description]

    data = [dict(zip(column_names, row)) for row in rows]

    con.close()

    return data


def str_to_dict(s):
    return json.loads(s)


# use pandas to get extra information and print out to terminal
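The hunk header above says 55 lines were added, so the diff is truncated and the pandas step announced by the final comment is not visible here. As a hypothetical illustration only (not the file's actual continuation), one way to finish it would be:

```python
# hypothetical continuation; the committed pandas code is not shown in this diff
log_data = get_log()
df = pd.DataFrame(log_data)

# request and response are stored as JSON strings, so decode them for inspection
df["request"] = df["request"].apply(str_to_dict)
df["response"] = df["response"].apply(str_to_dict)

print(df[["cost", "start_time", "end_time"]])
print("Total cost:", df["cost"].sum())
```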
@@ -0,0 +1,38 @@
import autogen
from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent

# configuration
config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST.json",
)

llm_config = {
    "timeout": 600,
    "seed": 42,
    "config_list": config_list,
}

# create assistants
assistant = autogen.AssistantAgent(
    name="assistant",
    system_message="You are a helpful assistant.",
    llm_config=llm_config
)


math_proxy_agent = MathUserProxyAgent(
    name="math_proxy_agent",
    human_input_mode="NEVER",
    code_execution_config={"use_docker": False},
)

# the math problem and initiate chat
# math_problem = (
#     """
#     Find all $x$ that satisfy the inequality $(2x+10)(x+3)<(3x+9)(x+8)$.
#     Express your answer in interval notation.
#     """
# )

math_problem = "Problem: If $725x + 727y = 1500$ and $729x+ 731y = 1508$, what is the value of $x - y$ ?"
math_proxy_agent.initiate_chat(assistant, problem=math_problem)
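As a quick sanity check on what the agents should conclude (not part of the commit): subtracting the two equations gives 4x + 4y = 8, so x + y = 2, which leads to y = 25, x = -23, and therefore x - y = -48. A standalone check in plain Python:

```python
# independent check of the expected answer for the problem above
# 725x + 727y = 1500 and 729x + 731y = 1508
x_plus_y = (1508 - 1500) / 4        # subtract the equations: 4x + 4y = 8
y = (1500 - 725 * x_plus_y) / 2     # 725(x + y) + 2y = 1500
x = x_plus_y - y

print(x, y, x - y)                  # -23.0 25.0 -48.0
```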
@@ -0,0 +1 @@
http://localhost:1234/v1
@@ -0,0 +1,30 @@
import torch
import cv2
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video

# pipe = DiffusionPipeline.from_pretrained("damo-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16, variant="fp16")
# pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# pipe.enable_model_cpu_offload()

# prompt = "Spiderman is surfing"
# video_frames = pipe(prompt, num_inference_steps=25).frames
# video_path = export_to_video(video_frames)

pipe = DiffusionPipeline.from_pretrained("ali-vilab/text-to-video-ms-1.7b", torch_dtype=torch.float16)
pipe.enable_model_cpu_offload()

# memory optimization
pipe.unet.enable_forward_chunking(chunk_size=1, dim=1)
pipe.enable_vae_slicing()

# prompt = "spiderman surfing a wave"
# video_frames = pipe(prompt, num_frames=24).frames[0]

prompt = "a cat surfing a wave"
video_frames = pipe(prompt, num_frames=100).frames[0]

video_path = export_to_video(video_frames, fps=30, output_video_path="vid1.mp4")


# UPSCALE THIS AS WELL
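The `cv2` import is unused in the visible code, and the closing comment suggests an upscaling step was planned. As a simple placeholder, sketched here under the assumption that plain frame interpolation is acceptable (a dedicated video upscaling model would give better results), the generated frames could be resized with OpenCV before export; the 2x factor and output filename are made up for illustration.

```python
# hypothetical upscaling step; not part of the committed file
upscaled_frames = [
    cv2.resize(frame, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    for frame in video_frames
]
export_to_video(upscaled_frames, fps=30, output_video_path="vid1_upscaled.mp4")
```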