From 3dd27b85c88f209c2159aa1b53f65e2a7b371260 Mon Sep 17 00:00:00 2001 From: lironsh Date: Mon, 13 Jan 2025 17:09:50 +0200 Subject: [PATCH 1/9] feat: Add evaluation result to the prompt's context after each step - Add the result to the `PromptCreator` - Change the test - Update the snapshot --- src/utils/PromptCreator.test.ts | 4 ++-- src/utils/PromptCreator.ts | 1 + src/utils/__snapshots__/PromptCreator.test.ts.snap | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/utils/PromptCreator.test.ts b/src/utils/PromptCreator.test.ts index c460052..a2c0dab 100644 --- a/src/utils/PromptCreator.test.ts +++ b/src/utils/PromptCreator.test.ts @@ -83,12 +83,12 @@ describe('PromptCreator', () => { { step: 'navigate to login screen', code: 'await element(by.id("login")).tap();', - result: undefined + result: 'success' }, { step: 'enter username', code: 'await element(by.id("username")).typeText("john_doe");', - result: undefined + result: 'john doe' } ]; diff --git a/src/utils/PromptCreator.ts b/src/utils/PromptCreator.ts index 86c0a19..b72c248 100644 --- a/src/utils/PromptCreator.ts +++ b/src/utils/PromptCreator.ts @@ -107,6 +107,7 @@ export class PromptCreator { "```", previousStep.code, "```", + ...(previousStep.result ? [`- Result: ${previousStep.result}`] : []), "" ]).flat(), "" diff --git a/src/utils/__snapshots__/PromptCreator.test.ts.snap b/src/utils/__snapshots__/PromptCreator.test.ts.snap index 8f8afa8..ec26b5a 100644 --- a/src/utils/__snapshots__/PromptCreator.test.ts.snap +++ b/src/utils/__snapshots__/PromptCreator.test.ts.snap @@ -673,6 +673,7 @@ No snapshot image is attached for this intent. 
\`\`\` await element(by.id("login")).tap(); \`\`\` +- Result: success #### Step 2 - Intent: "enter username" @@ -680,6 +681,7 @@ await element(by.id("login")).tap(); \`\`\` await element(by.id("username")).typeText("john_doe"); \`\`\` +- Result: john doe ## Available Testing Framework API From bad37325c4decdc657c9f802778261edba803ae1 Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Fri, 17 Jan 2025 14:41:17 +0200 Subject: [PATCH 2/9] fix: add prompt instruction to redeclare variables if needed. --- src/utils/PromptCreator.ts | 1 + .../__snapshots__/PromptCreator.test.ts.snap | 30 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/utils/PromptCreator.ts b/src/utils/PromptCreator.ts index b72c248..877f57c 100644 --- a/src/utils/PromptCreator.ts +++ b/src/utils/PromptCreator.ts @@ -217,6 +217,7 @@ export class PromptCreator { } steps.push( "If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence.", + "Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step.", "Wrap the generated code with backticks, without any additional formatting.", "Do not provide any additional code beyond the minimal executable code required to perform the intent." ); diff --git a/src/utils/__snapshots__/PromptCreator.test.ts.snap b/src/utils/__snapshots__/PromptCreator.test.ts.snap index ec26b5a..bf1cac8 100644 --- a/src/utils/__snapshots__/PromptCreator.test.ts.snap +++ b/src/utils/__snapshots__/PromptCreator.test.ts.snap @@ -123,8 +123,9 @@ Please follow these steps carefully: 1. Analyze the provided intent and the view hierarchy to understand the required action. 2. Generate the minimal executable code required to perform the intent using the available API. 3. 
If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -4. Wrap the generated code with backticks, without any additional formatting. -5. Do not provide any additional code beyond the minimal executable code required to perform the intent. +4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +5. Wrap the generated code with backticks, without any additional formatting. +6. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -251,8 +252,9 @@ Please follow these steps carefully: 1. Analyze the provided intent and the view hierarchy to understand the required action. 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -4. Wrap the generated code with backticks, without any additional formatting. -5. Do not provide any additional code beyond the minimal executable code required to perform the intent. +4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +5. Wrap the generated code with backticks, without any additional formatting. +6. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -393,8 +395,9 @@ Please follow these steps carefully: 1. Analyze the provided intent and the view hierarchy to understand the required action. 2. 
Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -4. Wrap the generated code with backticks, without any additional formatting. -5. Do not provide any additional code beyond the minimal executable code required to perform the intent. +4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +5. Wrap the generated code with backticks, without any additional formatting. +6. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -509,8 +512,9 @@ Please follow these steps carefully: 5. If the visual assertion fails, return code that throws an informative error explaining the failure. 6. If visual validation is not possible, proceed to generate the minimal executable code required to perform the intent. 7. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -8. Wrap the generated code with backticks, without any additional formatting. -9. Do not provide any additional code beyond the minimal executable code required to perform the intent. +8. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +9. Wrap the generated code with backticks, without any additional formatting. +10. Do not provide any additional code beyond the minimal executable code required to perform the intent. 
### Verify the prompt @@ -626,8 +630,9 @@ Please follow these steps carefully: 1. Analyze the provided intent and the view hierarchy to understand the required action. 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -4. Wrap the generated code with backticks, without any additional formatting. -5. Do not provide any additional code beyond the minimal executable code required to perform the intent. +4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +5. Wrap the generated code with backticks, without any additional formatting. +6. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -757,8 +762,9 @@ Please follow these steps carefully: 1. Analyze the provided intent and the view hierarchy to understand the required action. 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. -4. Wrap the generated code with backticks, without any additional formatting. -5. Do not provide any additional code beyond the minimal executable code required to perform the intent. +4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. +5. Wrap the generated code with backticks, without any additional formatting. +6. 
Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt From 2bfaaf7190c79e26460480e415ef6472ee9cb1d7 Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Fri, 17 Jan 2025 14:42:25 +0200 Subject: [PATCH 3/9] 0.0.29 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index b9afc4b..69a1a5d 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "detox-copilot", - "version": "0.0.28", + "version": "0.0.29", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "detox-copilot", - "version": "0.0.28", + "version": "0.0.29", "license": "MIT", "devDependencies": { "@types/jest": "^29.5.12", diff --git a/package.json b/package.json index 92bc4aa..530eeba 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "detox-copilot", - "version": "0.0.28", + "version": "0.0.29", "description": "A flexible plugin that drives your tests with human-written commands, enhanced by the power of large language models (LLMs)", "keywords": [ "detox", From 2a89951b25e3614e488cf3f1dd77d78cdb22b9b3 Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Fri, 17 Jan 2025 14:44:34 +0200 Subject: [PATCH 4/9] feat(prompt): enforce API usage and step independence in LLM instructions --- src/utils/PromptCreator.ts | 1 + .../__snapshots__/PromptCreator.test.ts.snap | 30 +++++++++++-------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/src/utils/PromptCreator.ts b/src/utils/PromptCreator.ts index 877f57c..2c75891 100644 --- a/src/utils/PromptCreator.ts +++ b/src/utils/PromptCreator.ts @@ -218,6 +218,7 @@ export class PromptCreator { steps.push( "If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence.", "Each step must be completely independent - do not rely on any 
variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step.", + "Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations.", "Wrap the generated code with backticks, without any additional formatting.", "Do not provide any additional code beyond the minimal executable code required to perform the intent." ); diff --git a/src/utils/__snapshots__/PromptCreator.test.ts.snap b/src/utils/__snapshots__/PromptCreator.test.ts.snap index bf1cac8..b11a72e 100644 --- a/src/utils/__snapshots__/PromptCreator.test.ts.snap +++ b/src/utils/__snapshots__/PromptCreator.test.ts.snap @@ -124,8 +124,9 @@ Please follow these steps carefully: 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -5. Wrap the generated code with backticks, without any additional formatting. -6. Do not provide any additional code beyond the minimal executable code required to perform the intent. +5. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. +6. Wrap the generated code with backticks, without any additional formatting. +7. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -253,8 +254,9 @@ Please follow these steps carefully: 2. Generate the minimal executable code required to perform the intent using the available API. 3. 
If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -5. Wrap the generated code with backticks, without any additional formatting. -6. Do not provide any additional code beyond the minimal executable code required to perform the intent. +5. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. +6. Wrap the generated code with backticks, without any additional formatting. +7. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -396,8 +398,9 @@ Please follow these steps carefully: 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -5. Wrap the generated code with backticks, without any additional formatting. -6. Do not provide any additional code beyond the minimal executable code required to perform the intent. +5. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. +6. Wrap the generated code with backticks, without any additional formatting. +7. Do not provide any additional code beyond the minimal executable code required to perform the intent. 
### Verify the prompt @@ -513,8 +516,9 @@ Please follow these steps carefully: 6. If visual validation is not possible, proceed to generate the minimal executable code required to perform the intent. 7. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 8. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -9. Wrap the generated code with backticks, without any additional formatting. -10. Do not provide any additional code beyond the minimal executable code required to perform the intent. +9. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. +10. Wrap the generated code with backticks, without any additional formatting. +11. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -631,8 +635,9 @@ Please follow these steps carefully: 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -5. Wrap the generated code with backticks, without any additional formatting. -6. Do not provide any additional code beyond the minimal executable code required to perform the intent. +5. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. 
+6. Wrap the generated code with backticks, without any additional formatting. +7. Do not provide any additional code beyond the minimal executable code required to perform the intent. ### Verify the prompt @@ -763,8 +768,9 @@ Please follow these steps carefully: 2. Generate the minimal executable code required to perform the intent using the available API. 3. If you cannot generate the relevant code due to ambiguity or invalid intent, return code that throws an informative error explaining the problem in one sentence. 4. Each step must be completely independent - do not rely on any variables or assignments from previous steps. Even if a variable was declared or assigned in a previous step, you must redeclare and reassign it in your current step. -5. Wrap the generated code with backticks, without any additional formatting. -6. Do not provide any additional code beyond the minimal executable code required to perform the intent. +5. Use the provided framework APIs as much as possible - prefer using the documented API methods over creating custom implementations. +6. Wrap the generated code with backticks, without any additional formatting. +7. Do not provide any additional code beyond the minimal executable code required to perform the intent. 
### Verify the prompt From cf4054624ac7115339d006e2c222677be7c66bea Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Fri, 17 Jan 2025 14:54:50 +0200 Subject: [PATCH 5/9] 0.0.30 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 69a1a5d..deb34c3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "detox-copilot", - "version": "0.0.29", + "version": "0.0.30", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "detox-copilot", - "version": "0.0.29", + "version": "0.0.30", "license": "MIT", "devDependencies": { "@types/jest": "^29.5.12", diff --git a/package.json b/package.json index 530eeba..4655796 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "detox-copilot", - "version": "0.0.29", + "version": "0.0.30", "description": "A flexible plugin that drives your tests with human-written commands, enhanced by the power of large language models (LLMs)", "keywords": [ "detox", From 44334c8e20e644afc334357348bdbb94499e6d81 Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Sat, 18 Jan 2025 00:01:12 +0200 Subject: [PATCH 6/9] feat: support cache-mode config. 
--- src/Copilot.ts | 3 +- src/actions/StepPerformer.test.ts | 68 ++++++++++++++++++++++++++++- src/actions/StepPerformer.ts | 21 +++++++-- src/integration tests/index.test.ts | 59 +++++++++++++++++++++++++ src/types.ts | 27 ++++++++++++ 5 files changed, 173 insertions(+), 5 deletions(-) diff --git a/src/Copilot.ts b/src/Copilot.ts index ad7615d..708dbe3 100644 --- a/src/Copilot.ts +++ b/src/Copilot.ts @@ -33,7 +33,8 @@ export class Copilot { this.codeEvaluator, this.snapshotManager, config.promptHandler, - this.cacheHandler + this.cacheHandler, + config.options?.cacheMode ); } diff --git a/src/actions/StepPerformer.test.ts b/src/actions/StepPerformer.test.ts index 48a7f81..28d909c 100644 --- a/src/actions/StepPerformer.test.ts +++ b/src/actions/StepPerformer.test.ts @@ -3,7 +3,7 @@ import {PromptCreator} from '@/utils/PromptCreator'; import {CodeEvaluator} from '@/utils/CodeEvaluator'; import {SnapshotManager} from '@/utils/SnapshotManager'; import {CacheHandler} from '@/utils/CacheHandler'; -import {PromptHandler, TestingFrameworkAPICatalog} from '@/types'; +import {CacheMode, PromptHandler, TestingFrameworkAPICatalog} from '@/types'; import * as crypto from 'crypto'; import {dummyContext, dummyBarContext1, dummyBarContext2} from "../test-utils/APICatalogTestUtils"; @@ -26,9 +26,13 @@ describe('StepPerformer', () => { let mockSnapshotManager: jest.Mocked; let mockPromptHandler: jest.Mocked; let mockCacheHandler: jest.Mocked; + let uuidCounter = 0; beforeEach(() => { jest.resetAllMocks(); + uuidCounter = 0; + + (crypto.randomUUID as jest.Mock).mockImplementation(() => `uuid-${uuidCounter++}`); const apiCatalog: TestingFrameworkAPICatalog = { context: {}, @@ -318,4 +322,66 @@ describe('StepPerformer', () => { }); }); + + describe('cache modes', () => { + const testCacheModes = async (cacheMode: CacheMode) => { + const generatedKeys: string[] = []; + mockCacheHandler.addToTemporaryCache.mockImplementation((key: string) => { + generatedKeys.push(key); + }); + + 
stepPerformer = new StepPerformer( + mockContext, + mockPromptCreator, + mockCodeEvaluator, + mockSnapshotManager, + mockPromptHandler, + mockCacheHandler, + cacheMode + ); + + setupMocks({ + promptResult: '```\nconst code = true;\n```', + codeEvaluationResult: 'success' + }); + await stepPerformer.perform(INTENT); + return generatedKeys[0]; + }; + + it('should include view hierarchy hash in cache key when mode is full', async () => { + const cacheKey = await testCacheModes('full'); + const parsedKey = JSON.parse(cacheKey); + expect(parsedKey).toHaveProperty('viewHierarchyHash'); + expect(parsedKey.viewHierarchyHash).toBe('hash'); + }); + + it('should not include view hierarchy hash in cache key when mode is lightweight', async () => { + const cacheKey = await testCacheModes('lightweight'); + const parsedKey = JSON.parse(cacheKey); + expect(parsedKey).not.toHaveProperty('viewHierarchyHash'); + }); + + it('should generate unique cache keys when mode is disabled', async () => { + const firstKey = await testCacheModes('disabled'); + const secondKey = await testCacheModes('disabled'); + expect(firstKey).not.toBe(secondKey); + }); + + it('should not use cache when mode is disabled', async () => { + stepPerformer = new StepPerformer( + mockContext, + mockPromptCreator, + mockCodeEvaluator, + mockSnapshotManager, + mockPromptHandler, + mockCacheHandler, + 'disabled' + ); + + setupMocks({ cacheExists: true }); + await stepPerformer.perform(INTENT); + + expect(mockPromptHandler.runPrompt).toHaveBeenCalled(); + }); + }); }); diff --git a/src/actions/StepPerformer.ts b/src/actions/StepPerformer.ts index c39d5e5..a561cdb 100644 --- a/src/actions/StepPerformer.ts +++ b/src/actions/StepPerformer.ts @@ -2,13 +2,15 @@ import {PromptCreator} from '@/utils/PromptCreator'; import {CodeEvaluator} from '@/utils/CodeEvaluator'; import {SnapshotManager} from '@/utils/SnapshotManager'; import {CacheHandler} from '@/utils/CacheHandler'; -import {CodeEvaluationResult, PreviousStep, 
PromptHandler} from '@/types'; +import {CacheMode, CodeEvaluationResult, PreviousStep, PromptHandler} from '@/types'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; import {extractCodeBlock} from '@/utils/extractCodeBlock'; export class StepPerformer { + private readonly cacheMode: CacheMode; + constructor( private context: any, private promptCreator: PromptCreator, @@ -16,7 +18,9 @@ export class StepPerformer { private snapshotManager: SnapshotManager, private promptHandler: PromptHandler, private cacheHandler: CacheHandler, + cacheMode: CacheMode = 'full', ) { + this.cacheMode = cacheMode; } extendJSContext(newContext: any): void { @@ -30,8 +34,19 @@ export class StepPerformer { } private generateCacheKey(step: string, previous: PreviousStep[], viewHierarchy: string): string { - const viewHierarchyHash = crypto.createHash('md5').update(viewHierarchy).digest('hex'); - return JSON.stringify({step, previous, viewHierarchyHash}); + if (this.cacheMode === 'disabled') { + // Return a unique key that won't match any cached value + return crypto.randomUUID(); + } + + const cacheKeyData: any = {step, previous}; + + if (this.cacheMode === 'full') { + const viewHierarchyHash = crypto.createHash('md5').update(viewHierarchy).digest('hex'); + cacheKeyData.viewHierarchyHash = viewHierarchyHash; + } + + return JSON.stringify(cacheKeyData); } private async captureSnapshotAndViewHierarchy() { diff --git a/src/integration tests/index.test.ts b/src/integration tests/index.test.ts index 0901316..71b0832 100644 --- a/src/integration tests/index.test.ts +++ b/src/integration tests/index.test.ts @@ -356,4 +356,63 @@ describe('Copilot Integration Tests', () => { expect(spyStepPerformer).toHaveBeenCalledTimes(1); }); }); + + describe('Cache Modes', () => { + beforeEach(() => { + mockPromptHandler.runPrompt.mockResolvedValue('// No operation'); + }); + + it('should use full cache mode by default', async () => { + copilot.init({ + frameworkDriver: 
mockFrameworkDriver, + promptHandler: mockPromptHandler + }); + copilot.start(); + + await copilot.perform('Tap on the login button'); + copilot.end(); + + expect(Object.keys(mockedCacheFile || {})[0]).toContain('viewHierarchyHash'); + }); + + it('should not include view hierarchy in cache key when using lightweight mode', async () => { + copilot.init({ + frameworkDriver: mockFrameworkDriver, + promptHandler: mockPromptHandler, + options: { + cacheMode: 'lightweight' + } + }); + copilot.start(); + + await copilot.perform('Tap on the login button'); + copilot.end(); + + const cacheKeys = Object.keys(mockedCacheFile || {}); + expect(cacheKeys[0]).not.toContain('viewHierarchyHash'); + }); + + it('should not use cache when cache mode is disabled', async () => { + copilot.init({ + frameworkDriver: mockFrameworkDriver, + promptHandler: mockPromptHandler, + options: { + cacheMode: 'disabled' + } + }); + copilot.start(); + + // First call + await copilot.perform('Tap on the login button'); + copilot.end(); + + // Second call with same intent + copilot.start(); + await copilot.perform('Tap on the login button'); + copilot.end(); + + // Should call runPrompt twice since cache is disabled + expect(mockPromptHandler.runPrompt).toHaveBeenCalledTimes(2); + }); + }); }); diff --git a/src/types.ts b/src/types.ts index ebbd3c9..22f85e2 100644 --- a/src/types.ts +++ b/src/types.ts @@ -135,8 +135,30 @@ export interface PromptHandler { isSnapshotImageSupported: () => boolean; } +/** + * The cache mode for the Copilot. + * - 'disabled': No caching is used + * - 'lightweight': Cache is used but only based on steps (without view hierarchy) + * - 'full': Cache is used with view hierarchy (default) + */ +export type CacheMode = 'disabled' | 'lightweight' | 'full'; + +/** + * Configuration options for the Copilot behavior. + */ +export interface CopilotOptions { + /** + * The cache mode to use. + * @default 'full' + */ + cacheMode?: CacheMode; +} + /** * Configuration options for Copilot. 
+ * @property frameworkDriver The testing driver to use for interacting with the underlying testing framework. + * @property promptHandler The prompt handler to use for interacting with the AI service + * @property options Additional options for configuring Copilot behavior */ export interface Config { /** @@ -148,6 +170,11 @@ export interface Config { * The prompt handler to use for interacting with the AI service */ promptHandler: PromptHandler; + + /** + * Additional options for configuring Copilot behavior + */ + options?: CopilotOptions; } /** From 0a1bd91d7221e5988c53ed9fbe45fcae2f46053d Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Sat, 18 Jan 2025 16:54:15 +0200 Subject: [PATCH 7/9] docs: update API docs. --- src/types.ts | 9 +- website/docs/API/basic-interface-overview.md | 235 +++++++++++++++++-- 2 files changed, 223 insertions(+), 21 deletions(-) diff --git a/src/types.ts b/src/types.ts index 22f85e2..1747bf0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -137,11 +137,12 @@ export interface PromptHandler { /** * The cache mode for the Copilot. - * - 'disabled': No caching is used - * - 'lightweight': Cache is used but only based on steps (without view hierarchy) - * - 'full': Cache is used with view hierarchy (default) + * - 'full': Cache is used with the screen state (default) + * - 'lightweight': Cache is used but only based on steps (without screen state) + * - 'disabled': No caching is used + * @default 'full' */ -export type CacheMode = 'disabled' | 'lightweight' | 'full'; +export type CacheMode = 'full' | 'lightweight' | 'disabled'; /** * Configuration options for the Copilot behavior. 
diff --git a/website/docs/API/basic-interface-overview.md b/website/docs/API/basic-interface-overview.md index 41c6bf2..4743f16 100644 --- a/website/docs/API/basic-interface-overview.md +++ b/website/docs/API/basic-interface-overview.md @@ -6,38 +6,239 @@ sidebar_position: 1 --- # Basic Interface Overview -The Copilot class serves as the core of the testing process, allowing seamless interaction between natural language prompts and your testing framework. Below is an overview of its main lifecycle commands that help control the test flow: -## 1. `init(config: Config): void` - The init method initializes the Copilot instance with the provided configuration. This must be called before using Copilot to ensure it is set up with the necessary framework drivers and prompt handlers. +The Testing Copilot provides a simple yet powerful interface for controlling your test flows. This document covers the core API methods and configuration options. + +## API Methods + +### init() + +```typescript +init(config: Config): void +``` + +Initializes the Copilot instance. Must be called before any other methods and only once in your test environment. + +:::note +This must be called once before using any other Copilot methods, as it sets up the instance and configuration. +::: + +Basic initialization example: +```typescript +import copilot from 'detox-copilot'; +import { DetoxDriver } from 'your-testing-framework-driver'; // Replace with your actual driver +import { OpenAIHandler } from 'your-ai-service-handler'; // Replace with your actual handler + +copilot.init({ + frameworkDriver: new DetoxDriver(), + promptHandler: new OpenAIHandler({ + apiKey: process.env.OPENAI_API_KEY + }) +}); +``` + +See [Configuration](#configuration) for more information on the `config` object. + +### isInitialized() + +```typescript +isInitialized(): boolean +``` + +Checks if the Copilot instance has been initialized. 
```typescript -Copilot.init(config); +if (!copilot.isInitialized()) { + // Initialize copilot + copilot.init(config); +} ``` -## 2. `start(): void` - The start method begins a new test flow, resetting previous steps and clearing any temporary cache. It must be called before performing any steps in the test. +This is useful for ensuring that the Copilot is properly initialized before performing any actions. + +### start() +```typescript +start(): void +``` + +Begins a new test flow, resetting previous steps and clearing temporary cache. + +:::note +Must be called before performing any steps. If called while a flow is already active, it will throw an error. +::: + +Starting a new test flow: ```typescript copilot.start(); ``` -Note: Calling start after an active test flow has already been started will result in an error. Be sure to call end() before starting a new flow. -## 3. `performStep(step: string): Promise` - The performStep method allows Copilot to perform a test step based on a natural language prompt. The input step is parsed and evaluated by Copilot, interacting with the underlying framework to execute the corresponding action. +### perform() + +```typescript +perform(...steps: string[]): Promise +``` + +Executes one or more test steps using natural language. Returns the result of the last step. + +:::note +Requires an active test flow (initiated by `start()`), otherwise it will throw an error. +::: + +Single step example: +```typescript +// Perform a simple click action +const result = await copilot.perform("Click the login button"); +``` + +Multiple steps example: +```typescript +// Execute multiple steps in sequence +const result = await copilot.perform( + "Click the login button", + "Type 'user@example.com' into the email field", + "The login form should be visible" +); +``` + +### end() + +```typescript +end(isCacheDisabled?: boolean): void +``` + +Concludes the test flow and optionally disables caching of the results. 
+Ending with default cache behavior: ```typescript -const result = await copilot.performStep("Click the login button"); +// Save results to cache (default behavior) +copilot.end(); ``` -If Copilot is not running (i.e., start() has not been called), an error will be thrown. -## 4. `end(saveToCache: boolean = true): void` - The end method concludes the test flow. It can optionally save temporary data to the main cache, ensuring any relevant information is retained for future tests. +Ending with cache disabled: +```typescript +// Skip saving to cache +copilot.end(true); +``` + +Ending with cache disabled is usually done when a test is failing and you want to ensure that the next test run is not affected by the previous test's results. + +### extendAPICatalog() + +```typescript +extendAPICatalog(categories: TestingFrameworkAPICatalogCategory[], context?: any): void +``` + +Extends the API catalog with additional testing framework capabilities. + +```typescript +copilot.extendAPICatalog([ + { + title: 'Deeplink Actions', + items: [ + { + signature: 'navigateToDeeplink(url: string)', + description: 'Navigates to a given deeplink URL', + example: 'await navigateToDeeplink("/home");', + guidelines: [ + 'This action should be used to navigate to a specific screen in the app.', + 'The URL should be a relative path, starting with a forward slash.' + ] + } + ] + } +]); +``` + +This is useful for adding custom actions to the Copilot's API catalog, which can be used in natural language prompts. 
+ +## Configuration + +### Config Interface + +The configuration interface defines how to set up Copilot with your testing framework and AI service: ```typescript -copilot.end(true); // Save to cache +interface Config { + frameworkDriver: TestingFrameworkDriver; + promptHandler: PromptHandler; + options?: CopilotOptions; +} + +interface CopilotOptions { + cacheMode?: 'full' | 'lightweight' | 'disabled'; +} ``` -Note: The end method should be called when the test flow is complete, and start() must be invoked again before starting a new test. -## Error Handling -If any method is called out of sequence, such as trying to perform steps without starting Copilot, or attempting to start Copilot while it is already running, the class will throw a CopilotError. This ensures that the test flow is controlled and prevents inconsistent states. +#### Cache Modes + +Cache mode is used to determine how the Copilot will cache the code generated for each step. +Default cache mode is `full`. + +- **full**: Cache is used with the screen state (default) +- **lightweight**: Cache is used but only based on steps (without screen state) +- **disabled**: No caching is used + +### Framework Drivers + +:::note Available Drivers +The `frameworkDriver` supports various testing frameworks, see [Framework Drivers](/docs/API/framework-driver) for more information. +::: + +Basic driver initialization: +```typescript +const driver = new DetoxDriver(); +``` + +### Prompt Handlers + +The `promptHandler` manages communication with AI services. 
+ +Setting up OpenAI as the AI service: +```typescript +const handler = new OpenAIHandler({ + apiKey: process.env.OPENAI_API_KEY +}); +``` + +## API Basic Usage Notes + +- Always call methods in sequence: `init` → `start` → `perform` → `end` +- Handle errors appropriately using try-catch blocks +- Clean up resources by calling `end()` after each test flow +- Use multiple steps in a single `perform` call for related actions + +### Error Handling + +The Copilot will throw a `CopilotError` when: +- Methods are called out of sequence +- A flow is started while another is active +- Steps are performed without an active flow (e.g. `perform` without `start`) +- Configuration is invalid or missing required fields + +Complete flow with error handling: +```typescript +// Check initialization +if (!copilot.isInitialized()) { + copilot.init(config); +} + +// Start the flow +copilot.start(); + +try { + // Perform steps, if any error occurs, the flow will be ended and the error will be thrown + const result = await copilot.perform( + "Click the login button", + "Type 'test@example.com' into the email field", + "The login form should be visible" + ); +} catch (error) { + // Disable cache on error to avoid caching the failed flow + copilot.end(true); + throw error; +} + +// End the flow (with default cache behavior) +copilot.end(); +``` From 01938428159d1874645db3e31e90e0175d886dd0 Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Sun, 19 Jan 2025 11:11:51 +0200 Subject: [PATCH 8/9] 0.0.31 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index deb34c3..07a2175 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "detox-copilot", - "version": "0.0.30", + "version": "0.0.31", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "detox-copilot", - "version": "0.0.30", + "version": "0.0.31", "license": "MIT", "devDependencies": { 
"@types/jest": "^29.5.12", diff --git a/package.json b/package.json index 4655796..057ecd1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "detox-copilot", - "version": "0.0.30", + "version": "0.0.31", "description": "A flexible plugin that drives your tests with human-written commands, enhanced by the power of large language models (LLMs)", "keywords": [ "detox", From 926f550177585d5c1c2bb944d2b48d782a32458e Mon Sep 17 00:00:00 2001 From: Asaf Korem Date: Sun, 19 Jan 2025 23:25:21 +0200 Subject: [PATCH 9/9] feat(driver): add framework info fields in TestingFrameworkAPICatalog --- src/test-utils/APICatalogTestUtils.ts | 2 ++ src/types.ts | 4 ++++ src/utils/PromptCreator.ts | 19 ++++++++++++++++++- .../__snapshots__/PromptCreator.test.ts.snap | 6 ++++++ website/docs/API/framework-driver.md | 12 ++++++++---- 5 files changed, 38 insertions(+), 5 deletions(-) diff --git a/src/test-utils/APICatalogTestUtils.ts b/src/test-utils/APICatalogTestUtils.ts index dd8a73d..9babb9f 100644 --- a/src/test-utils/APICatalogTestUtils.ts +++ b/src/test-utils/APICatalogTestUtils.ts @@ -42,6 +42,8 @@ export const dummyBarContext2 = {bar: jest.fn()}; export const promptCreatorConstructorMockAPI: TestingFrameworkAPICatalog = { context: {}, + name: 'Test Framework', + description: 'A testing framework for unit testing purposes', categories: [ { title: 'Actions', diff --git a/src/types.ts b/src/types.ts index 1747bf0..6ca6af0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -74,10 +74,14 @@ export interface TestingFrameworkDriver { /** * Represents the available API of the testing framework that can be used by Copilot. + * @property name Optional name of the testing framework (e.g. "Detox", "Jest", etc.). + * @property description Optional description of the testing framework's purpose and capabilities. * @property context The available variables of the testing framework (i.e. exposes the matching function, expect, etc.). 
* @property categories The available categories of the testing framework API. */ export type TestingFrameworkAPICatalog = { + name?: string; + description?: string; context: any; categories: TestingFrameworkAPICatalogCategory[]; } diff --git a/src/utils/PromptCreator.ts b/src/utils/PromptCreator.ts index 2c75891..e24d461 100644 --- a/src/utils/PromptCreator.ts +++ b/src/utils/PromptCreator.ts @@ -50,13 +50,30 @@ export class PromptCreator { } private createBasePrompt(): string[] { - return [ + const basePrompt = [ "# Test Code Generation", "", "You are an AI assistant tasked with generating test code for an application using the provided UI testing framework API.", "Please generate the minimal executable code to perform the desired intent based on the given information and context.", "" ]; + + if (this.apiCatalog.name || this.apiCatalog.description) { + basePrompt.push("## Testing Framework"); + basePrompt.push(""); + + if (this.apiCatalog.name) { + basePrompt.push(`Framework: ${this.apiCatalog.name}`); + basePrompt.push(""); + } + + if (this.apiCatalog.description) { + basePrompt.push(`Description: ${this.apiCatalog.description}`); + basePrompt.push(""); + } + } + + return basePrompt; } private createContext( diff --git a/src/utils/__snapshots__/PromptCreator.test.ts.snap b/src/utils/__snapshots__/PromptCreator.test.ts.snap index b11a72e..3172f88 100644 --- a/src/utils/__snapshots__/PromptCreator.test.ts.snap +++ b/src/utils/__snapshots__/PromptCreator.test.ts.snap @@ -6,6 +6,12 @@ exports[`PromptCreator constructor should merge redundant categories 1`] = ` You are an AI assistant tasked with generating test code for an application using the provided UI testing framework API. Please generate the minimal executable code to perform the desired intent based on the given information and context. 
+## Testing Framework + +Framework: Test Framework + +Description: A testing framework for unit testing purposes + ## Context ### Intent to perform diff --git a/website/docs/API/framework-driver.md b/website/docs/API/framework-driver.md index 418a571..1f03a57 100644 --- a/website/docs/API/framework-driver.md +++ b/website/docs/API/framework-driver.md @@ -20,10 +20,12 @@ By implementing a custom driver, you enable **Copilot** to communicate with your The `TestingFrameworkDriver` interface defines the essential methods that a driver should implement: - **`captureSnapshotImage`**: Takes a snapshot of the current screen and returns the path to the saved image. If the driver does not support snapshot functionality, it should return `undefined`. -- **`captureViewHierarchyString`**: Returns the current view hierarchy in a string representation, which helps the AI understand the structure of the app’s UI. -- **`apiCatalog`**: Provides access to the available methods of the testing framework's API, such as matchers and actions. +- **`captureViewHierarchyString`**: Returns the current view hierarchy in a string representation, which helps the AI understand the structure of the app's UI. +- **`apiCatalog`**: Provides access to the available methods of the testing framework's API, such as matchers and actions. The catalog can also include optional framework information: + - `name`: The name of the testing framework (e.g., "Detox", "Jest") + - `description`: A description of the framework's purpose and capabilities -Here’s the interface definition for the driver: +Here's the interface definition for the driver: ```typescript /** @@ -42,7 +44,7 @@ export interface TestingFrameworkDriver { captureViewHierarchyString: () => Promise<string>; /** - * The available guides methods of the testing framework. + * The available API methods of the testing framework. 
*/ apiCatalog: TestingFrameworkAPICatalog; } @@ -58,6 +60,8 @@ const detox = require('../..'); const detoxCopilotFrameworkDriver = { apiCatalog: { context: { ...detox, jestExpect }, + name: 'Detox', + description: 'End-to-end testing and automation framework for mobile apps', categories: [ { title: 'Matchers',