diff --git a/src/Copilot.ts b/src/Copilot.ts index ad7615d..708dbe3 100644 --- a/src/Copilot.ts +++ b/src/Copilot.ts @@ -33,7 +33,8 @@ export class Copilot { this.codeEvaluator, this.snapshotManager, config.promptHandler, - this.cacheHandler + this.cacheHandler, + config.options?.cacheMode ); } diff --git a/src/actions/StepPerformer.test.ts b/src/actions/StepPerformer.test.ts index 48a7f81..28d909c 100644 --- a/src/actions/StepPerformer.test.ts +++ b/src/actions/StepPerformer.test.ts @@ -3,7 +3,7 @@ import {PromptCreator} from '@/utils/PromptCreator'; import {CodeEvaluator} from '@/utils/CodeEvaluator'; import {SnapshotManager} from '@/utils/SnapshotManager'; import {CacheHandler} from '@/utils/CacheHandler'; -import {PromptHandler, TestingFrameworkAPICatalog} from '@/types'; +import {CacheMode, PromptHandler, TestingFrameworkAPICatalog} from '@/types'; import * as crypto from 'crypto'; import {dummyContext, dummyBarContext1, dummyBarContext2} from "../test-utils/APICatalogTestUtils"; @@ -26,9 +26,13 @@ describe('StepPerformer', () => { let mockSnapshotManager: jest.Mocked; let mockPromptHandler: jest.Mocked; let mockCacheHandler: jest.Mocked; + let uuidCounter = 0; beforeEach(() => { jest.resetAllMocks(); + uuidCounter = 0; + + (crypto.randomUUID as jest.Mock).mockImplementation(() => `uuid-${uuidCounter++}`); const apiCatalog: TestingFrameworkAPICatalog = { context: {}, @@ -318,4 +322,66 @@ describe('StepPerformer', () => { }); }); + + describe('cache modes', () => { + const testCacheModes = async (cacheMode: CacheMode) => { + const generatedKeys: string[] = []; + mockCacheHandler.addToTemporaryCache.mockImplementation((key: string) => { + generatedKeys.push(key); + }); + + stepPerformer = new StepPerformer( + mockContext, + mockPromptCreator, + mockCodeEvaluator, + mockSnapshotManager, + mockPromptHandler, + mockCacheHandler, + cacheMode + ); + + setupMocks({ + promptResult: '```\nconst code = true;\n```', + codeEvaluationResult: 'success' + }); + await 
stepPerformer.perform(INTENT); + return generatedKeys[0]; + }; + + it('should include view hierarchy hash in cache key when mode is full', async () => { + const cacheKey = await testCacheModes('full'); + const parsedKey = JSON.parse(cacheKey); + expect(parsedKey).toHaveProperty('viewHierarchyHash'); + expect(parsedKey.viewHierarchyHash).toBe('hash'); + }); + + it('should not include view hierarchy hash in cache key when mode is lightweight', async () => { + const cacheKey = await testCacheModes('lightweight'); + const parsedKey = JSON.parse(cacheKey); + expect(parsedKey).not.toHaveProperty('viewHierarchyHash'); + }); + + it('should generate unique cache keys when mode is disabled', async () => { + const firstKey = await testCacheModes('disabled'); + const secondKey = await testCacheModes('disabled'); + expect(firstKey).not.toBe(secondKey); + }); + + it('should not use cache when mode is disabled', async () => { + stepPerformer = new StepPerformer( + mockContext, + mockPromptCreator, + mockCodeEvaluator, + mockSnapshotManager, + mockPromptHandler, + mockCacheHandler, + 'disabled' + ); + + setupMocks({ cacheExists: true }); + await stepPerformer.perform(INTENT); + + expect(mockPromptHandler.runPrompt).toHaveBeenCalled(); + }); + }); }); diff --git a/src/actions/StepPerformer.ts b/src/actions/StepPerformer.ts index c39d5e5..a561cdb 100644 --- a/src/actions/StepPerformer.ts +++ b/src/actions/StepPerformer.ts @@ -2,13 +2,15 @@ import {PromptCreator} from '@/utils/PromptCreator'; import {CodeEvaluator} from '@/utils/CodeEvaluator'; import {SnapshotManager} from '@/utils/SnapshotManager'; import {CacheHandler} from '@/utils/CacheHandler'; -import {CodeEvaluationResult, PreviousStep, PromptHandler} from '@/types'; +import {CacheMode, CodeEvaluationResult, PreviousStep, PromptHandler} from '@/types'; import * as fs from 'fs'; import * as path from 'path'; import * as crypto from 'crypto'; import {extractCodeBlock} from '@/utils/extractCodeBlock'; export class StepPerformer 
{ + private readonly cacheMode: CacheMode; + constructor( private context: any, private promptCreator: PromptCreator, @@ -16,7 +18,9 @@ export class StepPerformer { private snapshotManager: SnapshotManager, private promptHandler: PromptHandler, private cacheHandler: CacheHandler, + cacheMode: CacheMode = 'full', ) { + this.cacheMode = cacheMode; } extendJSContext(newContext: any): void { @@ -30,8 +34,19 @@ export class StepPerformer { } private generateCacheKey(step: string, previous: PreviousStep[], viewHierarchy: string): string { - const viewHierarchyHash = crypto.createHash('md5').update(viewHierarchy).digest('hex'); - return JSON.stringify({step, previous, viewHierarchyHash}); + if (this.cacheMode === 'disabled') { + // Return a unique key that won't match any cached value + return crypto.randomUUID(); + } + + const cacheKeyData: any = {step, previous}; + + if (this.cacheMode === 'full') { + const viewHierarchyHash = crypto.createHash('md5').update(viewHierarchy).digest('hex'); + cacheKeyData.viewHierarchyHash = viewHierarchyHash; + } + + return JSON.stringify(cacheKeyData); } private async captureSnapshotAndViewHierarchy() { diff --git a/src/integration tests/index.test.ts b/src/integration tests/index.test.ts index 0901316..71b0832 100644 --- a/src/integration tests/index.test.ts +++ b/src/integration tests/index.test.ts @@ -356,4 +356,63 @@ describe('Copilot Integration Tests', () => { expect(spyStepPerformer).toHaveBeenCalledTimes(1); }); }); + + describe('Cache Modes', () => { + beforeEach(() => { + mockPromptHandler.runPrompt.mockResolvedValue('// No operation'); + }); + + it('should use full cache mode by default', async () => { + copilot.init({ + frameworkDriver: mockFrameworkDriver, + promptHandler: mockPromptHandler + }); + copilot.start(); + + await copilot.perform('Tap on the login button'); + copilot.end(); + + expect(Object.keys(mockedCacheFile || {})[0]).toContain('viewHierarchyHash'); + }); + + it('should not include view hierarchy in cache key 
when using lightweight mode', async () => { + copilot.init({ + frameworkDriver: mockFrameworkDriver, + promptHandler: mockPromptHandler, + options: { + cacheMode: 'lightweight' + } + }); + copilot.start(); + + await copilot.perform('Tap on the login button'); + copilot.end(); + + const cacheKeys = Object.keys(mockedCacheFile || {}); + expect(cacheKeys[0]).not.toContain('viewHierarchyHash'); + }); + + it('should not use cache when cache mode is disabled', async () => { + copilot.init({ + frameworkDriver: mockFrameworkDriver, + promptHandler: mockPromptHandler, + options: { + cacheMode: 'disabled' + } + }); + copilot.start(); + + // First call + await copilot.perform('Tap on the login button'); + copilot.end(); + + // Second call with same intent + copilot.start(); + await copilot.perform('Tap on the login button'); + copilot.end(); + + // Should call runPrompt twice since cache is disabled + expect(mockPromptHandler.runPrompt).toHaveBeenCalledTimes(2); + }); + }); }); diff --git a/src/types.ts b/src/types.ts index ebbd3c9..1747bf0 100644 --- a/src/types.ts +++ b/src/types.ts @@ -135,8 +135,31 @@ export interface PromptHandler { isSnapshotImageSupported: () => boolean; } +/** + * The cache mode for the Copilot. + * - 'full': Cache is used with the screen state (default) + * - 'lightweight': Cache is used but only based on steps (without screen state) + * - 'disabled': No caching is used + * @default 'full' + */ +export type CacheMode = 'full' | 'lightweight' | 'disabled'; + +/** + * Configuration options for the Copilot behavior. + */ +export interface CopilotOptions { + /** + * The cache mode to use. + * @default 'full' + */ + cacheMode?: CacheMode; +} + /** * Configuration options for Copilot. + * @property frameworkDriver The testing driver to use for interacting with the underlying testing framework. 
+ * @property promptHandler The prompt handler to use for interacting with the AI service + * @property options Additional options for configuring Copilot behavior */ export interface Config { /** @@ -148,6 +171,11 @@ export interface Config { * The prompt handler to use for interacting with the AI service */ promptHandler: PromptHandler; + + /** + * Additional options for configuring Copilot behavior + */ + options?: CopilotOptions; } /** diff --git a/website/docs/API/basic-interface-overview.md b/website/docs/API/basic-interface-overview.md index 41c6bf2..4743f16 100644 --- a/website/docs/API/basic-interface-overview.md +++ b/website/docs/API/basic-interface-overview.md @@ -6,38 +6,239 @@ sidebar_position: 1 --- # Basic Interface Overview -The Copilot class serves as the core of the testing process, allowing seamless interaction between natural language prompts and your testing framework. Below is an overview of its main lifecycle commands that help control the test flow: -## 1. `init(config: Config): void` - The init method initializes the Copilot instance with the provided configuration. This must be called before using Copilot to ensure it is set up with the necessary framework drivers and prompt handlers. +The Testing Copilot provides a simple yet powerful interface for controlling your test flows. This document covers the core API methods and configuration options. + +## API Methods + +### init() + +```typescript +init(config: Config): void +``` + +Initializes the Copilot instance. Must be called before any other methods and only once in your test environment. + +:::note +This must be called once before using any other Copilot methods, as it sets up the instance and configuration. 
+::: + +Basic initialization example: +```typescript +import copilot from 'detox-copilot'; +import { DetoxDriver } from 'your-testing-framework-driver'; // Replace with your actual driver +import { OpenAIHandler } from 'your-ai-service-handler'; // Replace with your actual handler + +copilot.init({ + frameworkDriver: new DetoxDriver(), + promptHandler: new OpenAIHandler({ + apiKey: process.env.OPENAI_API_KEY + }) +}); +``` + +See [Configuration](#configuration) for more information on the `config` object. + +### isInitialized() + +```typescript +isInitialized(): boolean +``` + +Checks if the Copilot instance has been initialized. ```typescript -Copilot.init(config); +if (!copilot.isInitialized()) { + // Initialize copilot + copilot.init(config); +} ``` -## 2. `start(): void` - The start method begins a new test flow, resetting previous steps and clearing any temporary cache. It must be called before performing any steps in the test. +This is useful for ensuring that the Copilot is properly initialized before performing any actions. + +### start() +```typescript +start(): void +``` + +Begins a new test flow, resetting previous steps and clearing temporary cache. + +:::note +Must be called before performing any steps. If called while a flow is already active, it will throw an error. +::: + +Starting a new test flow: ```typescript copilot.start(); ``` -Note: Calling start after an active test flow has already been started will result in an error. Be sure to call end() before starting a new flow. -## 3. `performStep(step: string): Promise` - The performStep method allows Copilot to perform a test step based on a natural language prompt. The input step is parsed and evaluated by Copilot, interacting with the underlying framework to execute the corresponding action. +### perform() + +```typescript +perform(...steps: string[]): Promise +``` + +Executes one or more test steps using natural language. Returns the result of the last step. 
+ +:::note +Requires an active test flow (initiated by `start()`), otherwise it will throw an error. +::: + +Single step example: +```typescript +// Perform a simple click action +const result = await copilot.perform("Click the login button"); +``` + +Multiple steps example: +```typescript +// Execute multiple steps in sequence +const result = await copilot.perform( + "Click the login button", + "Type 'user@example.com' into the email field", + "The login form should be visible" +); +``` + +### end() + +```typescript +end(isCacheDisabled?: boolean): void +``` + +Concludes the test flow and optionally disables caching of the results. +Ending with default cache behavior: ```typescript -const result = await copilot.performStep("Click the login button"); +// Save results to cache (default behavior) +copilot.end(); ``` -If Copilot is not running (i.e., start() has not been called), an error will be thrown. -## 4. `end(saveToCache: boolean = true): void` - The end method concludes the test flow. It can optionally save temporary data to the main cache, ensuring any relevant information is retained for future tests. +Ending with cache disabled: +```typescript +// Skip saving to cache +copilot.end(true); +``` + +Ending with cache disabled is usually done when a test is failing and you want to ensure that the next test run is not affected by the previous test's results. + +### extendAPICatalog() + +```typescript +extendAPICatalog(categories: TestingFrameworkAPICatalogCategory[], context?: any): void +``` + +Extends the API catalog with additional testing framework capabilities. + +```typescript +copilot.extendAPICatalog([ + { + title: 'Deeplink Actions', + items: [ + { + signature: 'navigateToDeeplink(url: string)', + description: 'Navigates to a given deeplink URL', + example: 'await navigateToDeeplink("/home");', + guidelines: [ + 'This action should be used to navigate to a specific screen in the app.', + 'The URL should be a relative path, starting with a forward slash.' 
+ ] + } + ] + } +]); +``` + +This is useful for adding custom actions to the Copilot's API catalog, which can be used in natural language prompts. + +## Configuration + +### Config Interface + +The configuration interface defines how to set up Copilot with your testing framework and AI service: ```typescript -copilot.end(true); // Save to cache +interface Config { + frameworkDriver: TestingFrameworkDriver; + promptHandler: PromptHandler; + options?: CopilotOptions; +} + +interface CopilotOptions { + cacheMode?: 'full' | 'lightweight' | 'disabled'; +} ``` -Note: The end method should be called when the test flow is complete, and start() must be invoked again before starting a new test. -## Error Handling -If any method is called out of sequence, such as trying to perform steps without starting Copilot, or attempting to start Copilot while it is already running, the class will throw a CopilotError. This ensures that the test flow is controlled and prevents inconsistent states. +#### Cache Modes + +The cache mode determines how Copilot caches the code generated for each step. +The default cache mode is `full`. + +- **full**: Cached code is keyed by the step, the previous steps, and a hash of the current view hierarchy (the screen state) (default) +- **lightweight**: Cached code is keyed by the step and the previous steps only (the screen state is ignored) +- **disabled**: No caching is used; every step gets a unique cache key, so previously cached code is never reused + +### Framework Drivers + +:::note Available Drivers +The `frameworkDriver` supports various testing frameworks; see [Framework Drivers](/docs/API/framework-driver) for more information. +::: + +Basic driver initialization: +```typescript +const driver = new DetoxDriver(); +``` + +### Prompt Handlers + +The `promptHandler` manages communication with AI services. 
+ +Setting up OpenAI as the AI service: +```typescript +const handler = new OpenAIHandler({ + apiKey: process.env.OPENAI_API_KEY +}); +``` + +## API Basic Usage Notes + +- Always call methods in sequence: `init` → `start` → `perform` → `end` +- Handle errors appropriately using try-catch blocks +- Clean up resources by calling `end()` after each test flow +- Use multiple steps in a single `perform` call for related actions + +### Error Handling + +The Copilot will throw a `CopilotError` when: +- Methods are called out of sequence +- A flow is started while another is active +- Steps are performed without an active flow (e.g. `perform` without `start`) +- Configuration is invalid or missing required fields + +Complete flow with error handling: +```typescript +// Check initialization +if (!copilot.isInitialized()) { + copilot.init(config); +} + +// Start the flow +copilot.start(); + +try { + // Perform the steps; if any step throws, the catch block below ends the flow and rethrows the error + const result = await copilot.perform( + "Click the login button", + "Type 'test@example.com' into the email field", + "The login form should be visible" + ); +} catch (error) { + // Disable cache on error to avoid caching the failed flow + copilot.end(true); + throw error; +} + +// End the flow (with default cache behavior) +copilot.end(); +```