Skip to content

Find Duplicate

Find documents that are duplicates of a given document using AI-powered semantic understanding.

Endpoint

POST /v20241104/find-duplicate

API Request

  • Required Parameters


    document_id string required

    ID of the document to find duplicate documents for.

    MinLength: 1   MaxLength: 128


    Optional Parameters


    filter object

    JSON object specifying the filter criteria.


    language string (enum)

    Language code (of a supported language) that indicates the language of the documents to search.

    Default is en.

    Info
    • Ignored if multilingual_search is true.

    limit integer

    Limit on the maximum number of results to return per request. Default is 10.

    Additional results can be fetched if more are available.

    Min: 1   Max: 100


    page integer

    A cursor used for pagination when there are multiple pages of results. Default is 1.

    Info
    • Don't include this parameter in the initial API request.
    • Set this to the next_page value from the previous API response to fetch the subsequent page of results.

    Min: 1


    sort array of objects

    An array of JSON objects specifying sort criteria for the search results.

    By default, search results are sorted in descending order of relevance score (_score). This parameter allows you to override that behavior.


    tenant_id array of strings

    Array specifying one or more Tenant IDs.

    Info
    • Only documents with a tenant_id matching one of the values in the array will be considered.

    • Example (single tenant ID):

      "tenant_id": ["tenant123"],
      

    • Example (multiple tenant IDs):

      "tenant_id": ["tenant123", "tenant124"],
      

    • For each tenant ID string:

      MinLength: 1   MaxLength: 250

    For the array itself: MinItems: 1   MaxItems: 10

  • POST /v20241104/find-duplicate


    # (1)! Replace YOUR_API_KEY_HERE with the value of your API key.
    # Note: no comment may follow a trailing backslash, or the line
    # continuation breaks and the -d payload is never sent.
    curl -X POST "https://api.gainly.ai/v20241104/find-duplicate" \
      -H "Content-Type: application/json" \
      -H "X-API-Key: YOUR_API_KEY_HERE" \
      -d '{
        "document_id": "mQfrepQBP4_EawzZWKQt"
      }'
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    # Prompt for AI coding assistants/IDEs (e.g., ChatGPT, Claude, GitHub Copilot, Cursor, Windsurf)
    
    Using the Gainly API:
    1. Write code to call the find_duplicate operation (see OpenAPI spec: https://api.gainly.ai/v20241104/openapi.json)
    2. Implement authentication using the header "X-API-Key" as described in the docs: https://docs.gainly.ai/latest/api-reference/authentication/
    3. Implement rate limit handling as described in the docs: https://docs.gainly.ai/latest/api-reference/rate-limits/
    4. Implement error handling
    5. Handle the response according to the FindDuplicateResults schema in the OpenAPI spec
    6. Implement pagination to handle multiple pages of results as described in the docs: https://docs.gainly.ai/latest/docs/pagination/
    
    using System.Net.Http;
    using System.Text.Json;
    using System.Text;
    
    // Endpoint and JSON body for the find-duplicate request.
    const string endpoint = "https://api.gainly.ai/v20241104/find-duplicate";
    var requestJson = JsonSerializer.Serialize(new {
        document_id = "mQfrepQBP4_EawzZWKQt"
    });
    
    // The API key is sent on every request from this client via the X-API-Key header.
    var http = new HttpClient();
    http.DefaultRequestHeaders.Add("X-API-Key", "YOUR_API_KEY_HERE"); // (1)!
    
    // POST the body as UTF-8 application/json and print the raw response text.
    var body = new StringContent(requestJson, Encoding.UTF8, "application/json");
    var reply = await http.PostAsync(endpoint, body);
    Console.WriteLine(await reply.Content.ReadAsStringAsync());
    
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    package main
    
    import (
        "bytes"
        "encoding/json"
        "fmt"
        "net/http"
    )
    
    // main sends a POST request to the find-duplicate endpoint for a single
    // document ID and prints the decoded JSON response.
    //
    // Every step that can fail is checked; the program panics with the
    // underlying error instead of continuing with a nil or partial value.
    func main() {
        url := "https://api.gainly.ai/v20241104/find-duplicate"
    
        payload := map[string]interface{}{
            "document_id": "mQfrepQBP4_EawzZWKQt",
        }
    
        jsonData, err := json.Marshal(payload)
        if err != nil {
            panic(err)
        }
    
        req, err := http.NewRequest("POST", url, bytes.NewBuffer(jsonData))
        if err != nil {
            panic(err)
        }
        req.Header.Set("Content-Type", "application/json")
        req.Header.Set("X-API-Key", "YOUR_API_KEY_HERE") // (1)!
    
        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            // resp is nil when Do fails, so stop before touching resp.Body.
            panic(err)
        }
        defer resp.Body.Close()
    
        var result map[string]interface{}
        if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
            panic(err)
        }
        fmt.Println(result)
    }
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    import java.net.http.HttpClient;
    import java.net.http.HttpRequest;
    import java.net.http.HttpResponse;
    import java.net.URI;
    
    // JSON body identifying the document to find duplicates for.
    var requestBody = """
        {
            "document_id": "mQfrepQBP4_EawzZWKQt"
        }
        """;
    
    // Describe the POST request, including the API key header.
    var endpoint = URI.create("https://api.gainly.ai/v20241104/find-duplicate");
    var httpRequest = HttpRequest.newBuilder()
        .uri(endpoint)
        .header("Content-Type", "application/json")
        .header("X-API-Key", "YOUR_API_KEY_HERE") // (1)!
        .POST(HttpRequest.BodyPublishers.ofString(requestBody))
        .build();
    
    // Send the request and print the response body as a string.
    var asText = HttpResponse.BodyHandlers.ofString();
    var reply = HttpClient.newHttpClient().send(httpRequest, asText);
    System.out.println(reply.body());
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    const axios = require('axios');  // or: import axios from 'axios';
    
    const endpoint = 'https://api.gainly.ai/v20241104/find-duplicate';
    
    // Request body: the document to find duplicates for.
    const body = { document_id: 'mQfrepQBP4_EawzZWKQt' };
    
    // Axios request config carrying the JSON content type and the API key.
    const requestConfig = {
        headers: {
            'Content-Type': 'application/json',
            'X-API-Key': 'YOUR_API_KEY_HERE' // (1)!
        }
    };
    
    // Success prints the parsed response payload; failure prints the error message.
    const printData = ({ data }) => console.log(data);
    const printError = (err) => console.error('Error:', err.message);
    
    axios.post(endpoint, body, requestConfig).then(printData).catch(printError);
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    <?php
    
    $endpoint = 'https://api.gainly.ai/v20241104/find-duplicate';
    
    // Request options: authentication headers plus the body passed via 'json'.
    $options = [
        'json' => [
            'document_id' => 'mQfrepQBP4_EawzZWKQt',
        ],
        'headers' => [
            'Content-Type' => 'application/json',
            'X-API-Key' => 'YOUR_API_KEY_HERE', # (1)!
        ],
    ];
    
    // Send the POST request and print the response body.
    $http = new \GuzzleHttp\Client();
    $reply = $http->request('POST', $endpoint, $options);
    
    echo $reply->getBody();
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    import requests
    
    # Endpoint for the find-duplicate operation.
    endpoint = "https://api.gainly.ai/v20241104/find-duplicate"
    
    # POST the document ID as a JSON body, authenticating with the API key header.
    reply = requests.post(
        endpoint,
        json={"document_id": "mQfrepQBP4_EawzZWKQt"},
        headers={
            "Content-Type": "application/json",
            "X-API-Key": "YOUR_API_KEY_HERE", # (1)!
        },
    )
    
    # Decode the JSON response and print it.
    print(reply.json())
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.
    require 'json'
    require 'uri'
    require 'net/http'
    require 'openssl'
    
    endpoint = URI('https://api.gainly.ai/v20241104/find-duplicate')
    
    # Build the POST request: JSON content type, API key header, JSON body.
    body = { document_id: 'mQfrepQBP4_EawzZWKQt' }
    post = Net::HTTP::Post.new(endpoint)
    post['Content-Type'] = 'application/json'
    post['X-API-Key'] = 'YOUR_API_KEY_HERE' # (1)!
    post.body = body.to_json
    
    # Open a TLS connection to the API host, send the request, print the body.
    conn = Net::HTTP.new(endpoint.host, endpoint.port)
    conn.use_ssl = true
    puts conn.request(post).read_body
    
    1. Replace YOUR_API_KEY_HERE with the value of your API key.

API Response

{
    "object": "find_duplicate_result",
    "url": "/v20241104/find-duplicate",
    "data": [
        {
            "id": "11ZVdpQBQ1TpH-jr8AR-",
            "title": "The Wildebeest’s Role in the Tourism Industry",
            "confidence_level": "very_high",
            "source_uri": "/doc/wildebeest-tourism-20250117",
            "metadata": null,
            "tenant_id": null,
            "language": "en",
            "created_at": "2025-01-17T22:15:30.933737Z",
            "updated_at": "2025-01-17T22:15:30.933737Z"
        }
    ],
    "document_id": "mQfrepQBP4_EawzZWKQt",
    "tenant_id": null,
    "filter": null,
    "sort": null,
    "language": "en",
    "limit": 10,
    "total_number_of_results": 10,
    "has_more": false,
    "next_page": null,
    "page": 1,
    "token_usage": {
        "semantic_tokens": 0,
        "llm_tokens": {
            "llm_output_tokens": 0,
            "llm_input_tokens": 0,
            "model": null
        }
    },
    "livemode": false
}

Confidence Level

confidence_level represents Gainly's assessment of how likely it is that a document is a duplicate of the source document.

It will have one of the following values:

  • very_high
  • high
  • medium
  • low
  • not_available

Pagination

Please see the Pagination guide (https://docs.gainly.ai/latest/docs/pagination/) for details on navigating multiple pages of results.