Update confluence collator documentation & fix edge case (#2463)

* update confluence config & docs

Signed-off-by: Alex McKay <amckay@spotify.com>

* add unit tests for collator factory

Signed-off-by: Alex McKay <amckay@spotify.com>

* make cql docs reference a hyperlink

Signed-off-by: Alex McKay <amckay@spotify.com>

---------

Signed-off-by: Alex McKay <amckay@spotify.com>
This commit is contained in:
Alex McKay 2025-01-10 13:05:50 -05:00 committed by GitHub
parent 6bf6fe4d21
commit 5431e37f9d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 210 additions and 12 deletions

View File

@ -0,0 +1,5 @@
---
'@backstage-community/plugin-search-backend-module-confluence-collator': patch
---
Update config with links to the Confluence docs, and ensure that omitting the optional 'spaces' & 'query' config sections returns all results (instead of throwing an error).

View File

@ -61,11 +61,14 @@ There is some configuration that needs to be setup to use this action, these are
confluence:
baseUrl: 'http://confluence.example.com'
auth:
type: 'bearer' # can also be 'basic' or 'userpass'
token: '${CONFLUENCE_TOKEN}'
spaces: [] # Warning, it is highly recommended to safely list the spaces that you want to index, either all documents will be indexed.
query: '' # If your spaces contain documents you don't want to index, you can use a CQL query to more precisely select them. This is combined with the spaces parameter above
spaces: [] # It is highly recommended to explicitly list the spaces that you want to index; otherwise, all spaces will be indexed.
query: '' # If your spaces contain documents you don't want to index, you can use a CQL query to more precisely select them. This is combined with the spaces parameter above.
```
Documentation about CQL (Confluence Query Language) can be found in the [Confluence CQL reference](https://developer.atlassian.com/server/confluence/advanced-searching-using-cql).
The sections below will go into more details about the Base URL and Auth Methods.
#### Base URL
@ -110,7 +113,7 @@ confluence:
password: 'your-password'
```
**Note:** For `basic` and `bearer` authorization methods you will need an access token for authorization with `Read` permissions. You can create a Personal Access Token (PAT) in Confluence. The value used should be the raw token as it will be encoded for you by the action.
**Note:** For `basic` and `bearer` authorization methods you will need an access token for authorization with `Read` permissions. You can create a Personal Access Token (PAT) [in Confluence](https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/). The value used should be the raw token as it will be encoded for you by the action.
#### Search Schedule

View File

@ -19,11 +19,11 @@ export interface Config {
search?: {
collators?: {
/**
* Configuration options for `@backstage/plugin-search-backend-module-techdocs`
* Configuration options for `@backstage/plugin-search-backend-module-confluence-collator`
*/
confluence?: {
/**
* The schedule for how often to run the collation job.
* The schedule for how often to run the collation job for Confluence.
*/
schedule?: SchedulerServiceTaskScheduleDefinitionConfig;
};
@ -40,27 +40,29 @@ export interface Config {
*/
auth: {
/**
* Authentication method - basic, userpass
* Authentication method - basic, bearer, or userpass
*/
type: 'basic' | 'bearer' | 'userpass';
/**
* Confluence bearer authentication token
* Confluence bearer authentication token with `Read` permissions, only required if type is set to 'basic' or 'bearer'.
* Reference the Confluence documentation to generate an API token:
* https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/
* @visibility secret
*/
token?: string;
/**
* Email used with the token for the basic auth method
* Email associated with the token, only required if type is set to 'basic'.
* @visibility secret
*/
email?: string;
/**
* Confluence basic authentication username.
* Confluence basic authentication username, only required if type is set to 'userpass'.
* While Confluence supports BASIC authentication, using an API token is preferred.
* See: https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/
*/
username?: string;
/**
* Confluence basic authentication password.
* Confluence basic authentication password, only required if type is set to 'userpass'.
* While Confluence supports BASIC authentication, using an API token is preferred.
* See: https://support.atlassian.com/atlassian-account/docs/manage-api-tokens-for-your-atlassian-account/
* @visibility secret
@ -68,11 +70,14 @@ export interface Config {
password?: string;
};
/**
* Spaces to index
* Array of Confluence spaces to index. If omitted, all spaces will be indexed.
* See: https://support.atlassian.com/confluence-cloud/docs/use-spaces-to-organize-your-work/
*/
spaces?: string[];
/**
* CQL query to select the pages to index. It is combined with spaces parameter above when finding documents.
* CQL query to select the pages to index. It is combined via 'AND' with spaces parameter above when finding documents.
* Reference the Confluence documentation for information about CQL syntax:
* https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/
*/
query?: string;
/**

View File

@ -0,0 +1,177 @@
/*
* Copyright 2025 The Backstage Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import {
ConfluenceCollatorFactory,
ConfluenceCollatorFactoryOptions,
} from './ConfluenceCollatorFactory';
import {
mockServices,
registerMswTestHooks,
} from '@backstage/backend-test-utils';
import { TestPipeline } from '@backstage/plugin-search-backend-node';
import { ConfigReader } from '@backstage/config';
import { setupServer } from 'msw/node';
import { rest, RestRequest } from 'msw';
const logger = mockServices.logger.mock();
const BASE_URL = 'http://confluence.example.com';
const CONFLUENCE_API_PATH =
'/rest/api/content/search?limit=1000&status=current&cql=';
/**
 * Asserts that a captured request carried exactly the expected URL search
 * parameters.
 *
 * The request's query string is flattened into a plain key/value object and
 * compared against `expectedSearch` with `toEqual`. If no request was
 * captured at all, the helper fails the running test instead.
 *
 * @param request - The intercepted request, or `undefined` if nothing was captured.
 * @param expectedSearch - The search parameters the request is expected to contain.
 */
const testSearchQuery = (
  request: RestRequest | undefined,
  expectedSearch: unknown,
) => {
  if (request) {
    // Flatten the URLSearchParams into a plain object so it can be deep-compared.
    const actualParams: Record<string, string> = {};
    request.url.searchParams.forEach((paramValue: string, paramName: string) => {
      actualParams[paramName] = paramValue;
    });
    expect(actualParams).toEqual(expectedSearch);
    return;
  }

  // Nothing was captured: this assertion fails on purpose so the test reports it.
  expect(request).not.toBeFalsy();
};
/**
 * Tests for `ConfluenceCollatorFactory`:
 *  - the factory reports the `confluence` type,
 *  - `fromConfig` throws when the auth section lacks fields required by its `type`,
 *  - the CQL sent to Confluence matches the `spaces` / `query` configuration.
 */
describe('ConfluenceCollatorFactory', () => {
  // Minimal valid configuration: basic auth, no `spaces` and no `query`.
  const baseConfig = new ConfigReader({
    confluence: {
      baseUrl: BASE_URL,
      auth: {
        type: 'basic',
        token: 'AA',
        email: 'user@example.com',
      },
    },
  });
  const factoryOptions: ConfluenceCollatorFactoryOptions = { logger };
  const defaultFactory = ConfluenceCollatorFactory.fromConfig(
    baseConfig,
    factoryOptions,
  );

  // MSW server used to intercept the HTTP calls made by the collator.
  const mswServer = setupServer();
  registerMswTestHooks(mswServer);

  it('has expected collator factory type', () => {
    expect(defaultFactory.type).toBe('confluence');
  });

  it('throws if auth fields are missing based on provided type', () => {
    // Each entry omits one field that its auth `type` requires.
    const incompleteAuthSections = [
      // missing email
      { type: 'basic', token: 'AA' },
      // missing password
      { type: 'userpass', username: 'user' },
      // missing token
      { type: 'bearer' },
    ];

    incompleteAuthSections.forEach(auth => {
      const malformedConfig = new ConfigReader({
        confluence: {
          baseUrl: BASE_URL,
          auth,
        },
      });
      expect(() =>
        ConfluenceCollatorFactory.fromConfig(malformedConfig, factoryOptions),
      ).toThrow();
    });
  });

  it('uses default CQL query when `spaces` & `query` are both omitted from config', async () => {
    // Record the outgoing search request so its query string can be inspected.
    let capturedRequest;
    mswServer.use(
      rest.get(BASE_URL + CONFLUENCE_API_PATH, (req, res, ctx) => {
        capturedRequest = req;
        return res(ctx.status(200), ctx.json({}));
      }),
    );

    const collator = await defaultFactory.getCollator();
    await TestPipeline.fromCollator(collator).execute();

    testSearchQuery(capturedRequest, {
      limit: '1000',
      status: 'current',
      cql: 'type IN (page, blogpost, comment, attachment)',
    });
  });

  it('combines values from `spaces` & `query` when both are present in config', async () => {
    const spacesAndQueryConfig = new ConfigReader({
      confluence: {
        baseUrl: BASE_URL,
        auth: {
          type: 'basic',
          token: 'AA',
          email: 'user@example.com',
        },
        spaces: ['SPACE1', 'SPACE2'],
        query: 'type = page',
      },
    });
    const spacesAndQueryFactory = ConfluenceCollatorFactory.fromConfig(
      spacesAndQueryConfig,
      factoryOptions,
    );

    // Record the outgoing search request so its query string can be inspected.
    let capturedRequest;
    mswServer.use(
      rest.get(BASE_URL + CONFLUENCE_API_PATH, (req, res, ctx) => {
        capturedRequest = req;
        return res(ctx.status(200), ctx.json({}));
      }),
    );

    const collator = await spacesAndQueryFactory.getCollator();
    await TestPipeline.fromCollator(collator).execute();

    // The space filter and the extra query are expected to be AND-ed together.
    testSearchQuery(capturedRequest, {
      limit: '1000',
      status: 'current',
      cql: '(space="SPACE1" or space="SPACE2") and (type = page)',
    });
  });
});

View File

@ -254,6 +254,14 @@ export class ConfluenceCollatorFactory implements DocumentCollatorFactory {
if (additionalQuery !== '') {
query = `(${spaceQuery}) and (${additionalQuery})`;
}
// If no query is provided, default to fetching all pages, blogposts, comments and attachments (which encompasses all content)
// https://developer.atlassian.com/server/confluence/advanced-searching-using-cql/#type
if (query === '') {
this.logger.info(
`No confluence query nor spaces provided via config, so will index all pages, blogposts, comments and attachments`,
);
query = 'type IN (page, blogpost, comment, attachment)';
}
return query;
}