Backup Cloudant database to IBM Cloud Object Storage with IBM Cloud Function
The traditional approach to backing up a database works at the storage level: you copy the folder where the data is kept. This is straightforward for a single-instance or active-passive database. IBM Cloudant, however, is built on a master-master (also known as “master-less”) clustering framework that can span multiple racks, data centers, cloud providers, or devices. This makes it challenging for an operations team to perform backups in a manageable and cost-effective way.
IBM Cloudant provides a supported tool for snapshot backup and restore — couchbackup. Couchbackup is a Node.js library that can either be run from the command line or embedded (via npm) in a Node.js application. Because backups are performed periodically, IBM Cloud Functions is a good choice for executing them.
There is a sample script that uses the couchbackup library to perform a backup; I am going to modify it slightly so that it runs in IBM Cloud Functions.
The sample script looks like below:
/**
 * Backup directly from Cloudant to an S3 bucket via a stream.
 *
 * couchbackup writes the backup into an in-memory pass-through stream, which
 * the S3 client reads as the body of the uploaded object.
 *
 * @param {string} sourceUrl - URL of the source database to back up
 * @param {object} s3Client - S3 client object
 * @param {string} s3Bucket - Destination S3 bucket (must exist)
 * @param {string} s3Key - Destination object's key (shouldn't exist)
 * @param {boolean} shallow - Whether to use couchbackup's shallow mode
 * @returns {Promise} Resolves (with no value) once the S3 upload succeeds;
 *   rejects with an Error if either the upload or couchbackup fails.
 */
function backupToS3(sourceUrl, s3Client, s3Bucket, s3Key, shallow) {
  return new Promise((resolve, reject) => {
    // Template literals (backticks) are required for ${...} interpolation.
    debug(`Setting up S3 upload to ${s3Bucket}/${s3Key}`);

    // A pass through stream that has couchbackup's output
    // written to it and is then read by the S3 upload client.
    // It has a 10MB internal buffer.
    const streamToUpload = new stream.PassThrough({ highWaterMark: 10485760 });

    // Set up S3 upload.
    const params = {
      Bucket: s3Bucket,
      Key: s3Key,
      Body: streamToUpload,
    };
    s3Client
      .upload(params, (err, data) => {
        debug('S3 upload done');
        if (err) {
          debug(err);
          reject(new Error('S3 upload failed'));
          return;
        }
        debug('S3 upload succeeded');
        debug(data);
        resolve();
      })
      // Progress is delivered as an event; assigning a property named
      // httpUploadProgress on the returned object registers nothing.
      .on('httpUploadProgress', (progress) => {
        // progress is an object, so serialize it rather than printing
        // "[object Object]".
        debug(`S3 upload progress: ${JSON.stringify(progress)}`);
      });

    debug(`Starting streaming data from ${sourceUrl}`);
    couchbackup.backup(
      sourceUrl,
      streamToUpload,
      // Honor the documented `shallow` flag; an empty options object keeps
      // couchbackup's defaults.
      shallow ? { mode: 'shallow' } : {},
      (err) => {
        if (err) {
          debug(err);
          reject(new Error('CouchBackup failed with an error'));
          return;
        }
        debug(`Download from ${sourceUrl} complete.`);
        streamToUpload.end(); // must call end() to complete S3 upload.
        // resolve() is called by the S3 upload
      }
    );
  });
}
IBM Cloud Functions comes with some libraries pre-installed in its Node.js runtime. However, they are not sufficient for this package. Therefore, I am going to use another approach — packaging the action as a Node.js module.
Consider a directory with the following files:
1. Save the following code in a file named package.json.
{
"name": "couchbackup",
"main": "app.js",
"version": "0.0.1",
"dependencies": {
"@cloudant/couchbackup": "^2.3.1",
"ibm-cos-sdk": "^1.1.4",
"moment": "^2.23.0",
"promise": "^8.0.2",
"stream": "0.0.2"
},
"engines": {
"node": "10.x"
}
}
2. Save the following code in a file named app.js.
const AWS = require('ibm-cos-sdk');
const stream = require('stream');
const couchbackup = require('@cloudant/couchbackup');
const moment = require('moment');

/**
 * IBM Cloud Functions action: back up a Cloudant database with couchbackup
 * and stream the result into an object storage bucket.
 *
 * The object key is derived from `params.key` by inserting a
 * `-YYYYMMDD-HHmmss` timestamp before the last '.' in the key, or appending
 * it when the key contains no '.'.
 *
 * @param {object} params - Action parameters.
 * @param {string} params.bucket - Destination bucket name.
 * @param {string} params.key - Base object key for the backup file.
 * @param {string} params.cloudant_url - URL of the database to back up.
 * @param {object} params.config - Options passed to the `AWS.S3` constructor.
 * @returns {Promise} Resolves (with no value) once the upload succeeds;
 *   rejects with an Error if either the upload or couchbackup fails.
 */
function couchBackupAction(params) {
  const bucket = params.bucket;

  // 'HH' is the 24-hour clock. The 12-hour 'hh' without an AM/PM marker
  // would give backups taken 12 hours apart the same object key.
  const timestamp = moment().format('YYYYMMDD-HHmmss');
  const extIndex = params.key.lastIndexOf('.');
  const key = (extIndex < 0)
    ? params.key + '-' + timestamp
    : params.key.substring(0, extIndex) + '-' + timestamp + params.key.substring(extIndex);

  const cloudantUrl = params.cloudant_url;
  const s3 = new AWS.S3(params.config);

  return new Promise((resolve, reject) => {
    console.log('Setting up S3 upload to ' + bucket + '/' + key);

    // A pass through stream that has couchbackup's output
    // written to it and is then read by the S3 upload client.
    // It has a 10MB internal buffer.
    const streamToUpload = new stream.PassThrough({
      highWaterMark: 10485760,
    });

    // Set up S3 upload. Named uploadParams so it does not shadow the
    // action's own `params` argument.
    const uploadParams = {
      Bucket: bucket,
      Key: key,
      Body: streamToUpload,
    };
    s3.upload(uploadParams, (err, data) => {
      console.log('S3 upload done');
      if (err) {
        console.log(err);
        reject(new Error('S3 upload failed'));
        return;
      }
      console.log('S3 upload succeeded');
      console.log(data);
      resolve();
    })
      // Progress is delivered as an event; assigning a property named
      // httpUploadProgress on the returned object registers nothing.
      .on('httpUploadProgress', (progress) => {
        // progress is an object, so serialize it rather than printing
        // "[object Object]".
        console.log('S3 upload progress: ' + JSON.stringify(progress));
      });

    // NOTE(review): cloudant_url is logged verbatim; if it embeds
    // credentials they will end up in the activation logs — confirm this
    // is acceptable or redact before logging.
    console.log('Starting streaming data from ' + cloudantUrl);
    couchbackup.backup(
      cloudantUrl,
      streamToUpload,
      (err) => {
        if (err) {
          console.log(err);
          reject(new Error('CouchBackup failed with an error'));
          return;
        }
        console.log('Download from ' + cloudantUrl + ' complete.');
        streamToUpload.end(); // must call end() to complete S3 upload.
        // resolve() is called by the S3 upload
      }
    );
  });
}

exports.main = couchBackupAction;
3. Install all dependencies locally.
npm install
4. Create a .zip archive containing all files, including all dependencies.
zip -r action.zip *
5. Create the action; I named it packageCouchbackup. When you create an action from a .zip archive, you must set a value for the --kind parameter to specify your Node.js runtime version; in this case I am using nodejs:10.
ibmcloud fn action update packageCouchbackup action.zip --kind nodejs:10
6. The newly created action will appear in the IBM Cloud Functions dashboard.
This action is now ready for testing. Before that, we need to prepare the information that is passed to the action as parameters, in JSON format.
{
"bucket": <BUCKET_NAME>,
"key": <KEY>,
"cloudant_url": <CLOUDANT_DB_URL>,
"config": {
"endpoint": <IBM_COS_ENDPOINT>,
"apiKeyId": <IBM_COS_APIKEY>,
"ibmAuthEndpoint": "https://iam.ng.bluemix.net/oidc/token",
"serviceInstanceId": <RESOURCE_INSTANCE_ID>
}
}
The IBM Cloud Object Storage API key and resource instance ID can be found in the Service Credentials section of the IBM Cloud Object Storage dashboard; the endpoint is listed in the Endpoint section.
Please note that the correct endpoint depends on the location that was chosen for the bucket when it was created.
In this case I have a bucket named ongkw-cloudant-backup. Now we can perform our test in the IBM Cloud Functions dashboard.
1. Click Change Input to enter the parameters in JSON format.
2. Click Invoke to test the action.
3. The activation executes successfully in less than 2 seconds.
4. The IBM Cloud Object Storage bucket now contains a newly created backup file from the Cloudant database.
Further enhancements are possible, such as enabling the web action in IBM Cloud Functions, or using IBM Workload Scheduler to invoke this action on a schedule.
Full source code can be found at https://github.com/ongkhaiwei/couchbackupfn