Automate GCP BigQuery Table View Creation

In this quick demo, we are going to use a Python Script to automate GCP BigQuery Table View Creation for all tables in a dataset in a different GCP project.

Before running the script you need to create Application Default Credentials to let Python script use them to authenticate.

The user account or service account you are going to use to create application default credentials should have write permissions on source dataset and destination dataset.

Following is the scrip that creates views for all tables in a dataset in a different project or in the same project with authorized view permissions.

import sys
from google.cloud import bigquery

def remove_and_create_view_datasets(bq_client, project, dataset):
        """Deletes and re-creates view dataset in destination project"""
        dataset_ref = bq_client.dataset(dataset)
        print('Deleting Dataset {} in Project {}.'.format(dataset_ref.dataset_id, project) + "\n")
        try:
                bq_client.delete_dataset(dataset_ref, delete_contents='true')
                print('Dataset {} deleted in Project {}.'.format(dataset_ref.dataset_id, project) + "\n")
        except Exception as e:
                print(e)
        dataset = bigquery.Dataset(dataset_ref)
        dataset = bq_client.create_dataset(dataset)
        print('Dataset {} created in Project {}.'.format(dataset.dataset_id, project) + "\n")

def list_tables_in_dataset(bq_client, dataset):
        """Lists Tables in a Dataset"""
        table_list = list(bq_client.list_tables(dataset))
        tables = []
        for table in table_list:
                tables.append(table.table_id)
        return(tables)

def create_view(bq_client, sproject, vproject, dataset, table):
        """Creates a View"""
        print("Creating a view in " + vproject + " for " + sproject + dataset + table + "\n" )
        query = "select * from `" + sproject + "." + dataset + "." + table + "`"
        dataset_ref = bq_client.dataset(dataset)
        table_ref = dataset_ref.table(table)
        table = bigquery.Table(table_ref)
        table.view_query = query
        table.view_query_legacy_sql = False
        table = bq_client.create_table(table)
        print('{} view created in Dataset {}'.format(table.table_id, dataset) + " in " + vproject + "\n")

def add_view_permissions(sbq_client, vbq_client, sproject, vproject, dataset):
        """Adds Auth View Permissions"""
        dataset_ref = sbq_client.get_dataset(sproject + "." + dataset)
        entries = list(dataset_ref.access_entries)
        tables = list_tables_in_dataset(vbq_client, dataset)
        for table in tables:
            entry = bigquery.AccessEntry(
                    None,
                    entity_type = 'view',
                    entity_id = { 'projectId': vproject,
                                  'datasetId': dataset,
                                  'tableId': table 
                                }
            )
            if entry not in dataset_ref.access_entries:
                entries.append(entry)
            else:
                print(entry)
                print('Permission already there for View {}.'.format(dataset_ref.dataset_id) + "\n")
        dataset_ref.access_entries = entries
        dataset = sbq_client.update_dataset(dataset_ref, ['access_entries'])
        print('Auth View Permission added for {}'.format(dataset.dataset_id) + " in " + sproject+ "\n")

def remove_view_permissions(bq_client, sproject, vproject, dataset):
        """Remove Stale View Permissions"""
        dataset_ref = bq_client.get_dataset(bq_client.dataset(dataset))
        newentries = []
        for entry in dataset_ref.access_entries:
                if "'projectId': '" + vproject + "', 'datasetId': '" + dataset + "'," not in str(entry):
                        newentries.append(entry)
        dataset_ref.access_entries = newentries
        dataset = bq_client.update_dataset(dataset_ref, ['access_entries'])
        print('Auth View Permissions Cleared for {}'.format(dataset.dataset_id) + " in " + sproject + "\n")

def dataset_views(sproject, vproject, dataset):
        """Creates views for all tables in a dataset"""
        source_bq_client = bigquery.Client(sproject)
        view_bq_client = bigquery.Client(vproject)
        remove_and_create_view_datasets(view_bq_client, vproject, dataset)
        remove_view_permissions(source_bq_client, sproject, vproject, dataset)
        tables = list_tables_in_dataset(source_bq_client, dataset)
        for table in tables:
                try:
                        create_view(view_bq_client, sproject, vproject, dataset, table)
                except Exception as e:
                        print(e)
        add_view_permissions(source_bq_client, view_bq_client, sproject, vproject, dataset)

def scripthelp():
        """Prints Help Info"""
        filename = os.path.basename(__file__)
        print("\nScript for creating BigQuery views\n")
        print("\nFor creating views for a dataset\n")
        print("\nUsage: " + filename + " <source_project_id> <view_project_id> <dataset>\n")

def main():
        if len(sys.argv) == 4:
                dataset_views(sys.argv[1], sys.argv[2], sys.argv[3])
        elif len(sys.argv) == 1 or sys.argv[1] == "-help" or sys.argv[1] == "help" or sys.argv[1] == "--help" or sys.argv[1] == "-h":
                scripthelp()
        else:
                scripthelp()

if __name__ == "__main__":
    main()

Following is the help info of the script.

sudheerv@cloudshell:~/views$ python create_view.py -h
Script for creating BigQuery views
For creating views for a dataset

Usage: create_view.py <source_project_id> <view_project_id> <dataset>

It needs source project destination project and dataset name as arguments.

When you run it will run views for all tables in a dataset in destination project. Before creating views, it will create a dataset with same name to create views in them. When you re-run the script it will delete and re-create destination dataset.

To test this script we are going to create views for sales dataset in devops-counsel-demo project.

source dataset
script output

Script has created a dataset called sales and created view in sales dataset in devops-gcp-training-001 project.

view details

Script also has created authorized view permissions for fruits views, which is very important for allowing users, who have access only in view project.

authorized view premissions

In the above demo we have successfully used a Python Script to automate GCP BigQuery Table View Creation for all tables in a dataset in a different GCP project.

you can find the above code in this repository.

You can find out more information on BigQuery views here.

More on BigQuery on DevOps Counsel:

Pseudonymization of BigQuery Table Data

GCP BigQuery Python Code Samples

GCP BigQuery Dynamic Data Masking

Leave a Reply

%d