feat: Publish new bigtable APIs for types and aggregates

Bigtable aggregates will allow users to configure column families whose cells accumulate values via an aggregation function rather than simply overwrite them.

PiperOrigin-RevId: 613716423
googleapis · Mar 7, 2024 · 66fc31d · 66fc31d
1 parent 8e2fbae
commit 66fc31d
Show file tree

Hide file tree

Showing 5 changed files with 221 additions and 1 deletion.
diff --git a/google/bigtable/admin/v2/BUILD.bazel b/google/bigtable/admin/v2/BUILD.bazel
@@ -46,6 +46,7 @@ proto_library(
  "common.proto",
  "instance.proto",
  "table.proto",
+ "types.proto",
  ],
  deps = [
  "//google/api:annotations_proto",

diff --git a/google/bigtable/admin/v2/bigtable_table_admin.proto b/google/bigtable/admin/v2/bigtable_table_admin.proto
@@ -771,6 +771,12 @@ message ModifyColumnFamiliesRequest {
  // family exists.
  bool drop = 4;
  }
+
+ // Optional. A mask specifying which fields (e.g. `gc_rule`) in the `update`
+ // mod should be updated. Ignored for other modification types. If unset or
+ // empty, it is treated as updating `gc_rule`, for backward compatibility.
+ google.protobuf.FieldMask update_mask = 6
+ [(google.api.field_behavior) = OPTIONAL];
  }
 
  // Required. The unique name of the table whose families should be modified.

diff --git a/google/bigtable/admin/v2/table.proto b/google/bigtable/admin/v2/table.proto
@@ -18,6 +18,7 @@ package google.bigtable.admin.v2;
 
 import "google/api/field_behavior.proto";
 import "google/api/resource.proto";
+import "google/bigtable/admin/v2/types.proto";
 import "google/protobuf/duration.proto";
 import "google/protobuf/timestamp.proto";
 import "google/rpc/status.proto";
@@ -268,6 +269,18 @@ message ColumnFamily {
  // so it's possible for reads to return a cell even if it matches the active
  // GC expression for its family.
  GcRule gc_rule = 1;
+
+ // The type of data stored in each of this family's cell values, including its
+ // full encoding. If omitted, the family only serves raw untyped bytes.
+ //
+ // For now, only the `Aggregate` type is supported.
+ //
+ // `Aggregate` can only be set at family creation and is immutable afterwards.
+ //
+ //
+ // If `value_type` is `Aggregate`, written data must be compatible with:
+ // * `value_type.input_type` for `AddToCell` mutations
+ Type value_type = 3;
 }
 
 // Rule for determining which cells to delete during garbage collection.

diff --git a/google/bigtable/admin/v2/types.proto b/google/bigtable/admin/v2/types.proto
@@ -0,0 +1,149 @@
+// Copyright 2023 Google LLC
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+syntax = "proto3";
+
+package google.bigtable.admin.v2;
+
+import "google/api/field_behavior.proto";
+
+option csharp_namespace = "Google.Cloud.Bigtable.Admin.V2";
+option go_package = "google.golang.org/genproto/googleapis/bigtable/admin/v2;admin";
+option java_multiple_files = true;
+option java_outer_classname = "TypesProto";
+option java_package = "com.google.bigtable.admin.v2";
+option php_namespace = "Google\\Cloud\\Bigtable\\Admin\\V2";
+option ruby_package = "Google::Cloud::Bigtable::Admin::V2";
+
+// `Type` represents the type of data that is written to, read from, or stored
+// in Bigtable. It is heavily based on the GoogleSQL standard to help maintain
+// familiarity and consistency across products and features.
+//
+// For compatibility with Bigtable's existing untyped APIs, each `Type` includes
+// an `Encoding` which describes how to convert to/from the underlying data.
+// This might involve composing a series of steps into an "encoding chain," for
+// example to convert from INT64 -> STRING -> raw bytes. In most cases, a "link"
+// in the encoding chain will be based on an existing GoogleSQL conversion
+// function like `CAST`.
+//
+// Each link in the encoding chain also defines the following properties:
+// * Natural sort: Does the encoded value sort consistently with the original
+// typed value? Note that Bigtable will always sort data based on the raw
+// encoded value, *not* the decoded type.
+// - Example: STRING values sort in the same order as their UTF-8 encodings.
+// - Counterexample: Encoding INT64 to a fixed-width STRING does *not*
+// preserve sort order when dealing with negative numbers.
+// INT64(-1) > INT64(-2), but STRING("-00001") < STRING("-00002").
+// - The overall encoding chain sorts naturally if *every* link does.
+// * Self-delimiting: If we concatenate two encoded values, can we always tell
+// where the first one ends and the second one begins?
+// - Example: If we encode INT64s to fixed-width STRINGs, the first value
+// will always contain exactly N digits, possibly preceded by a sign.
+// - Counterexample: If we concatenate two UTF-8 encoded STRINGs, we have
+// no way to tell where the first one ends.
+// - The overall encoding chain is self-delimiting if *any* link is.
+// * Compatibility: Which other systems have matching encoding schemes? For
+// example, does this encoding have a GoogleSQL equivalent? HBase? Java?
+message Type {
+ // The `Bytes` type.
+ // Values of type `Bytes` are stored in `Value.bytes_value`.
+ message Bytes {
+ // Rules used to convert to/from lower level types.
+ message Encoding {
+ // Leaves the value "as-is".
+ // * Natural sort? Yes
+ // * Self-delimiting? No
+ // * Compatibility? N/A
+ message Raw {}
+
+ // Which encoding to use.
+ oneof encoding {
+ // Use `Raw` encoding.
+ Raw raw = 1;
+ }
+ }
+
+ // The encoding to use when converting to/from lower level types.
+ Encoding encoding = 1;
+ }
+
+ // The `Int64` type.
+ // Values of type `Int64` are stored in `Value.int_value`.
+ message Int64 {
+ // Rules used to convert to/from lower level types.
+ message Encoding {
+ // Encodes the value as an 8-byte big-endian two's complement `Bytes`
+ // value.
+ // * Natural sort? No (sorts naturally for positive values only)
+ // * Self-delimiting? Yes
+ // * Compatibility?
+ // - BigQuery Federation `BINARY` encoding
+ // - HBase `Bytes.toBytes`
+ // - Java `ByteBuffer.putLong()` with `ByteOrder.BIG_ENDIAN`
+ message BigEndianBytes {
+ // The underlying `Bytes` type, which may be able to encode further.
+ Bytes bytes_type = 1;
+ }
+
+ // Which encoding to use.
+ oneof encoding {
+ // Use `BigEndianBytes` encoding.
+ BigEndianBytes big_endian_bytes = 1;
+ }
+ }
+
+ // The encoding to use when converting to/from lower level types.
+ Encoding encoding = 1;
+ }
+
+ // A value that combines incremental updates into a summarized value.
+ //
+ // Data is never directly written or read using type `Aggregate`. Writes will
+ // provide either the `input_type` or `state_type`, and reads will always
+ // return the `state_type`.
+ message Aggregate {
+ // Computes the sum of the input values.
+ // Allowed input: `Int64`
+ // State: same as input
+ message Sum {}
+
+ // Type of the inputs that are accumulated by this `Aggregate`, which must
+ // specify a full encoding.
+ // Use `AddToCell` mutations to accumulate new inputs.
+ Type input_type = 1;
+
+ // Output only. Type that holds the internal accumulator state for the
+ // `Aggregate`. This is a function of the `input_type` and `aggregator`
+ // chosen, and will always specify a full encoding.
+ Type state_type = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
+
+ // Which aggregator function to use. The configured types must match.
+ oneof aggregator {
+ // Use the `Sum` aggregator.
+ Sum sum = 4;
+ }
+ }
+
+ // The kind of type that this represents.
+ oneof kind {
+ // This type is `Bytes`.
+ Bytes bytes_type = 1;
+
+ // This type is `Int64`.
+ Int64 int64_type = 5;
+
+ // This type is `Aggregate`.
+ Aggregate aggregate_type = 6;
+ }
+}
diff --git a/google/bigtable/v2/data.proto b/google/bigtable/v2/data.proto
@@ -1,4 +1,4 @@
-// Copyright 2022 Google LLC
+// Copyright 2023 Google LLC
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
@@ -16,6 +16,8 @@ syntax = "proto3";
 
 package google.bigtable.v2;
 
+import "google/api/field_behavior.proto";
+
 option csharp_namespace = "Google.Cloud.Bigtable.V2";
 option go_package = "google.golang.org/genproto/googleapis/bigtable/v2;bigtable";
 option java_multiple_files = true;
@@ -85,6 +87,32 @@ message Cell {
  repeated string labels = 3;
 }
 
+// `Value` represents a dynamically typed value.
+// The typed fields in `Value` are used as a transport encoding for the actual
+// value (which may be of a more complex type). See the documentation of the
+// `Type` message for more details.
+message Value {
+ // Options for transporting values within the protobuf type system. A given
+ // `kind` may support more than one `type` and vice versa. On write, this is
+ // roughly analogous to a GoogleSQL literal.
+ //
+ // The value is `NULL` if none of the fields in `kind` is set. If `type` is
+ // also omitted on write, it is inferred from the schema.
+ oneof kind {
+ // Represents a raw byte sequence with no type information.
+ // The `type` field must be omitted.
+ bytes raw_value = 8;
+
+ // Represents a raw cell timestamp with no type information.
+ // The `type` field must be omitted.
+ int64 raw_timestamp_micros = 9;
+
+ // Represents a typed value transported as an integer.
+ // Default type for writes: `Int64`.
+ int64 int_value = 6;
+ }
+}
+
 // Specifies a contiguous range of rows.
 message RowRange {
  // The row key at which to start the range.
@@ -463,6 +491,26 @@ message Mutation {
  bytes value = 4;
  }
 
+ // A Mutation which incrementally updates a cell in an `Aggregate` family.
+ message AddToCell {
+ // The name of the `Aggregate` family into which new data should be added.
+ // This must be a family with a `value_type` of `Aggregate`.
+ // Must match the regular expression `[-_.a-zA-Z0-9]+`.
+ string family_name = 1;
+
+ // The qualifier of the column into which new data should be added. This
+ // must be a `Value` with `raw_value` set.
+ Value column_qualifier = 2;
+
+ // The timestamp of the cell to which new data should be added. Must set
+ // `raw_timestamp_micros` to a value matching the table's `granularity`.
+ Value timestamp = 3;
+
+ // The input value to be accumulated into the specified cell. This must be
+ // compatible with the family's `value_type.input_type`.
+ Value input = 4;
+ }
+
  // A Mutation which deletes cells from the specified column, optionally
  // restricting the deletions to a given timestamp range.
  message DeleteFromColumn {
@@ -493,6 +541,9 @@ message Mutation {
  // Set a cell's value.
  SetCell set_cell = 1;
 
+ // Incrementally updates an `Aggregate` cell.
+ AddToCell add_to_cell = 5;
+
  // Deletes cells from a column.
  DeleteFromColumn delete_from_column = 2;