Skip to content

Commit

Permalink
feat: Add preserveAsciiControlCharacters to CsvOptions (#2143)
Browse files Browse the repository at this point in the history
* feat: Add preserveAsciiControlCharacters to CsvOptions

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Add IT

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
takayahilton and gcf-owl-bot[bot] committed Aug 29, 2022
1 parent 77f2bae commit 856893f
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public final class CsvOptions extends FormatOptions {
private final String fieldDelimiter;
private final String quote;
private final Long skipLeadingRows;
private final Boolean preserveAsciiControlCharacters;

public static final class Builder {

Expand All @@ -43,6 +44,7 @@ public static final class Builder {
private String fieldDelimiter;
private String quote;
private Long skipLeadingRows;
private Boolean preserveAsciiControlCharacters;

/** Creates an empty builder. Private, so it cannot be invoked from outside the enclosing class. */
private Builder() {}

Expand All @@ -53,6 +55,7 @@ private Builder(CsvOptions csvOptions) {
this.fieldDelimiter = csvOptions.fieldDelimiter;
this.quote = csvOptions.quote;
this.skipLeadingRows = csvOptions.skipLeadingRows;
this.preserveAsciiControlCharacters = csvOptions.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -130,6 +133,15 @@ public Builder setSkipLeadingRows(long skipLeadingRows) {
return this;
}

/**
 * Sets whether BigQuery should allow ASCII control characters in a CSV file. By default ASCII
 * control characters are not allowed.
 *
 * @param preserveAsciiControlCharacters {@code true} to allow ASCII control characters, {@code
 *     false} to disallow them
 * @return this builder, so that calls can be chained
 */
public Builder setPreserveAsciiControlCharacters(boolean preserveAsciiControlCharacters) {
this.preserveAsciiControlCharacters = preserveAsciiControlCharacters;
return this;
}

/** Creates a {@code CsvOptions} object. */
public CsvOptions build() {
return new CsvOptions(this);
Expand All @@ -144,6 +156,7 @@ private CsvOptions(Builder builder) {
this.fieldDelimiter = builder.fieldDelimiter;
this.quote = builder.quote;
this.skipLeadingRows = builder.skipLeadingRows;
this.preserveAsciiControlCharacters = builder.preserveAsciiControlCharacters;
}

/**
Expand Down Expand Up @@ -192,6 +205,14 @@ public Long getSkipLeadingRows() {
return skipLeadingRows;
}

/**
 * Returns whether BigQuery should allow ASCII control characters in a CSV file. By default ASCII
 * control characters are not allowed.
 *
 * <p>NOTE(review): the backing field is a boxed {@code Boolean} and the builder field has no
 * initializer, so this presumably returns {@code null} when the option was never set; callers
 * that unbox the result should confirm and guard against that.
 *
 * @return the configured value of the option
 */
public Boolean getPreserveAsciiControlCharacters() {
return preserveAsciiControlCharacters;
}

/** Returns a builder for the {@code CsvOptions} object. */
public Builder toBuilder() {
return new Builder(this);
Expand All @@ -207,6 +228,7 @@ public String toString() {
.add("fieldDelimiter", fieldDelimiter)
.add("quote", quote)
.add("skipLeadingRows", skipLeadingRows)
.add("preserveAsciiControlCharacters", preserveAsciiControlCharacters)
.toString();
}

Expand All @@ -219,7 +241,8 @@ public int hashCode() {
encoding,
fieldDelimiter,
quote,
skipLeadingRows);
skipLeadingRows,
preserveAsciiControlCharacters);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -475,7 +475,8 @@ com.google.api.services.bigquery.model.JobConfiguration toPb() {
.setAllowJaggedRows(csvOptions.allowJaggedRows())
.setAllowQuotedNewlines(csvOptions.allowQuotedNewLines())
.setEncoding(csvOptions.getEncoding())
.setQuote(csvOptions.getQuote());
.setQuote(csvOptions.getQuote())
.setPreserveAsciiControlCharacters(csvOptions.getPreserveAsciiControlCharacters());
if (csvOptions.getSkipLeadingRows() != null) {
// todo(mziccard) remove checked cast or comment when #1044 is closed
loadConfigurationPb.setSkipLeadingRows(Ints.checkedCast(csvOptions.getSkipLeadingRows()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ public class CsvOptionsTest {
private static final String FIELD_DELIMITER = ",";
private static final String QUOTE = "\"";
private static final long SKIP_LEADING_ROWS = 42L;

private static final boolean PRESERVE_ASCII_CONTROL_CHARACTERS = true;
private static final CsvOptions CSV_OPTIONS =
CsvOptions.newBuilder()
.setAllowJaggedRows(ALLOW_JAGGED_ROWS)
Expand All @@ -38,6 +40,7 @@ public class CsvOptionsTest {
.setFieldDelimiter(FIELD_DELIMITER)
.setQuote(QUOTE)
.setSkipLeadingRows(SKIP_LEADING_ROWS)
.setPreserveAsciiControlCharacters(PRESERVE_ASCII_CONTROL_CHARACTERS)
.build();

@Test
Expand All @@ -64,6 +67,8 @@ public void testBuilder() {
assertEquals(FIELD_DELIMITER, CSV_OPTIONS.getFieldDelimiter());
assertEquals(QUOTE, CSV_OPTIONS.getQuote());
assertEquals(SKIP_LEADING_ROWS, (long) CSV_OPTIONS.getSkipLeadingRows());
assertEquals(
PRESERVE_ASCII_CONTROL_CHARACTERS, CSV_OPTIONS.getPreserveAsciiControlCharacters());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4560,4 +4560,30 @@ public void testLocation() throws Exception {
bigquery.delete(dataset.getDatasetId(), DatasetDeleteOption.deleteContents());
}
}

/**
 * Loads a single NUL character through a write channel configured with CSV options that have
 * {@code preserveAsciiControlCharacters} enabled, then checks that the load job reports no error
 * and that the first row of the destination table reads back as the same character.
 *
 * <p>The destination table is removed even when an assertion or the load itself fails, so a
 * failing run does not leave the table behind.
 */
@Test
public void testPreserveAsciiControlCharacters()
    throws InterruptedException, IOException, TimeoutException {
  String destinationTableName = "test_preserve_ascii_control_characters";
  TableId tableId = TableId.of(DATASET, destinationTableName);
  WriteChannelConfiguration configuration =
      WriteChannelConfiguration.newBuilder(tableId)
          .setFormatOptions(
              FormatOptions.csv().toBuilder().setPreserveAsciiControlCharacters(true).build())
          .setCreateDisposition(JobInfo.CreateDisposition.CREATE_IF_NEEDED)
          .setSchema(SIMPLE_SCHEMA)
          .build();
  // Tracks whether the success-path delete already ran, so the cleanup below does not repeat it.
  boolean tableDeleted = false;
  try {
    TableDataWriteChannel channel = bigquery.writer(configuration);
    try {
      channel.write(ByteBuffer.wrap("\u0000".getBytes(StandardCharsets.UTF_8)));
    } finally {
      channel.close();
    }
    // The job is read only after the channel has been closed, as in the original ordering.
    Job job = channel.getJob().waitFor();
    assertNull(job.getStatus().getError());
    Page<FieldValueList> rows = bigquery.listTableData(tableId);
    FieldValueList row = rows.getValues().iterator().next();
    assertEquals("\u0000", row.get(0).getStringValue());
    tableDeleted = bigquery.delete(tableId);
    assertTrue(tableDeleted);
  } finally {
    if (!tableDeleted) {
      // Best-effort cleanup on the failure path. The result is deliberately not asserted here
      // so that a missing table cannot replace the failure that brought us to this branch.
      bigquery.delete(tableId);
    }
  }
}
}

0 comments on commit 856893f

Please sign in to comment.