diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 10cf433a8..3f7634f25 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -13,5 +13,5 @@ # limitations under the License. docker: image: gcr.io/cloud-devrel-public-resources/owlbot-python:latest - digest: sha256:8ff1efe878e18bd82a0fb7b70bb86f77e7ab6901fed394440b6135db0ba8d84a -# created: 2025-01-09T12:01:16.422459506Z + digest: sha256:f016446d6e520e5fb552c45b110cba3f217bffdd3d06bdddd076e9e6d13266cf +# created: 2025-02-21T19:32:52.01306189Z diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 6543d5285..188c44bbd 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -10,14 +10,10 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - - 'Kokoro snippets-3.12' - - 'Kokoro system-3.8' - 'Kokoro system-3.12' + - 'Kokoro snippets-3.12' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' - 'Samples - Python 3.9' - 'Samples - Python 3.10' - 'Samples - Python 3.11' @@ -28,8 +24,7 @@ branchProtectionRules: requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - - 'Kokoro snippets-3.8' - 'cla/google' - 'Samples - Lint' - - 'Samples - Python 3.7' - - 'Samples - Python 3.8' + - 'Samples - Python 3.9' + - 'Samples - Python 3.10' diff --git a/.kokoro/build.sh b/.kokoro/build.sh index e4da2e2a7..d41b45aa1 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -15,11 +15,13 @@ set -eo pipefail +CURRENT_DIR=$(dirname "${BASH_SOURCE[0]}") + if [[ -z "${PROJECT_ROOT:-}" ]]; then - PROJECT_ROOT="github/python-bigquery" + PROJECT_ROOT=$(realpath "${CURRENT_DIR}/..") fi -cd "${PROJECT_ROOT}" +pushd "${PROJECT_ROOT}" # Disable buffering, so that the logs stream through. export PYTHONUNBUFFERED=1 @@ -28,10 +30,16 @@ export PYTHONUNBUFFERED=1 env | grep KOKORO # Setup service account credentials. -export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +if [[ -f "${KOKORO_GFILE_DIR}/service-account.json" ]] +then + export GOOGLE_APPLICATION_CREDENTIALS=${KOKORO_GFILE_DIR}/service-account.json +fi # Setup project id. -export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +if [[ -f "${KOKORO_GFILE_DIR}/project-id.json" ]] +then + export PROJECT_ID=$(cat "${KOKORO_GFILE_DIR}/project-id.json") +fi # If this is a continuous build, send the test log to the FlakyBot. # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. @@ -46,7 +54,7 @@ fi # If NOX_SESSION is set, it only runs the specified session, # otherwise run all the sessions. 
if [[ -n "${NOX_SESSION:-}" ]]; then - python3 -m nox -s ${NOX_SESSION:-} + python3 -m nox -s ${NOX_SESSION:-} else - python3 -m nox + python3 -m nox fi diff --git a/.kokoro/docker/docs/requirements.in b/.kokoro/docker/docs/requirements.in index 816817c67..586bd0703 100644 --- a/.kokoro/docker/docs/requirements.in +++ b/.kokoro/docker/docs/requirements.in @@ -1 +1,2 @@ nox +gcp-docuploader diff --git a/.kokoro/docker/docs/requirements.txt b/.kokoro/docker/docs/requirements.txt index f99a5c4aa..a9360a25b 100644 --- a/.kokoro/docker/docs/requirements.txt +++ b/.kokoro/docker/docs/requirements.txt @@ -2,16 +2,124 @@ # This file is autogenerated by pip-compile with Python 3.10 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in +# pip-compile --allow-unsafe --generate-hashes requirements.in # -argcomplete==3.5.2 \ - --hash=sha256:036d020d79048a5d525bc63880d7a4b8d1668566b8a76daf1144c0bbe0f63472 \ - --hash=sha256:23146ed7ac4403b70bd6026402468942ceba34a6732255b9edf5b7354f68a6bb +argcomplete==3.5.3 \ + --hash=sha256:2ab2c4a215c59fd6caaff41a869480a23e8f6a5f910b266c1808037f4e375b61 \ + --hash=sha256:c12bf50eded8aebb298c7b7da7a5ff3ee24dffd9f5281867dfe1424b58c55392 # via nox +cachetools==5.5.0 \ + --hash=sha256:02134e8439cdc2ffb62023ce1debca2944c3f289d66bb17ead3ab3dede74b292 \ + --hash=sha256:2cc24fb4cbe39633fb7badd9db9ca6295d766d9c2995f245725a46715d050f2a + # via google-auth +certifi==2024.12.14 \ + --hash=sha256:1275f7a45be9464efc1173084eaa30f866fe2e47d389406136d332ed4967ec56 \ + --hash=sha256:b650d30f370c2b724812bee08008be0c4163b163ddaec3f2546c1caf65f191db + # via requests +charset-normalizer==3.4.1 \ + --hash=sha256:0167ddc8ab6508fe81860a57dd472b2ef4060e8d378f0cc555707126830f2537 \ + --hash=sha256:01732659ba9b5b873fc117534143e4feefecf3b2078b0a6a2e925271bb6f4cfa \ + --hash=sha256:01ad647cdd609225c5350561d084b42ddf732f4eeefe6e678765636791e78b9a \ + --hash=sha256:04432ad9479fa40ec0f387795ddad4437a2b50417c69fa275e212933519ff294 \ + --hash=sha256:0907f11d019260cdc3f94fbdb23ff9125f6b5d1039b76003b5b0ac9d6a6c9d5b \ + --hash=sha256:0924e81d3d5e70f8126529951dac65c1010cdf117bb75eb02dd12339b57749dd \ + --hash=sha256:09b26ae6b1abf0d27570633b2b078a2a20419c99d66fb2823173d73f188ce601 \ + --hash=sha256:09b5e6733cbd160dcc09589227187e242a30a49ca5cefa5a7edd3f9d19ed53fd \ + --hash=sha256:0af291f4fe114be0280cdd29d533696a77b5b49cfde5467176ecab32353395c4 \ + --hash=sha256:0f55e69f030f7163dffe9fd0752b32f070566451afe180f99dbeeb81f511ad8d \ + --hash=sha256:1a2bc9f351a75ef49d664206d51f8e5ede9da246602dc2d2726837620ea034b2 \ + --hash=sha256:22e14b5d70560b8dd51ec22863f370d1e595ac3d024cb8ad7d308b4cd95f8313 \ + --hash=sha256:234ac59ea147c59ee4da87a0c0f098e9c8d169f4dc2a159ef720f1a61bbe27cd \ + --hash=sha256:2369eea1ee4a7610a860d88f268eb39b95cb588acd7235e02fd5a5601773d4fa \ + --hash=sha256:237bdbe6159cff53b4f24f397d43c6336c6b0b42affbe857970cefbb620911c8 \ + --hash=sha256:28bf57629c75e810b6ae989f03c0828d64d6b26a5e205535585f96093e405ed1 \ + --hash=sha256:2967f74ad52c3b98de4c3b32e1a44e32975e008a9cd2a8cc8966d6a5218c5cb2 \ + --hash=sha256:2a75d49014d118e4198bcee5ee0a6f25856b29b12dbf7cd012791f8a6cc5c496 \ + --hash=sha256:2bdfe3ac2e1bbe5b59a1a63721eb3b95fc9b6817ae4a46debbb4e11f6232428d \ + --hash=sha256:2d074908e1aecee37a7635990b2c6d504cd4766c7bc9fc86d63f9c09af3fa11b \ + --hash=sha256:2fb9bd477fdea8684f78791a6de97a953c51831ee2981f8e4f583ff3b9d9687e \ + --hash=sha256:311f30128d7d333eebd7896965bfcfbd0065f1716ec92bd5638d7748eb6f936a \ + 
--hash=sha256:329ce159e82018d646c7ac45b01a430369d526569ec08516081727a20e9e4af4 \ + --hash=sha256:345b0426edd4e18138d6528aed636de7a9ed169b4aaf9d61a8c19e39d26838ca \ + --hash=sha256:363e2f92b0f0174b2f8238240a1a30142e3db7b957a5dd5689b0e75fb717cc78 \ + --hash=sha256:3a3bd0dcd373514dcec91c411ddb9632c0d7d92aed7093b8c3bbb6d69ca74408 \ + --hash=sha256:3bed14e9c89dcb10e8f3a29f9ccac4955aebe93c71ae803af79265c9ca5644c5 \ + --hash=sha256:44251f18cd68a75b56585dd00dae26183e102cd5e0f9f1466e6df5da2ed64ea3 \ + --hash=sha256:44ecbf16649486d4aebafeaa7ec4c9fed8b88101f4dd612dcaf65d5e815f837f \ + --hash=sha256:4532bff1b8421fd0a320463030c7520f56a79c9024a4e88f01c537316019005a \ + --hash=sha256:49402233c892a461407c512a19435d1ce275543138294f7ef013f0b63d5d3765 \ + --hash=sha256:4c0907b1928a36d5a998d72d64d8eaa7244989f7aaaf947500d3a800c83a3fd6 \ + --hash=sha256:4d86f7aff21ee58f26dcf5ae81a9addbd914115cdebcbb2217e4f0ed8982e146 \ + --hash=sha256:5777ee0881f9499ed0f71cc82cf873d9a0ca8af166dfa0af8ec4e675b7df48e6 \ + --hash=sha256:5df196eb874dae23dcfb968c83d4f8fdccb333330fe1fc278ac5ceeb101003a9 \ + --hash=sha256:619a609aa74ae43d90ed2e89bdd784765de0a25ca761b93e196d938b8fd1dbbd \ + --hash=sha256:6e27f48bcd0957c6d4cb9d6fa6b61d192d0b13d5ef563e5f2ae35feafc0d179c \ + --hash=sha256:6ff8a4a60c227ad87030d76e99cd1698345d4491638dfa6673027c48b3cd395f \ + --hash=sha256:73d94b58ec7fecbc7366247d3b0b10a21681004153238750bb67bd9012414545 \ + --hash=sha256:7461baadb4dc00fd9e0acbe254e3d7d2112e7f92ced2adc96e54ef6501c5f176 \ + --hash=sha256:75832c08354f595c760a804588b9357d34ec00ba1c940c15e31e96d902093770 \ + --hash=sha256:7709f51f5f7c853f0fb938bcd3bc59cdfdc5203635ffd18bf354f6967ea0f824 \ + --hash=sha256:78baa6d91634dfb69ec52a463534bc0df05dbd546209b79a3880a34487f4b84f \ + --hash=sha256:7974a0b5ecd505609e3b19742b60cee7aa2aa2fb3151bc917e6e2646d7667dcf \ + --hash=sha256:7a4f97a081603d2050bfaffdefa5b02a9ec823f8348a572e39032caa8404a487 \ + --hash=sha256:7b1bef6280950ee6c177b326508f86cad7ad4dff12454483b51d8b7d673a2c5d \ + --hash=sha256:7d053096f67cd1241601111b698f5cad775f97ab25d81567d3f59219b5f1adbd \ + --hash=sha256:804a4d582ba6e5b747c625bf1255e6b1507465494a40a2130978bda7b932c90b \ + --hash=sha256:807f52c1f798eef6cf26beb819eeb8819b1622ddfeef9d0977a8502d4db6d534 \ + --hash=sha256:80ed5e856eb7f30115aaf94e4a08114ccc8813e6ed1b5efa74f9f82e8509858f \ + --hash=sha256:8417cb1f36cc0bc7eaba8ccb0e04d55f0ee52df06df3ad55259b9a323555fc8b \ + --hash=sha256:8436c508b408b82d87dc5f62496973a1805cd46727c34440b0d29d8a2f50a6c9 \ + --hash=sha256:89149166622f4db9b4b6a449256291dc87a99ee53151c74cbd82a53c8c2f6ccd \ + --hash=sha256:8bfa33f4f2672964266e940dd22a195989ba31669bd84629f05fab3ef4e2d125 \ + --hash=sha256:8c60ca7339acd497a55b0ea5d506b2a2612afb2826560416f6894e8b5770d4a9 \ + --hash=sha256:91b36a978b5ae0ee86c394f5a54d6ef44db1de0815eb43de826d41d21e4af3de \ + --hash=sha256:955f8851919303c92343d2f66165294848d57e9bba6cf6e3625485a70a038d11 \ + --hash=sha256:97f68b8d6831127e4787ad15e6757232e14e12060bec17091b85eb1486b91d8d \ + --hash=sha256:9b23ca7ef998bc739bf6ffc077c2116917eabcc901f88da1b9856b210ef63f35 \ + --hash=sha256:9f0b8b1c6d84c8034a44893aba5e767bf9c7a211e313a9605d9c617d7083829f \ + --hash=sha256:aabfa34badd18f1da5ec1bc2715cadc8dca465868a4e73a0173466b688f29dda \ + --hash=sha256:ab36c8eb7e454e34e60eb55ca5d241a5d18b2c6244f6827a30e451c42410b5f7 \ + --hash=sha256:b010a7a4fd316c3c484d482922d13044979e78d1861f0e0650423144c616a46a \ + --hash=sha256:b1ac5992a838106edb89654e0aebfc24f5848ae2547d22c2c3f66454daa11971 \ + 
--hash=sha256:b7b2d86dd06bfc2ade3312a83a5c364c7ec2e3498f8734282c6c3d4b07b346b8 \ + --hash=sha256:b97e690a2118911e39b4042088092771b4ae3fc3aa86518f84b8cf6888dbdb41 \ + --hash=sha256:bc2722592d8998c870fa4e290c2eec2c1569b87fe58618e67d38b4665dfa680d \ + --hash=sha256:c0429126cf75e16c4f0ad00ee0eae4242dc652290f940152ca8c75c3a4b6ee8f \ + --hash=sha256:c30197aa96e8eed02200a83fba2657b4c3acd0f0aa4bdc9f6c1af8e8962e0757 \ + --hash=sha256:c4c3e6da02df6fa1410a7680bd3f63d4f710232d3139089536310d027950696a \ + --hash=sha256:c75cb2a3e389853835e84a2d8fb2b81a10645b503eca9bcb98df6b5a43eb8886 \ + --hash=sha256:c96836c97b1238e9c9e3fe90844c947d5afbf4f4c92762679acfe19927d81d77 \ + --hash=sha256:d7f50a1f8c450f3925cb367d011448c39239bb3eb4117c36a6d354794de4ce76 \ + --hash=sha256:d973f03c0cb71c5ed99037b870f2be986c3c05e63622c017ea9816881d2dd247 \ + --hash=sha256:d98b1668f06378c6dbefec3b92299716b931cd4e6061f3c875a71ced1780ab85 \ + --hash=sha256:d9c3cdf5390dcd29aa8056d13e8e99526cda0305acc038b96b30352aff5ff2bb \ + --hash=sha256:dad3e487649f498dd991eeb901125411559b22e8d7ab25d3aeb1af367df5efd7 \ + --hash=sha256:dccbe65bd2f7f7ec22c4ff99ed56faa1e9f785482b9bbd7c717e26fd723a1d1e \ + --hash=sha256:dd78cfcda14a1ef52584dbb008f7ac81c1328c0f58184bf9a84c49c605002da6 \ + --hash=sha256:e218488cd232553829be0664c2292d3af2eeeb94b32bea483cf79ac6a694e037 \ + --hash=sha256:e358e64305fe12299a08e08978f51fc21fac060dcfcddd95453eabe5b93ed0e1 \ + --hash=sha256:ea0d8d539afa5eb2728aa1932a988a9a7af94f18582ffae4bc10b3fbdad0626e \ + --hash=sha256:eab677309cdb30d047996b36d34caeda1dc91149e4fdca0b1a039b3f79d9a807 \ + --hash=sha256:eb8178fe3dba6450a3e024e95ac49ed3400e506fd4e9e5c32d30adda88cbd407 \ + --hash=sha256:ecddf25bee22fe4fe3737a399d0d177d72bc22be6913acfab364b40bce1ba83c \ + --hash=sha256:eea6ee1db730b3483adf394ea72f808b6e18cf3cb6454b4d86e04fa8c4327a12 \ + --hash=sha256:f08ff5e948271dc7e18a35641d2f11a4cd8dfd5634f55228b691e62b37125eb3 \ + --hash=sha256:f30bf9fd9be89ecb2360c7d94a711f00c09b976258846efe40db3d05828e8089 \ + --hash=sha256:fa88b843d6e211393a37219e6a1c1df99d35e8fd90446f1118f4216e307e48cd \ + --hash=sha256:fc54db6c8593ef7d4b2a331b58653356cf04f67c960f584edb7c3d8c97e8f39e \ + --hash=sha256:fd4ec41f914fa74ad1b8304bbc634b3de73d2a0889bd32076342a573e0779e00 \ + --hash=sha256:ffc9202a29ab3920fa812879e95a9e78b2465fd10be7fcbd042899695d75e616 + # via requests +click==8.1.8 \ + --hash=sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2 \ + --hash=sha256:ed53c9d8990d83c2a27deae68e4ee337473f6330c040a31d4225c9574d16096a + # via gcp-docuploader colorlog==6.9.0 \ --hash=sha256:5906e71acd67cb07a71e779c47c4bcb45fb8c2993eebe9e5adcd6a6f1b283eff \ --hash=sha256:bfba54a1b93b94f54e1f4fe48395725a3d92fd2a4af702f6bd70946bdc0c6ac2 - # via nox + # via + # gcp-docuploader + # nox distlib==0.3.9 \ --hash=sha256:47f8c22fd27c27e25a65601af709b38e4f0a45ea4fc2e710f65755fa8caaaf87 \ --hash=sha256:a60f20dea646b8a33f3e7772f74dc0b2d0772d2837ee1342a00645c81edf9403 @@ -20,10 +128,78 @@ filelock==3.16.1 \ --hash=sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0 \ --hash=sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435 # via virtualenv +gcp-docuploader==0.6.5 \ + --hash=sha256:30221d4ac3e5a2b9c69aa52fdbef68cc3f27d0e6d0d90e220fc024584b8d2318 \ + --hash=sha256:b7458ef93f605b9d46a4bf3a8dc1755dad1f31d030c8679edf304e343b347eea + # via -r requirements.in +google-api-core==2.24.0 \ + --hash=sha256:10d82ac0fca69c82a25b3efdeefccf6f28e02ebb97925a8cce8edbfe379929d9 \ + 
--hash=sha256:e255640547a597a4da010876d333208ddac417d60add22b6851a0c66a831fcaf + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.37.0 \ + --hash=sha256:0054623abf1f9c83492c63d3f47e77f0a544caa3d40b2d98e099a611c2dd5d00 \ + --hash=sha256:42664f18290a6be591be5329a96fe30184be1a1badb7292a7f686a9659de9ca0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.19.0 \ + --hash=sha256:aeb971b5c29cf8ab98445082cbfe7b161a1f48ed275822f59ed3f1524ea54fba \ + --hash=sha256:cd05e9e7191ba6cb68934d8eb76054d9be4562aa89dbc4236feee4d7d51342b2 + # via gcp-docuploader +google-crc32c==1.6.0 \ + --hash=sha256:05e2d8c9a2f853ff116db9706b4a27350587f341eda835f46db3c0a8c8ce2f24 \ + --hash=sha256:18e311c64008f1f1379158158bb3f0c8d72635b9eb4f9545f8cf990c5668e59d \ + --hash=sha256:236c87a46cdf06384f614e9092b82c05f81bd34b80248021f729396a78e55d7e \ + --hash=sha256:35834855408429cecf495cac67ccbab802de269e948e27478b1e47dfb6465e57 \ + --hash=sha256:386122eeaaa76951a8196310432c5b0ef3b53590ef4c317ec7588ec554fec5d2 \ + --hash=sha256:40b05ab32a5067525670880eb5d169529089a26fe35dce8891127aeddc1950e8 \ + --hash=sha256:48abd62ca76a2cbe034542ed1b6aee851b6f28aaca4e6551b5599b6f3ef175cc \ + --hash=sha256:50cf2a96da226dcbff8671233ecf37bf6e95de98b2a2ebadbfdf455e6d05df42 \ + --hash=sha256:51c4f54dd8c6dfeb58d1df5e4f7f97df8abf17a36626a217f169893d1d7f3e9f \ + --hash=sha256:5bcc90b34df28a4b38653c36bb5ada35671ad105c99cfe915fb5bed7ad6924aa \ + --hash=sha256:62f6d4a29fea082ac4a3c9be5e415218255cf11684ac6ef5488eea0c9132689b \ + --hash=sha256:6eceb6ad197656a1ff49ebfbbfa870678c75be4344feb35ac1edf694309413dc \ + --hash=sha256:7aec8e88a3583515f9e0957fe4f5f6d8d4997e36d0f61624e70469771584c760 \ + --hash=sha256:91ca8145b060679ec9176e6de4f89b07363d6805bd4760631ef254905503598d \ + --hash=sha256:a184243544811e4a50d345838a883733461e67578959ac59964e43cca2c791e7 \ + --hash=sha256:a9e4b426c3702f3cd23b933436487eb34e01e00327fac20c9aebb68ccf34117d \ + --hash=sha256:bb0966e1c50d0ef5bc743312cc730b533491d60585a9a08f897274e57c3f70e0 \ + --hash=sha256:bb8b3c75bd157010459b15222c3fd30577042a7060e29d42dabce449c087f2b3 \ + --hash=sha256:bd5e7d2445d1a958c266bfa5d04c39932dc54093fa391736dbfdb0f1929c1fb3 \ + --hash=sha256:c87d98c7c4a69066fd31701c4e10d178a648c2cac3452e62c6b24dc51f9fcc00 \ + --hash=sha256:d2952396dc604544ea7476b33fe87faedc24d666fb0c2d5ac971a2b9576ab871 \ + --hash=sha256:d8797406499f28b5ef791f339594b0b5fdedf54e203b5066675c406ba69d705c \ + --hash=sha256:d9e9913f7bd69e093b81da4535ce27af842e7bf371cde42d1ae9e9bd382dc0e9 \ + --hash=sha256:e2806553238cd076f0a55bddab37a532b53580e699ed8e5606d0de1f856b5205 \ + --hash=sha256:ebab974b1687509e5c973b5c4b8b146683e101e102e17a86bd196ecaa4d099fc \ + --hash=sha256:ed767bf4ba90104c1216b68111613f0d5926fb3780660ea1198fc469af410e9d \ + --hash=sha256:f7a1fc29803712f80879b0806cb83ab24ce62fc8daf0569f2204a0cfd7f68ed4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 \ + --hash=sha256:3ce7551e9fe6d99e9a126101d2536612bb73486721951e9562fee0f90c6ababa \ + --hash=sha256:5280aed4629f2b60b847b0d42f9857fd4935c11af266744df33d8074cae92fe0 + # via google-cloud-storage +googleapis-common-protos==1.66.0 \ + --hash=sha256:c3e7b33d15fdca5374cc0a7346dd92ffa847425cc4ea941d970f13680052ec8c \ + 
--hash=sha256:d7abcd75fabb2e0ec9f74466401f6c119a0b498e27370e9be4c94cb7e382b8ed + # via google-api-core +idna==3.10 \ + --hash=sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9 \ + --hash=sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3 + # via requests nox==2024.10.9 \ --hash=sha256:1d36f309a0a2a853e9bccb76bbef6bb118ba92fa92674d15604ca99adeb29eab \ --hash=sha256:7aa9dc8d1c27e9f45ab046ffd1c3b2c4f7c91755304769df231308849ebded95 - # via -r synthtool/gcp/templates/python_library/.kokoro/docker/docs/requirements.in + # via -r requirements.in packaging==24.2 \ --hash=sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759 \ --hash=sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f @@ -32,6 +208,51 @@ platformdirs==4.3.6 \ --hash=sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907 \ --hash=sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb # via virtualenv +proto-plus==1.25.0 \ + --hash=sha256:c91fc4a65074ade8e458e95ef8bac34d4008daa7cce4a12d6707066fca648961 \ + --hash=sha256:fbb17f57f7bd05a68b7707e745e26528b0b3c34e378db91eef93912c54982d91 + # via google-api-core +protobuf==5.29.3 \ + --hash=sha256:0a18ed4a24198528f2333802eb075e59dea9d679ab7a6c5efb017a59004d849f \ + --hash=sha256:0eb32bfa5219fc8d4111803e9a690658aa2e6366384fd0851064b963b6d1f2a7 \ + --hash=sha256:3ea51771449e1035f26069c4c7fd51fba990d07bc55ba80701c78f886bf9c888 \ + --hash=sha256:5da0f41edaf117bde316404bad1a486cb4ededf8e4a54891296f648e8e076620 \ + --hash=sha256:6ce8cc3389a20693bfde6c6562e03474c40851b44975c9b2bf6df7d8c4f864da \ + --hash=sha256:84a57163a0ccef3f96e4b6a20516cedcf5bb3a95a657131c5c3ac62200d23252 \ + --hash=sha256:a4fa6f80816a9a0678429e84973f2f98cbc218cca434abe8db2ad0bffc98503a \ + --hash=sha256:a8434404bbf139aa9e1300dbf989667a83d42ddda9153d8ab76e0d5dcaca484e \ + --hash=sha256:b89c115d877892a512f79a8114564fb435943b59067615894c3b13cd3e1fa107 \ + --hash=sha256:c027e08a08be10b67c06bf2370b99c811c466398c357e615ca88c91c07f0910f \ + --hash=sha256:daaf63f70f25e8689c072cfad4334ca0ac1d1e05a92fc15c54eb9cf23c3efd84 + # via + # gcp-docuploader + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.1 \ + --hash=sha256:0d632f46f2ba09143da3a8afe9e33fb6f92fa2320ab7e886e2d0f7672af84629 \ + --hash=sha256:6f580d2bdd84365380830acf45550f2511469f673cb4a5ae3857a3170128b034 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 \ + --hash=sha256:49bfa96b45a292b711e986f222502c1c9a5e1f4e568fc30e2574a6c7d07838fd \ + --hash=sha256:c28e2dbf9c06ad61c71a075c7e0f9fd0f1b0bb2d2ad4377f240d33ac2ab60a7c + # via google-auth +requests==2.32.3 \ + --hash=sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760 \ + --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 + # via + # google-api-core + # google-cloud-storage +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth +six==1.17.0 \ + --hash=sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274 \ + --hash=sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81 + # via gcp-docuploader tomli==2.2.1 \ --hash=sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6 \ --hash=sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd \ @@ -66,7 +287,11 @@ tomli==2.2.1 \ 
--hash=sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a \ --hash=sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7 # via nox -virtualenv==20.28.0 \ - --hash=sha256:23eae1b4516ecd610481eda647f3a7c09aea295055337331bb4e6892ecce47b0 \ - --hash=sha256:2c9c3262bb8e7b87ea801d715fae4495e6032450c71d2309be9550e7364049aa +urllib3==2.3.0 \ + --hash=sha256:1cee9ad369867bfdbbb48b7dd50374c0967a0bb7710050facf0dd6911440e3df \ + --hash=sha256:f8c5449b3cf0861679ce7e0503c7b44b5ec981bec0d1d3795a07f1ba96f0204d + # via requests +virtualenv==20.28.1 \ + --hash=sha256:412773c85d4dab0409b83ec36f7a6499e72eaf08c80e81e9576bca61831c71cb \ + --hash=sha256:5d34ab240fdb5d21549b76f9e8ff3af28252f5499fb6d6f031adac4e5a8c5329 # via nox diff --git a/.kokoro/publish-docs.sh b/.kokoro/publish-docs.sh index 233205d58..4ed4aaf13 100755 --- a/.kokoro/publish-docs.sh +++ b/.kokoro/publish-docs.sh @@ -20,10 +20,6 @@ export PYTHONUNBUFFERED=1 export PATH="${HOME}/.local/bin:${PATH}" -# Install nox -python3.10 -m pip install --require-hashes -r .kokoro/requirements.txt -python3.10 -m nox --version - # build docs nox -s docs diff --git a/.kokoro/requirements.txt b/.kokoro/requirements.txt index 16db448c1..6ad95a04a 100644 --- a/.kokoro/requirements.txt +++ b/.kokoro/requirements.txt @@ -112,34 +112,38 @@ colorlog==6.8.2 \ # via # gcp-docuploader # nox -cryptography==43.0.1 \ - --hash=sha256:014f58110f53237ace6a408b5beb6c427b64e084eb451ef25a28308270086494 \ - --hash=sha256:1bbcce1a551e262dfbafb6e6252f1ae36a248e615ca44ba302df077a846a8806 \ - --hash=sha256:203e92a75716d8cfb491dc47c79e17d0d9207ccffcbcb35f598fbe463ae3444d \ - --hash=sha256:27e613d7077ac613e399270253259d9d53872aaf657471473ebfc9a52935c062 \ - --hash=sha256:2bd51274dcd59f09dd952afb696bf9c61a7a49dfc764c04dd33ef7a6b502a1e2 \ - --hash=sha256:38926c50cff6f533f8a2dae3d7f19541432610d114a70808f0926d5aaa7121e4 \ - --hash=sha256:511f4273808ab590912a93ddb4e3914dfd8a388fed883361b02dea3791f292e1 \ - --hash=sha256:58d4e9129985185a06d849aa6df265bdd5a74ca6e1b736a77959b498e0505b85 \ - --hash=sha256:5b43d1ea6b378b54a1dc99dd8a2b5be47658fe9a7ce0a58ff0b55f4b43ef2b84 \ - --hash=sha256:61ec41068b7b74268fa86e3e9e12b9f0c21fcf65434571dbb13d954bceb08042 \ - --hash=sha256:666ae11966643886c2987b3b721899d250855718d6d9ce41b521252a17985f4d \ - --hash=sha256:68aaecc4178e90719e95298515979814bda0cbada1256a4485414860bd7ab962 \ - --hash=sha256:7c05650fe8023c5ed0d46793d4b7d7e6cd9c04e68eabe5b0aeea836e37bdcec2 \ - --hash=sha256:80eda8b3e173f0f247f711eef62be51b599b5d425c429b5d4ca6a05e9e856baa \ - --hash=sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d \ - --hash=sha256:88cce104c36870d70c49c7c8fd22885875d950d9ee6ab54df2745f83ba0dc365 \ - --hash=sha256:9d3cdb25fa98afdd3d0892d132b8d7139e2c087da1712041f6b762e4f807cc96 \ - --hash=sha256:a575913fb06e05e6b4b814d7f7468c2c660e8bb16d8d5a1faf9b33ccc569dd47 \ - --hash=sha256:ac119bb76b9faa00f48128b7f5679e1d8d437365c5d26f1c2c3f0da4ce1b553d \ - --hash=sha256:c1332724be35d23a854994ff0b66530119500b6053d0bd3363265f7e5e77288d \ - --hash=sha256:d03a475165f3134f773d1388aeb19c2d25ba88b6a9733c5c590b9ff7bbfa2e0c \ - --hash=sha256:d75601ad10b059ec832e78823b348bfa1a59f6b8d545db3a24fd44362a1564cb \ - --hash=sha256:de41fd81a41e53267cb020bb3a7212861da53a7d39f863585d13ea11049cf277 \ - --hash=sha256:e710bf40870f4db63c3d7d929aa9e09e4e7ee219e703f949ec4073b4294f6172 \ - --hash=sha256:ea25acb556320250756e53f9e20a4177515f012c9eaea17eb7587a8c4d8ae034 \ - --hash=sha256:f98bf604c82c416bc829e490c700ca1553eafdf2912a91e23a79d97d9801372a \ - 
--hash=sha256:fba1007b3ef89946dbbb515aeeb41e30203b004f0b4b00e5e16078b518563289 +cryptography==44.0.1 \ + --hash=sha256:00918d859aa4e57db8299607086f793fa7813ae2ff5a4637e318a25ef82730f7 \ + --hash=sha256:1e8d181e90a777b63f3f0caa836844a1182f1f265687fac2115fcf245f5fbec3 \ + --hash=sha256:1f9a92144fa0c877117e9748c74501bea842f93d21ee00b0cf922846d9d0b183 \ + --hash=sha256:21377472ca4ada2906bc313168c9dc7b1d7ca417b63c1c3011d0c74b7de9ae69 \ + --hash=sha256:24979e9f2040c953a94bf3c6782e67795a4c260734e5264dceea65c8f4bae64a \ + --hash=sha256:2a46a89ad3e6176223b632056f321bc7de36b9f9b93b2cc1cccf935a3849dc62 \ + --hash=sha256:322eb03ecc62784536bc173f1483e76747aafeb69c8728df48537eb431cd1911 \ + --hash=sha256:436df4f203482f41aad60ed1813811ac4ab102765ecae7a2bbb1dbb66dcff5a7 \ + --hash=sha256:4f422e8c6a28cf8b7f883eb790695d6d45b0c385a2583073f3cec434cc705e1a \ + --hash=sha256:53f23339864b617a3dfc2b0ac8d5c432625c80014c25caac9082314e9de56f41 \ + --hash=sha256:5fed5cd6102bb4eb843e3315d2bf25fede494509bddadb81e03a859c1bc17b83 \ + --hash=sha256:610a83540765a8d8ce0f351ce42e26e53e1f774a6efb71eb1b41eb01d01c3d12 \ + --hash=sha256:6c8acf6f3d1f47acb2248ec3ea261171a671f3d9428e34ad0357148d492c7864 \ + --hash=sha256:6f76fdd6fd048576a04c5210d53aa04ca34d2ed63336d4abd306d0cbe298fddf \ + --hash=sha256:72198e2b5925155497a5a3e8c216c7fb3e64c16ccee11f0e7da272fa93b35c4c \ + --hash=sha256:887143b9ff6bad2b7570da75a7fe8bbf5f65276365ac259a5d2d5147a73775f2 \ + --hash=sha256:888fcc3fce0c888785a4876ca55f9f43787f4c5c1cc1e2e0da71ad481ff82c5b \ + --hash=sha256:8e6a85a93d0642bd774460a86513c5d9d80b5c002ca9693e63f6e540f1815ed0 \ + --hash=sha256:94f99f2b943b354a5b6307d7e8d19f5c423a794462bde2bf310c770ba052b1c4 \ + --hash=sha256:9b336599e2cb77b1008cb2ac264b290803ec5e8e89d618a5e978ff5eb6f715d9 \ + --hash=sha256:a2d8a7045e1ab9b9f803f0d9531ead85f90c5f2859e653b61497228b18452008 \ + --hash=sha256:b8272f257cf1cbd3f2e120f14c68bff2b6bdfcc157fafdee84a1b795efd72862 \ + --hash=sha256:bf688f615c29bfe9dfc44312ca470989279f0e94bb9f631f85e3459af8efc009 \ + --hash=sha256:d9c5b9f698a83c8bd71e0f4d3f9f839ef244798e5ffe96febfa9714717db7af7 \ + --hash=sha256:dd7c7e2d71d908dc0f8d2027e1604102140d84b155e658c20e8ad1304317691f \ + --hash=sha256:df978682c1504fc93b3209de21aeabf2375cb1571d4e61907b3e7a2540e83026 \ + --hash=sha256:e403f7f766ded778ecdb790da786b418a9f2394f36e8cc8b796cc056ab05f44f \ + --hash=sha256:eb3889330f2a4a148abead555399ec9a32b13b7c8ba969b72d8e500eb7ef84cd \ + --hash=sha256:f4daefc971c2d1f82f03097dc6f216744a6cd2ac0f04c68fb935ea2ba2a0d420 \ + --hash=sha256:f51f5705ab27898afda1aaa430f34ad90dc117421057782022edf0600bec5f14 \ + --hash=sha256:fd0ee90072861e276b0ff08bd627abec29e32a53b2be44e41dbcdf87cbee2b00 # via # -r requirements.in # gcp-releasetool diff --git a/CHANGELOG.md b/CHANGELOG.md index 45c39e19c..91d0a362d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,32 @@ [1]: https://pypi.org/project/google-cloud-bigquery/#history +## [3.30.0](https://github.com/googleapis/python-bigquery/compare/v3.29.0...v3.30.0) (2025-02-26) + + +### Features + +* Add roundingmode enum, wiring, and tests ([#2121](https://github.com/googleapis/python-bigquery/issues/2121)) ([3a48948](https://github.com/googleapis/python-bigquery/commit/3a4894827f6e73a4a88cb22933c2004697dabcc7)) +* Adds foreign_type_info attribute to table class and adds unit tests. 
([#2126](https://github.com/googleapis/python-bigquery/issues/2126)) ([2c19681](https://github.com/googleapis/python-bigquery/commit/2c1968115bef8e1dc84e0125615f551b9b011a4b)) +* Support resource_tags for table ([#2093](https://github.com/googleapis/python-bigquery/issues/2093)) ([d4070ca](https://github.com/googleapis/python-bigquery/commit/d4070ca21b5797e900a9e87b966837ee1c278217)) + + +### Bug Fixes + +* Avoid blocking in download thread when using BQ Storage API ([#2034](https://github.com/googleapis/python-bigquery/issues/2034)) ([54c8d07](https://github.com/googleapis/python-bigquery/commit/54c8d07f06a8ae460c9e0fb1614e1fbc21efb5df)) +* Retry 404 errors in `Client.query(...)` ([#2135](https://github.com/googleapis/python-bigquery/issues/2135)) ([c6d5f8a](https://github.com/googleapis/python-bigquery/commit/c6d5f8aaec21ab8f17436407aded4bc2316323fd)) + + +### Dependencies + +* Updates required checks list in github ([#2136](https://github.com/googleapis/python-bigquery/issues/2136)) ([fea49ff](https://github.com/googleapis/python-bigquery/commit/fea49ffbf8aa1d53451864ceb7fd73189b6661cb)) +* Use pandas-gbq to determine schema in `load_table_from_dataframe` ([#2095](https://github.com/googleapis/python-bigquery/issues/2095)) ([7603bd7](https://github.com/googleapis/python-bigquery/commit/7603bd71d60592ef2a551d9eea09987b218edc73)) + + +### Documentation + +* Update magics.rst ([#2125](https://github.com/googleapis/python-bigquery/issues/2125)) ([b5bcfb3](https://github.com/googleapis/python-bigquery/commit/b5bcfb303d27015b747a3b0747ecd7f7ed0ed557)) + ## [3.29.0](https://github.com/googleapis/python-bigquery/compare/v3.28.0...v3.29.0) (2025-01-21) diff --git a/docs/magics.rst b/docs/magics.rst index aa14c6bfa..549d67f76 100644 --- a/docs/magics.rst +++ b/docs/magics.rst @@ -6,7 +6,7 @@ in a Jupyter notebook cell. .. code:: - %load_ext google.cloud.bigquery + %load_ext bigquery_magics This makes the ``%%bigquery`` magic available. @@ -27,8 +27,9 @@ Running a parameterized query: :start-after: [START bigquery_jupyter_query_params_scalars] :end-before: [END bigquery_jupyter_query_params_scalars] -API Reference -------------- +BigQuery Magics Reference +------------------------- -.. automodule:: google.cloud.bigquery.magics.magics - :members: +- `BigQuery Magics Documentation`_ + +.. 
_BigQuery Magics Documentation: https://googleapis.dev/python/bigquery-magics/latest diff --git a/google/cloud/bigquery/_helpers.py b/google/cloud/bigquery/_helpers.py index ea47af28d..d40217c4d 100644 --- a/google/cloud/bigquery/_helpers.py +++ b/google/cloud/bigquery/_helpers.py @@ -978,11 +978,11 @@ def _build_resource_from_properties(obj, filter_fields): """ partial = {} for filter_field in filter_fields: - api_field = obj._PROPERTY_TO_API_FIELD.get(filter_field) + api_field = _get_sub_prop(obj._PROPERTY_TO_API_FIELD, filter_field) if api_field is None and filter_field not in obj._properties: raise ValueError("No property %s" % filter_field) elif api_field is not None: - partial[api_field] = obj._properties.get(api_field) + _set_sub_prop(partial, api_field, _get_sub_prop(obj._properties, api_field)) else: # allows properties that are not defined in the library # and properties that have the same name as API resource key diff --git a/google/cloud/bigquery/_job_helpers.py b/google/cloud/bigquery/_job_helpers.py index e66ab2763..b028cd357 100644 --- a/google/cloud/bigquery/_job_helpers.py +++ b/google/cloud/bigquery/_job_helpers.py @@ -47,6 +47,7 @@ from google.cloud.bigquery import job import google.cloud.bigquery.query from google.cloud.bigquery import table +import google.cloud.bigquery.retry from google.cloud.bigquery.retry import POLLING_DEFAULT_VALUE # Avoid circular imports @@ -142,12 +143,28 @@ def do_query(): raise create_exc try: + # Sometimes we get a 404 after a Conflict. In this case, we + # have pretty high confidence that by retrying the 404, we'll + # (hopefully) eventually recover the job. + # https://github.com/googleapis/python-bigquery/issues/2134 + # + # Allow users who want to completely disable retries to + # continue to do so by setting retry to None. + get_job_retry = retry + if retry is not None: + # TODO(tswast): Amend the user's retry object with allowing + # 404 to retry when there's a public way to do so. + # https://github.com/googleapis/python-api-core/issues/796 + get_job_retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY + ) + query_job = client.get_job( job_id, project=project, location=location, - retry=retry, - timeout=timeout, + retry=get_job_retry, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, ) except core_exceptions.GoogleAPIError: # (includes RetryError) raise @@ -156,7 +173,13 @@ def do_query(): else: return query_job + # Allow users who want to completely disable retries to + # continue to do so by setting job_retry to None. + if job_retry is not None: + do_query = google.cloud.bigquery.retry._DEFAULT_QUERY_JOB_INSERT_RETRY(do_query) + future = do_query() + # The future might be in a failed state now, but if it's # unrecoverable, we'll find out when we ask for it's result, at which # point, we may retry. diff --git a/google/cloud/bigquery/_pandas_helpers.py b/google/cloud/bigquery/_pandas_helpers.py index bf7d10c0f..0017d92ce 100644 --- a/google/cloud/bigquery/_pandas_helpers.py +++ b/google/cloud/bigquery/_pandas_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pandas.""" +"""Shared helper functions for connecting BigQuery and pandas. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. 
See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pandas_to_bigquery.py +""" import concurrent.futures from datetime import datetime @@ -20,6 +25,7 @@ from itertools import islice import logging import queue +import threading import warnings from typing import Any, Union, Optional, Callable, Generator, List @@ -39,6 +45,16 @@ else: import numpy + +try: + import pandas_gbq.schema.pandas_to_bigquery # type: ignore + + pandas_gbq_import_exception = None +except ImportError as exc: + pandas_gbq = None + pandas_gbq_import_exception = exc + + try: import db_dtypes # type: ignore @@ -119,6 +135,21 @@ def __init__(self): # be an atomic operation in the Python language definition (enforced by # the global interpreter lock). self.done = False + # To assist with testing and understanding the behavior of the + # download, use this object as shared state to track how many worker + # threads have started and have gracefully shutdown. + self._started_workers_lock = threading.Lock() + self.started_workers = 0 + self._finished_workers_lock = threading.Lock() + self.finished_workers = 0 + + def start(self): + with self._started_workers_lock: + self.started_workers += 1 + + def finish(self): + with self._finished_workers_lock: + self.finished_workers += 1 BQ_FIELD_TYPE_TO_ARROW_FIELD_METADATA = { @@ -429,6 +460,10 @@ def _first_array_valid(series): def dataframe_to_bq_schema(dataframe, bq_schema): """Convert a pandas DataFrame schema to a BigQuery schema. + DEPRECATED: Use + pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields(), + instead. See: go/pandas-gbq-and-bigframes-redundancy. + Args: dataframe (pandas.DataFrame): DataFrame for which the client determines the BigQuery schema. @@ -444,6 +479,20 @@ def dataframe_to_bq_schema(dataframe, bq_schema): The automatically determined schema. Returns None if the type of any column cannot be determined. """ + if pandas_gbq is None: + warnings.warn( + "Loading pandas DataFrame into BigQuery will require pandas-gbq " + "package version 0.26.1 or greater in the future. " + f"Tried to import pandas-gbq and got: {pandas_gbq_import_exception}", + category=FutureWarning, + ) + else: + return pandas_gbq.schema.pandas_to_bigquery.dataframe_to_bigquery_fields( + dataframe, + override_bigquery_fields=bq_schema, + index=True, + ) + if bq_schema: bq_schema = schema._to_schema_fields(bq_schema) bq_schema_index = {field.name: field for field in bq_schema} @@ -786,20 +835,35 @@ def _bqstorage_page_to_dataframe(column_names, dtypes, page): def _download_table_bqstorage_stream( download_state, bqstorage_client, session, stream, worker_queue, page_to_item ): - reader = bqstorage_client.read_rows(stream.name) - - # Avoid deprecation warnings for passing in unnecessary read session. - # https://github.com/googleapis/python-bigquery-storage/issues/229 - if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: - rowstream = reader.rows() - else: - rowstream = reader.rows(session) + download_state.start() + try: + reader = bqstorage_client.read_rows(stream.name) - for page in rowstream.pages: - if download_state.done: - return - item = page_to_item(page) - worker_queue.put(item) + # Avoid deprecation warnings for passing in unnecessary read session. 
+ # https://github.com/googleapis/python-bigquery-storage/issues/229 + if _versions_helpers.BQ_STORAGE_VERSIONS.is_read_session_optional: + rowstream = reader.rows() + else: + rowstream = reader.rows(session) + + for page in rowstream.pages: + item = page_to_item(page) + + # Make sure we set a timeout on put() so that we give the worker + # thread opportunities to shutdown gracefully, for example if the + # parent thread shuts down or the parent generator object which + # collects rows from all workers goes out of scope. See: + # https://github.com/googleapis/python-bigquery/issues/2032 + while True: + if download_state.done: + return + try: + worker_queue.put(item, timeout=_PROGRESS_INTERVAL) + break + except queue.Full: + continue + finally: + download_state.finish() def _nowait(futures): @@ -825,6 +889,7 @@ def _download_table_bqstorage( page_to_item: Optional[Callable] = None, max_queue_size: Any = _MAX_QUEUE_SIZE_DEFAULT, max_stream_count: Optional[int] = None, + download_state: Optional[_DownloadState] = None, ) -> Generator[Any, None, None]: """Downloads a BigQuery table using the BigQuery Storage API. @@ -852,6 +917,9 @@ def _download_table_bqstorage( is True, the requested streams are limited to 1 regardless of the `max_stream_count` value. If 0 or None, then the number of requested streams will be unbounded. Defaults to None. + download_state (Optional[_DownloadState]): + A threadsafe state object which can be used to observe the + behavior of the worker threads created by this method. Yields: pandas.DataFrame: Pandas DataFrames, one for each chunk of data @@ -910,7 +978,8 @@ def _download_table_bqstorage( # Use _DownloadState to notify worker threads when to quit. # See: https://stackoverflow.com/a/29237343/101923 - download_state = _DownloadState() + if download_state is None: + download_state = _DownloadState() # Create a queue to collect frames as they are created in each thread. # diff --git a/google/cloud/bigquery/_pyarrow_helpers.py b/google/cloud/bigquery/_pyarrow_helpers.py index 3c745a611..1b42cd5c7 100644 --- a/google/cloud/bigquery/_pyarrow_helpers.py +++ b/google/cloud/bigquery/_pyarrow_helpers.py @@ -12,7 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Shared helper functions for connecting BigQuery and pyarrow.""" +"""Shared helper functions for connecting BigQuery and pyarrow. + +NOTE: This module is DEPRECATED. Please make updates in the pandas-gbq package, +instead. See: go/pandas-gbq-and-bigframes-redundancy and +https://github.com/googleapis/python-bigquery-pandas/blob/main/pandas_gbq/schema/pyarrow_to_bigquery.py +""" from typing import Any diff --git a/google/cloud/bigquery/enums.py b/google/cloud/bigquery/enums.py index d8cbe9969..5519bc989 100644 --- a/google/cloud/bigquery/enums.py +++ b/google/cloud/bigquery/enums.py @@ -246,6 +246,11 @@ class KeyResultStatementKind: class StandardSqlTypeNames(str, enum.Enum): + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in GoogleSQL. 
+ """ + def _generate_next_value_(name, start, count, last_values): return name @@ -267,6 +272,9 @@ def _generate_next_value_(name, start, count, last_values): ARRAY = enum.auto() STRUCT = enum.auto() RANGE = enum.auto() + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = enum.auto() class EntityTypes(str, enum.Enum): @@ -285,7 +293,10 @@ class EntityTypes(str, enum.Enum): # See also: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types # and https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types class SqlTypeNames(str, enum.Enum): - """Enum of allowed SQL type names in schema.SchemaField.""" + """Enum of allowed SQL type names in schema.SchemaField. + + Datatype used in Legacy SQL. + """ STRING = "STRING" BYTES = "BYTES" @@ -306,6 +317,9 @@ class SqlTypeNames(str, enum.Enum): DATETIME = "DATETIME" INTERVAL = "INTERVAL" # NOTE: not available in legacy types RANGE = "RANGE" # NOTE: not available in legacy types + # NOTE: FOREIGN acts as a wrapper for data types + # not natively understood by BigQuery unless translated + FOREIGN = "FOREIGN" class WriteDisposition(object): @@ -344,3 +358,32 @@ class DeterminismLevel: NOT_DETERMINISTIC = "NOT_DETERMINISTIC" """The UDF is not deterministic.""" + + +class RoundingMode(str, enum.Enum): + """Rounding mode options that can be used when storing NUMERIC or BIGNUMERIC + values. + + ROUNDING_MODE_UNSPECIFIED: will default to using ROUND_HALF_AWAY_FROM_ZERO. + + ROUND_HALF_AWAY_FROM_ZERO: rounds half values away from zero when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN: rounds half values to the nearest even value when applying + precision and scale upon writing of NUMERIC and BIGNUMERIC values. + For Scale: 0 + * 1.1, 1.2, 1.3, 1.4 => 1 + * 1.5 => 2 + * 1.6, 1.7, 1.8, 1.9 => 2 + * 2.5 => 2 + """ + + def _generate_next_value_(name, start, count, last_values): + return name + + ROUNDING_MODE_UNSPECIFIED = enum.auto() + ROUND_HALF_AWAY_FROM_ZERO = enum.auto() + ROUND_HALF_EVEN = enum.auto() diff --git a/google/cloud/bigquery/retry.py b/google/cloud/bigquery/retry.py index 10958980d..999d0e851 100644 --- a/google/cloud/bigquery/retry.py +++ b/google/cloud/bigquery/retry.py @@ -82,6 +82,32 @@ def _should_retry(exc): pass ``retry=bigquery.DEFAULT_RETRY.with_deadline(30)``. """ + +def _should_retry_get_job_conflict(exc): + """Predicate for determining when to retry a jobs.get call after a conflict error. + + Sometimes we get a 404 after a Conflict. In this case, we + have pretty high confidence that by retrying the 404, we'll + (hopefully) eventually recover the job. + https://github.com/googleapis/python-bigquery/issues/2134 + + Note: we may be able to extend this to user-specified predicates + after https://github.com/googleapis/python-api-core/issues/796 + to tweak existing Retry object predicates. + """ + return isinstance(exc, exceptions.NotFound) or _should_retry(exc) + + +# Pick a deadline smaller than our other deadlines since we want to timeout +# before those expire. +_DEFAULT_GET_JOB_CONFLICT_DEADLINE = _DEFAULT_RETRY_DEADLINE / 3.0 +_DEFAULT_GET_JOB_CONFLICT_RETRY = retry.Retry( + predicate=_should_retry_get_job_conflict, + deadline=_DEFAULT_GET_JOB_CONFLICT_DEADLINE, +) +"""Private, may be removed in future.""" + + # Note: Take care when updating DEFAULT_TIMEOUT to anything but None. 
We # briefly had a default timeout, but even setting it at more than twice the # theoretical server-side default timeout of 2 minutes was not enough for @@ -142,6 +168,34 @@ def _job_should_retry(exc): The default job retry object. """ + +def _query_job_insert_should_retry(exc): + # Per https://github.com/googleapis/python-bigquery/issues/2134, sometimes + # we get a 404 error. In this case, if we get this far, assume that the job + # doesn't actually exist and try again. We can't add 404 to the default + # job_retry because that happens for errors like "this table does not + # exist", which probably won't resolve with a retry. + if isinstance(exc, exceptions.RetryError): + exc = exc.cause + + if isinstance(exc, exceptions.NotFound): + message = exc.message + # Don't try to retry table/dataset not found, just job not found. + # The URL contains jobs, so use whitespace to disambiguate. + return message is not None and " job" in message.lower() + + return _job_should_retry(exc) + + +_DEFAULT_QUERY_JOB_INSERT_RETRY = retry.Retry( + predicate=_query_job_insert_should_retry, + # jobs.insert doesn't wait for the job to complete, so we don't need the + # long _DEFAULT_JOB_DEADLINE for this part. + deadline=_DEFAULT_RETRY_DEADLINE, +) +"""Private, may be removed in future.""" + + DEFAULT_GET_JOB_TIMEOUT = 128 """ Default timeout for Client.get_job(). diff --git a/google/cloud/bigquery/schema.py b/google/cloud/bigquery/schema.py index 42dfbfca8..03cde830e 100644 --- a/google/cloud/bigquery/schema.py +++ b/google/cloud/bigquery/schema.py @@ -15,21 +15,21 @@ """Schemas for BigQuery tables / queries.""" from __future__ import annotations -import collections import enum import typing -from typing import Any, cast, Dict, Iterable, Optional, Union +from typing import Any, cast, Dict, Iterable, Optional, Union, Sequence from google.cloud.bigquery import _helpers from google.cloud.bigquery import standard_sql +from google.cloud.bigquery import enums from google.cloud.bigquery.enums import StandardSqlTypeNames _STRUCT_TYPES = ("RECORD", "STRUCT") # SQL types reference: -# https://cloud.google.com/bigquery/data-types#legacy_sql_data_types -# https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types +# LEGACY SQL: https://cloud.google.com/bigquery/data-types#legacy_sql_data_types +# GoogleSQL: https://cloud.google.com/bigquery/docs/reference/standard-sql/data-types LEGACY_TO_STANDARD_TYPES = { "STRING": StandardSqlTypeNames.STRING, "BYTES": StandardSqlTypeNames.BYTES, @@ -48,6 +48,7 @@ "DATE": StandardSqlTypeNames.DATE, "TIME": StandardSqlTypeNames.TIME, "DATETIME": StandardSqlTypeNames.DATETIME, + "FOREIGN": StandardSqlTypeNames.FOREIGN, # no direct conversion from ARRAY, the latter is represented by mode="REPEATED" } """String names of the legacy SQL types to integer codes of Standard SQL standard_sql.""" @@ -166,6 +167,35 @@ class SchemaField(object): the type is RANGE, this field is required. Possible values for the field element type of a RANGE include `DATE`, `DATETIME` and `TIMESTAMP`. + + rounding_mode: Union[enums.RoundingMode, str, None] + Specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + + Unspecified will default to using ROUND_HALF_AWAY_FROM_ZERO. + ROUND_HALF_AWAY_FROM_ZERO rounds half values away from zero + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. 
+ + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5, 1.6, 1.7, 1.8, 1.9 => 2 + + ROUND_HALF_EVEN rounds half values to the nearest even value + when applying precision and scale upon writing of NUMERIC and BIGNUMERIC + values. + + For Scale: 0 + 1.1, 1.2, 1.3, 1.4 => 1 + 1.5 => 2 + 1.6, 1.7, 1.8, 1.9 => 2 + 2.5 => 2 + + foreign_type_definition: Optional[str] + Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. """ def __init__( @@ -181,11 +211,14 @@ def __init__( scale: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, max_length: Union[int, _DefaultSentinel] = _DEFAULT_VALUE, range_element_type: Union[FieldElementType, str, None] = None, + rounding_mode: Union[enums.RoundingMode, str, None] = None, + foreign_type_definition: Optional[str] = None, ): self._properties: Dict[str, Any] = { "name": name, "type": field_type, } + self._properties["name"] = name if mode is not None: self._properties["mode"] = mode.upper() if description is not _DEFAULT_VALUE: @@ -206,6 +239,11 @@ def __init__( self._properties["rangeElementType"] = {"type": range_element_type} if isinstance(range_element_type, FieldElementType): self._properties["rangeElementType"] = range_element_type.to_api_repr() + if rounding_mode is not None: + self._properties["roundingMode"] = rounding_mode + if foreign_type_definition is not None: + self._properties["foreignTypeDefinition"] = foreign_type_definition + if fields: # Don't set the property if it's not set. self._properties["fields"] = [field.to_api_repr() for field in fields] @@ -304,6 +342,22 @@ def range_element_type(self): ret = self._properties.get("rangeElementType") return FieldElementType.from_api_repr(ret) + @property + def rounding_mode(self): + """Enum that specifies the rounding mode to be used when storing values of + NUMERIC and BIGNUMERIC type. + """ + return self._properties.get("roundingMode") + + @property + def foreign_type_definition(self): + """Definition of the foreign data type. + + Only valid for top-level schema fields (not nested fields). + If the type is FOREIGN, this field is required. + """ + return self._properties.get("foreignTypeDefinition") + @property def fields(self): """Optional[tuple]: Subfields contained in this field. @@ -434,6 +488,8 @@ def _parse_schema_resource(info): Optional[Sequence[google.cloud.bigquery.schema.SchemaField`]: A list of parsed fields, or ``None`` if no "fields" key found. """ + if isinstance(info, list): + return [SchemaField.from_api_repr(f) for f in info] return [SchemaField.from_api_repr(f) for f in info.get("fields", ())] @@ -446,40 +502,46 @@ def _build_schema_resource(fields): Returns: Sequence[Dict]: Mappings describing the schema of the supplied fields. """ - return [field.to_api_repr() for field in fields] + if isinstance(fields, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [field.to_api_repr() for field in fields] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") def _to_schema_fields(schema): - """Coerce `schema` to a list of schema field instances. + """Coerces schema to a list of SchemaField instances while + preserving the original structure as much as possible. Args: - schema(Sequence[Union[ \ - :class:`~google.cloud.bigquery.schema.SchemaField`, \ - Mapping[str, Any] \ - ]]): - Table schema to convert. 
If some items are passed as mappings, - their content must be compatible with - :meth:`~google.cloud.bigquery.schema.SchemaField.from_api_repr`. + schema (Sequence[Union[ \ + :class:`~google.cloud.bigquery.schema.SchemaField`, \ + Mapping[str, Any] \ + ] + ] + ):: + Table schema to convert. Can be a list of SchemaField + objects or mappings. Returns: - Sequence[:class:`~google.cloud.bigquery.schema.SchemaField`] + A list of SchemaField objects. Raises: - Exception: If ``schema`` is not a sequence, or if any item in the - sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` - instance or a compatible mapping representation of the field. + TypeError: If schema is not a Sequence. """ - for field in schema: - if not isinstance(field, (SchemaField, collections.abc.Mapping)): - raise ValueError( - "Schema items must either be fields or compatible " - "mapping representations." - ) - return [ - field if isinstance(field, SchemaField) else SchemaField.from_api_repr(field) - for field in schema - ] + if isinstance(schema, Sequence): + # Input is a Sequence (e.g. a list): Process and return a list of SchemaFields + return [ + field + if isinstance(field, SchemaField) + else SchemaField.from_api_repr(field) + for field in schema + ] + + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") class PolicyTagList(object): diff --git a/google/cloud/bigquery/table.py b/google/cloud/bigquery/table.py index fa8d81962..c70a0ebea 100644 --- a/google/cloud/bigquery/table.py +++ b/google/cloud/bigquery/table.py @@ -21,7 +21,8 @@ import functools import operator import typing -from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union +from typing import Any, Dict, Iterable, Iterator, List, Optional, Tuple, Union, Sequence + import warnings try: @@ -66,6 +67,7 @@ from google.cloud.bigquery.encryption_configuration import EncryptionConfiguration from google.cloud.bigquery.enums import DefaultPandasDTypes from google.cloud.bigquery.external_config import ExternalConfig +from google.cloud.bigquery import schema as _schema from google.cloud.bigquery.schema import _build_schema_resource from google.cloud.bigquery.schema import _parse_schema_resource from google.cloud.bigquery.schema import _to_schema_fields @@ -398,7 +400,7 @@ class Table(_TableBase): "partitioning_type": "timePartitioning", "range_partitioning": "rangePartitioning", "time_partitioning": "timePartitioning", - "schema": "schema", + "schema": ["schema", "fields"], "snapshot_definition": "snapshotDefinition", "clone_definition": "cloneDefinition", "streaming_buffer": "streamingBuffer", @@ -409,7 +411,9 @@ class Table(_TableBase): "require_partition_filter": "requirePartitionFilter", "table_constraints": "tableConstraints", "max_staleness": "maxStaleness", + "resource_tags": "resourceTags", "external_catalog_table_options": "externalCatalogTableOptions", + "foreign_type_info": ["schema", "foreignTypeInfo"], } def __init__(self, table_ref, schema=None) -> None: @@ -450,8 +454,20 @@ def schema(self): If ``schema`` is not a sequence, or if any item in the sequence is not a :class:`~google.cloud.bigquery.schema.SchemaField` instance or a compatible mapping representation of the field. + + .. Note:: + If you are referencing a schema for an external catalog table such + as a Hive table, it will also be necessary to populate the foreign_type_info + attribute. This is not necessary if defining the schema for a BigQuery table. 
+ + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ - prop = self._properties.get(self._PROPERTY_TO_API_FIELD["schema"]) + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["schema"] + ) if not prop: return [] else: @@ -462,10 +478,21 @@ def schema(self, value): api_field = self._PROPERTY_TO_API_FIELD["schema"] if value is None: - self._properties[api_field] = None - else: + _helpers._set_sub_prop( + self._properties, + api_field, + None, + ) + elif isinstance(value, Sequence): value = _to_schema_fields(value) - self._properties[api_field] = {"fields": _build_schema_resource(value)} + value = _build_schema_resource(value) + _helpers._set_sub_prop( + self._properties, + api_field, + value, + ) + else: + raise TypeError("Schema must be a Sequence (e.g. a list) or None.") @property def labels(self): @@ -1025,6 +1052,22 @@ def table_constraints(self) -> Optional["TableConstraints"]: table_constraints = TableConstraints.from_api_repr(table_constraints) return table_constraints + @property + def resource_tags(self): + """Dict[str, str]: Resource tags for the table. + + See: https://cloud.google.com/bigquery/docs/reference/rest/v2/tables#Table.FIELDS.resource_tags + """ + return self._properties.setdefault( + self._PROPERTY_TO_API_FIELD["resource_tags"], {} + ) + + @resource_tags.setter + def resource_tags(self, value): + if not isinstance(value, dict) and value is not None: + raise ValueError("resource_tags must be a dict or None") + self._properties[self._PROPERTY_TO_API_FIELD["resource_tags"]] = value + @property def external_catalog_table_options( self, @@ -1058,6 +1101,43 @@ def external_catalog_table_options( self._PROPERTY_TO_API_FIELD["external_catalog_table_options"] ] = value + @property + def foreign_type_info(self) -> Optional[_schema.ForeignTypeInfo]: + """Optional. Specifies metadata of the foreign data type definition in + field schema (TableFieldSchema.foreign_type_definition). + + Returns: + Optional[schema.ForeignTypeInfo]: + Foreign type information, or :data:`None` if not set. + + .. Note:: + foreign_type_info is only required if you are referencing an + external catalog such as a Hive table. + For details, see: + https://cloud.google.com/bigquery/docs/external-tables + https://cloud.google.com/bigquery/docs/datasets-intro#external_datasets + """ + + prop = _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"] + ) + if prop is not None: + return _schema.ForeignTypeInfo.from_api_repr(prop) + return None + + @foreign_type_info.setter + def foreign_type_info(self, value: Union[_schema.ForeignTypeInfo, dict, None]): + value = _helpers._isinstance_or_raise( + value, + (_schema.ForeignTypeInfo, dict), + none_allowed=True, + ) + if isinstance(value, _schema.ForeignTypeInfo): + value = value.to_api_repr() + _helpers._set_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["foreign_type_info"], value + ) + @classmethod def from_string(cls, full_table_id: str) -> "Table": """Construct a table from fully-qualified table ID. diff --git a/google/cloud/bigquery/version.py b/google/cloud/bigquery/version.py index 3d852b8a3..01c4c51ca 100644 --- a/google/cloud/bigquery/version.py +++ b/google/cloud/bigquery/version.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-__version__ = "3.29.0" +__version__ = "3.30.0" diff --git a/noxfile.py b/noxfile.py index e08956b11..87bd9a70c 100644 --- a/noxfile.py +++ b/noxfile.py @@ -110,6 +110,14 @@ def default(session, install_extras=True): else: install_target = "." session.install("-e", install_target, "-c", constraints_path) + + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == UNIT_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + session.run("python", "-m", "pip", "freeze") # Run py.test against the unit tests. @@ -228,6 +236,13 @@ def system(session): extras = "[all]" session.install("-e", f".{extras}", "-c", constraints_path) + # Test with some broken "extras" in case the user didn't install the extra + # directly. For example, pandas-gbq is recommended for pandas features, but + # we want to test that we fallback to the previous behavior. For context, + # see internal document go/pandas-gbq-and-bigframes-redundancy. + if session.python == SYSTEM_TEST_PYTHON_VERSIONS[0]: + session.run("python", "-m", "pip", "uninstall", "pandas-gbq", "-y") + # print versions of all dependencies session.run("python", "-m", "pip", "freeze") diff --git a/pyproject.toml b/pyproject.toml index ecf21d922..c4e5c2f0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,6 +74,9 @@ bqstorage = [ ] pandas = [ "pandas >= 1.1.0", + "pandas-gbq >= 0.26.1; python_version >= '3.8'", + "grpcio >= 1.47.0, < 2.0dev", + "grpcio >= 1.49.1, < 2.0dev; python_version >= '3.11'", "pyarrow >= 3.0.0", "db-dtypes >= 0.3.0, < 2.0.0dev", "importlib_metadata >= 1.0.0; python_version < '3.8'", diff --git a/samples/tests/test_download_public_data.py b/samples/tests/test_download_public_data.py index 02c2c6f9c..4f6c02452 100644 --- a/samples/tests/test_download_public_data.py +++ b/samples/tests/test_download_public_data.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. import download_public_data @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data(capsys: pytest.CaptureFixture[str]) -> None: download_public_data.download_public_data() out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - "Started reading table 'bigquery-public-data.usa_names.usa_1910_current' with BQ Storage API session" - in message - for message in caplog.messages - ) diff --git a/samples/tests/test_download_public_data_sandbox.py b/samples/tests/test_download_public_data_sandbox.py index e86f604ad..d3dd31a38 100644 --- a/samples/tests/test_download_public_data_sandbox.py +++ b/samples/tests/test_download_public_data_sandbox.py @@ -12,8 +12,6 @@ # See the License for the specific language governing permissions and # limitations under the License. -import logging - import pytest from .. 
import download_public_data_sandbox @@ -21,20 +19,9 @@ pytest.importorskip("google.cloud.bigquery_storage_v1") -def test_download_public_data_sandbox( - caplog: pytest.LogCaptureFixture, capsys: pytest.CaptureFixture[str] -) -> None: - # Enable debug-level logging to verify the BigQuery Storage API is used. - caplog.set_level(logging.DEBUG) - +def test_download_public_data_sandbox(capsys: pytest.CaptureFixture[str]) -> None: download_public_data_sandbox.download_public_data_sandbox() - out, err = capsys.readouterr() + out, _ = capsys.readouterr() assert "year" in out assert "gender" in out assert "name" in out - - assert any( - # An anonymous table is used because this sample reads from query results. - ("Started reading table" in message and "BQ Storage API session" in message) - for message in caplog.messages - ) diff --git a/testing/constraints-3.8.txt b/testing/constraints-3.8.txt index e5e73c5c7..9883fb8cc 100644 --- a/testing/constraints-3.8.txt +++ b/testing/constraints-3.8.txt @@ -1,2 +1,11 @@ grpcio==1.47.0 pandas==1.2.0 + +# This constraints file is used to check that lower bounds +# are correct in setup.py +# +# Pin the version to the lower bound. +# +# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev", +# Then this file should have foo==1.14.0 +pandas-gbq==0.26.1 diff --git a/tests/system/test_client.py b/tests/system/test_client.py index c0dd83b12..30e9f94a3 100644 --- a/tests/system/test_client.py +++ b/tests/system/test_client.py @@ -732,6 +732,16 @@ def test_list_tables(self): def test_update_table(self): dataset = self.temp_dataset(_make_dataset_id("update_table")) + # This creates unique tag keys for each of test runnings for different Python versions + tag_postfix = "".join(random.choices(string.ascii_letters + string.digits, k=4)) + tag_1 = f"owner_{tag_postfix}" + tag_2 = f"classification_{tag_postfix}" + tag_3 = f"env_{tag_postfix}" + + self._create_resource_tag_key_and_values(tag_1, ["Alice", "Bob"]) + self._create_resource_tag_key_and_values(tag_2, ["public"]) + self._create_resource_tag_key_and_values(tag_3, ["dev"]) + TABLE_NAME = "test_table" table_arg = Table(dataset.table(TABLE_NAME), schema=SCHEMA) self.assertFalse(_table_exists(table_arg)) @@ -744,14 +754,25 @@ def test_update_table(self): table.friendly_name = "Friendly" table.description = "Description" table.labels = {"priority": "high", "color": "blue"} + table.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + } table2 = Config.CLIENT.update_table( - table, ["friendly_name", "description", "labels"] + table, ["friendly_name", "description", "labels", "resource_tags"] ) self.assertEqual(table2.friendly_name, "Friendly") self.assertEqual(table2.description, "Description") self.assertEqual(table2.labels, {"priority": "high", "color": "blue"}) + self.assertEqual( + table2.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Alice", + f"{Config.CLIENT.project}/{tag_3}": "dev", + }, + ) table2.description = None table2.labels = { @@ -759,9 +780,28 @@ def test_update_table(self): "shape": "circle", # add "priority": None, # delete } - table3 = Config.CLIENT.update_table(table2, ["description", "labels"]) + table2.resource_tags = { + f"{Config.CLIENT.project}/{tag_1}": "Bob", # change + f"{Config.CLIENT.project}/{tag_2}": "public", # add + f"{Config.CLIENT.project}/{tag_3}": None, # delete + } + table3 = Config.CLIENT.update_table( + table2, ["description", "labels", "resource_tags"] + ) self.assertIsNone(table3.description) 
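The system test above also pins down the update semantics for tag bindings. Roughly, in user code (a hedged sketch; client and table are as in the earlier sketch and the tag keys are illustrative):

project = client.project

table.resource_tags = {
    f"{project}/owner": "Bob",              # change an existing binding
    f"{project}/classification": "public",  # add a new binding
    f"{project}/env": None,                 # delete a binding
}
table = client.update_table(table, ["resource_tags"])

# Setting the property to None removes all bindings; it then reads back as {}.
table.resource_tags = None
table = client.update_table(table, ["resource_tags"])
assert table.resource_tags == {}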
self.assertEqual(table3.labels, {"color": "green", "shape": "circle"}) + self.assertEqual( + table3.resource_tags, + { + f"{Config.CLIENT.project}/{tag_1}": "Bob", + f"{Config.CLIENT.project}/{tag_2}": "public", + }, + ) + + # Delete resource tag bindings. + table3.resource_tags = None + table4 = Config.CLIENT.update_table(table3, ["resource_tags"]) + self.assertEqual(table4.resource_tags, {}) # If we try to update using table2 again, it will fail because the # previous update changed the ETag. diff --git a/tests/system/test_pandas.py b/tests/system/test_pandas.py index 85c7b79e6..a9e76d416 100644 --- a/tests/system/test_pandas.py +++ b/tests/system/test_pandas.py @@ -1259,7 +1259,7 @@ def test_upload_time_and_datetime_56(bigquery_client, dataset_id): df = pandas.DataFrame( dict( dt=[ - datetime.datetime(2020, 1, 8, 8, 0, 0), + datetime.datetime(2020, 1, 8, 8, 0, 0, tzinfo=datetime.timezone.utc), datetime.datetime( 2020, 1, diff --git a/tests/unit/job/test_load.py b/tests/unit/job/test_load.py index 0fb044696..10df46fb3 100644 --- a/tests/unit/job/test_load.py +++ b/tests/unit/job/test_load.py @@ -272,7 +272,7 @@ def test_schema_setter_invalid_field(self): config = LoadJobConfig() full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): config.schema = [full_name, object()] def test_schema_setter(self): diff --git a/tests/unit/test__pandas_helpers.py b/tests/unit/test__pandas_helpers.py index 3a5fddacc..fdd232a5c 100644 --- a/tests/unit/test__pandas_helpers.py +++ b/tests/unit/test__pandas_helpers.py @@ -16,6 +16,7 @@ import datetime import decimal import functools +import gc import operator import queue from typing import Union @@ -34,6 +35,11 @@ except ImportError: pandas = None +try: + import pandas_gbq.schema.pandas_to_bigquery +except ImportError: + pandas_gbq = None + try: import geopandas except ImportError: @@ -1280,7 +1286,21 @@ def test_dataframe_to_parquet_compression_method(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_named_index(module_under_test): +@pytest.mark.skipif(pandas_gbq is None, reason="Requires `pandas-gbq`") +def test_dataframe_to_bq_schema_returns_schema_with_pandas_gbq( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame({"field00": ["foo", "bar"]}) + got = module_under_test.dataframe_to_bq_schema(dataframe, []) + # Don't assert beyond this, since pandas-gbq is now source of truth. 
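A hedged sketch of the behavior these unit tests pin down: when pandas-gbq is installed it becomes the source of truth for schema inference, and when it is absent the client falls back to the legacy in-library logic and emits a FutureWarning. The module and function below are internal and shown only for illustration:

import warnings

import pandas
from google.cloud.bigquery import _pandas_helpers

dataframe = pandas.DataFrame({"field00": ["foo", "bar"]})

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    bq_schema = _pandas_helpers.dataframe_to_bq_schema(dataframe, [])

# Without pandas-gbq installed, at least one FutureWarning mentioning
# pandas-gbq is expected from the fallback path.
if _pandas_helpers.pandas_gbq is None:
    assert any("pandas-gbq" in str(w.message) for w in caught)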
+ assert got is not None + + +@pytest.mark.skipif(pandas is None, reason="Requires `pandas`") +def test_dataframe_to_bq_schema_w_named_index(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1291,7 +1311,8 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): index = pandas.Index(["a", "b"], name="str_index") dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1303,7 +1324,9 @@ def test_dataframe_to_bq_schema_w_named_index(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_multiindex(module_under_test): +def test_dataframe_to_bq_schema_w_multiindex(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1320,7 +1343,8 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): ) dataframe = pandas.DataFrame(df_data, index=index) - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, []) expected_schema = ( schema.SchemaField("str_index", "STRING", "NULLABLE"), @@ -1334,7 +1358,9 @@ def test_dataframe_to_bq_schema_w_multiindex(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): +def test_dataframe_to_bq_schema_w_bq_schema(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df_data = collections.OrderedDict( [ ("str_column", ["hello", "world"]), @@ -1349,7 +1375,10 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): {"name": "bool_column", "type": "BOOL", "mode": "REQUIRED"}, ] - returned_schema = module_under_test.dataframe_to_bq_schema(dataframe, dict_schema) + with pytest.warns(FutureWarning, match="pandas-gbq"): + returned_schema = module_under_test.dataframe_to_bq_schema( + dataframe, dict_schema + ) expected_schema = ( schema.SchemaField("str_column", "STRING", "NULLABLE"), @@ -1360,7 +1389,11 @@ def test_dataframe_to_bq_schema_w_bq_schema(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") -def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"id": 10, "status": "FOO", "execution_date": datetime.date(2019, 5, 10)}, @@ -1388,7 +1421,11 @@ def test_dataframe_to_bq_schema_fallback_needed_wo_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): +def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow( + module_under_test, monkeypatch +): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ 
{"id": 10, "status": "FOO", "created_at": datetime.date(2019, 5, 10)}, @@ -1418,7 +1455,9 @@ def test_dataframe_to_bq_schema_fallback_needed_w_pyarrow(module_under_test): @pytest.mark.skipif(pandas is None, reason="Requires `pandas`") @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") -def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): +def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test, monkeypatch): + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + dataframe = pandas.DataFrame( data=[ {"struct_field": {"one": 2}, "status": "FOO"}, @@ -1442,9 +1481,11 @@ def test_dataframe_to_bq_schema_pyarrow_fallback_fails(module_under_test): @pytest.mark.skipif(geopandas is None, reason="Requires `geopandas`") -def test_dataframe_to_bq_schema_geography(module_under_test): +def test_dataframe_to_bq_schema_geography(module_under_test, monkeypatch): from shapely import wkt + monkeypatch.setattr(module_under_test, "pandas_gbq", None) + df = geopandas.GeoDataFrame( pandas.DataFrame( dict( @@ -1455,7 +1496,10 @@ def test_dataframe_to_bq_schema_geography(module_under_test): ), geometry="geo1", ) - bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + + with pytest.warns(FutureWarning, match="pandas-gbq"): + bq_schema = module_under_test.dataframe_to_bq_schema(df, []) + assert bq_schema == ( schema.SchemaField("name", "STRING"), schema.SchemaField("geo1", "GEOGRAPHY"), @@ -1846,6 +1890,98 @@ def fake_download_stream( assert queue_used.maxsize == expected_maxsize +@pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") +def test__download_table_bqstorage_shuts_down_workers( + monkeypatch, + module_under_test, +): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2032 + + Make sure that when the top-level iterator goes out of scope (is deleted), + the child threads are also stopped. + """ + from google.cloud.bigquery import dataset + from google.cloud.bigquery import table + import google.cloud.bigquery_storage_v1.reader + import google.cloud.bigquery_storage_v1.types + + monkeypatch.setattr( + _versions_helpers.BQ_STORAGE_VERSIONS, "_installed_version", None + ) + monkeypatch.setattr(bigquery_storage, "__version__", "2.5.0") + + # Create a fake stream with a decent number of rows. 
+ arrow_schema = pyarrow.schema( + [ + ("int_col", pyarrow.int64()), + ("str_col", pyarrow.string()), + ] + ) + arrow_rows = pyarrow.record_batch( + [ + pyarrow.array([0, 1, 2], type=pyarrow.int64()), + pyarrow.array(["a", "b", "c"], type=pyarrow.string()), + ], + schema=arrow_schema, + ) + session = google.cloud.bigquery_storage_v1.types.ReadSession() + session.data_format = "ARROW" + session.arrow_schema = {"serialized_schema": arrow_schema.serialize().to_pybytes()} + session.streams = [ + google.cloud.bigquery_storage_v1.types.ReadStream(name=name) + for name in ("stream/s0", "stream/s1", "stream/s2") + ] + bqstorage_client = mock.create_autospec( + bigquery_storage.BigQueryReadClient, instance=True + ) + reader = mock.create_autospec( + google.cloud.bigquery_storage_v1.reader.ReadRowsStream, instance=True + ) + reader.__iter__.return_value = [ + google.cloud.bigquery_storage_v1.types.ReadRowsResponse( + arrow_schema={"serialized_schema": arrow_schema.serialize().to_pybytes()}, + arrow_record_batch={ + "serialized_record_batch": arrow_rows.serialize().to_pybytes() + }, + ) + for _ in range(100) + ] + reader.rows.return_value = google.cloud.bigquery_storage_v1.reader.ReadRowsIterable( + reader, read_session=session + ) + bqstorage_client.read_rows.return_value = reader + bqstorage_client.create_read_session.return_value = session + table_ref = table.TableReference( + dataset.DatasetReference("project-x", "dataset-y"), + "table-z", + ) + download_state = module_under_test._DownloadState() + assert download_state.started_workers == 0 + assert download_state.finished_workers == 0 + + result_gen = module_under_test._download_table_bqstorage( + "some-project", + table_ref, + bqstorage_client, + max_queue_size=1, + page_to_item=module_under_test._bqstorage_page_to_arrow, + download_state=download_state, + ) + + result_gen_iter = iter(result_gen) + next(result_gen_iter) + assert download_state.started_workers == 3 + assert download_state.finished_workers == 0 + + # Stop iteration early and simulate the variables going out of scope + # to be doubly sure that the worker threads are supposed to be cleaned up. 
+ del result_gen, result_gen_iter + gc.collect() + + assert download_state.started_workers == 3 + assert download_state.finished_workers == 3 + + @pytest.mark.skipif(isinstance(pyarrow, mock.Mock), reason="Requires `pyarrow`") def test_download_arrow_row_iterator_unknown_field_type(module_under_test): fake_page = api_core.page_iterator.Page( diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py index 14089b031..4f13d6ecc 100644 --- a/tests/unit/test_client.py +++ b/tests/unit/test_client.py @@ -28,9 +28,12 @@ from unittest import mock import warnings -import requests +import freezegun import packaging import pytest +import requests + +import google.api try: @@ -55,6 +58,8 @@ import google.cloud._helpers from google.cloud import bigquery +from google.cloud.bigquery import job as bqjob +import google.cloud.bigquery._job_helpers from google.cloud.bigquery.dataset import DatasetReference from google.cloud.bigquery import exceptions from google.cloud.bigquery import ParquetOptions @@ -2051,7 +2056,7 @@ def test_update_dataset(self): ds.labels = LABELS ds.access_entries = [AccessEntry("OWNER", "userByEmail", "phred@example.com")] ds.resource_tags = RESOURCE_TAGS - fields = [ + filter_fields = [ "description", "friendly_name", "location", @@ -2065,12 +2070,12 @@ def test_update_dataset(self): ) as final_attributes: ds2 = client.update_dataset( ds, - fields=fields, + fields=filter_fields, timeout=7.5, ) final_attributes.assert_called_once_with( - {"path": "/%s" % PATH, "fields": fields}, client, None + {"path": "/%s" % PATH, "fields": filter_fields}, client, None ) conn.api_request.assert_called_once_with( @@ -2320,6 +2325,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } ) schema = [ @@ -2343,7 +2349,8 @@ def test_update_table(self): table.description = description table.friendly_name = title table.labels = {"x": "y"} - fields = ["schema", "description", "friendly_name", "labels"] + table.resource_tags = {"123456789012/key": "value"} + fields = ["schema", "description", "friendly_name", "labels", "resource_tags"] with mock.patch( "google.cloud.bigquery.opentelemetry_tracing._get_final_span_attributes" ) as final_attributes: @@ -2375,6 +2382,7 @@ def test_update_table(self): "description": description, "friendlyName": title, "labels": {"x": "y"}, + "resourceTags": {"123456789012/key": "value"}, } conn.api_request.assert_called_once_with( method="PATCH", data=sent, path="/" + path, timeout=7.5 @@ -2383,6 +2391,7 @@ def test_update_table(self): self.assertEqual(updated_table.friendly_name, table.friendly_name) self.assertEqual(updated_table.schema, table.schema) self.assertEqual(updated_table.labels, table.labels) + self.assertEqual(updated_table.resource_tags, table.resource_tags) # ETag becomes If-Match header. 
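Stepping back from the unit-test plumbing, the _download_table_bqstorage_shuts_down_workers regression test above corresponds roughly to this user-facing pattern (a hedged sketch; the public table name is only an example and both clients are assumed to be configured with default credentials):

from google.cloud import bigquery
from google.cloud import bigquery_storage

client = bigquery.Client()
bqstorage_client = bigquery_storage.BigQueryReadClient()

rows = client.list_rows("bigquery-public-data.usa_names.usa_1910_current")
pages = rows.to_dataframe_iterable(bqstorage_client=bqstorage_client)

first_page = next(iter(pages))  # consume only part of the result

# Dropping the iterator early should let the background reader threads finish
# instead of hanging at interpreter shutdown (issue #2032).
del pages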
table._properties["etag"] = "etag" @@ -2611,7 +2620,7 @@ def test_update_table_w_schema_None(self): self.assertEqual(len(conn.api_request.call_args_list), 2) req = conn.api_request.call_args_list[1] self.assertEqual(req[1]["method"], "PATCH") - sent = {"schema": None} + sent = {"schema": {"fields": None}} self.assertEqual(req[1]["data"], sent) self.assertEqual(req[1]["path"], "/%s" % path) self.assertEqual(len(updated_table.schema), 0) @@ -5304,6 +5313,173 @@ def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails(self): with pytest.raises(DataLoss, match="we lost your job, sorry"): client.query("SELECT 1;", job_id=None) + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_fails_no_retries(self): + from google.api_core.exceptions import Conflict + from google.api_core.exceptions import DataLoss + from google.cloud.bigquery.job import QueryJob + + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + job_create_error = Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + QueryJob, "_begin", side_effect=job_create_error + ) + get_job_patcher = mock.patch.object( + client, "get_job", side_effect=DataLoss("we lost your job, sorry") + ) + + with job_begin_patcher, get_job_patcher: + # If get job request fails but supposedly there does exist a job + # with this ID already, raise the exception explaining why we + # couldn't recover the job. + with pytest.raises(DataLoss, match="we lost your job, sorry"): + client.query( + "SELECT 1;", + job_id=None, + # Explicitly test with no retries to make sure those branches are covered. + retry=None, + job_retry=None, + ) + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404(self): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404, but we know + because of the conflict that really the job does exist. Retry until we + get the job status (or timeout). + """ + job_id = "abc123" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + conn = client._connection = make_connection( + # We're mocking QueryJob._begin, so this is only going to be + # jobs.get requests and responses. + google.api_core.exceptions.TooManyRequests("this is retriable by default"), + google.api_core.exceptions.NotFound("we lost your job"), + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id, + } + }, + ) + + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + return_value=job_id, + ) + + with job_begin_patcher, job_id_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). 
+ result = client.query("SELECT 1;", job_id=None) + + jobs_get_path = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. + [jobs_get_path] + * 4, + ) + assert result.job_id == job_id + + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_retries_404_and_query_job_insert( + self, + ): + """Regression test for https://github.com/googleapis/python-bigquery/issues/2134 + + Sometimes after a Conflict, the fetch fails with a 404. If it keeps + failing with a 404, assume that the job actually doesn't exist. + """ + job_id_1 = "abc123" + job_id_2 = "xyz789" + creds = _make_credentials() + http = object() + client = self._make_one(project=self.PROJECT, credentials=creds, _http=http) + + # We're mocking QueryJob._begin, so that the connection should only get + # jobs.get requests. + job_create_error = google.api_core.exceptions.Conflict("Job already exists.") + job_begin_patcher = mock.patch.object( + bqjob.QueryJob, "_begin", side_effect=job_create_error + ) + conn = client._connection = make_connection( + google.api_core.exceptions.NotFound("we lost your job again, sorry"), + { + "jobReference": { + "projectId": self.PROJECT, + "location": "TESTLOC", + "jobId": job_id_2, + } + }, + ) + + # Choose a small deadline so the 404 retries give up. + retry = ( + google.cloud.bigquery.retry._DEFAULT_GET_JOB_CONFLICT_RETRY.with_deadline(1) + ) + job_id_patcher = mock.patch.object( + google.cloud.bigquery._job_helpers, + "make_job_id", + side_effect=[job_id_1, job_id_2], + ) + retry_patcher = mock.patch.object( + google.cloud.bigquery.retry, + "_DEFAULT_GET_JOB_CONFLICT_RETRY", + retry, + ) + + with freezegun.freeze_time( + "2025-01-01 00:00:00", + # 10x the retry deadline to guarantee a timeout. + auto_tick_seconds=10, + ), job_begin_patcher, job_id_patcher, retry_patcher: + # If get job request fails there does exist a job + # with this ID already, retry 404 until we get it (or fails for a + # non-retriable reason, see other tests). + result = client.query("SELECT 1;", job_id=None) + + jobs_get_path_1 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_1}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + jobs_get_path_2 = mock.call( + method="GET", + path=f"/projects/{self.PROJECT}/jobs/{job_id_2}", + query_params={ + "projection": "full", + }, + timeout=google.cloud.bigquery.retry.DEFAULT_GET_JOB_TIMEOUT, + ) + conn.api_request.assert_has_calls( + # Double-check that it was jobs.get that was called for each of our + # mocked responses. 
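Related to the retry branches exercised above: callers can tune or disable the retry objects this recovery path relies on through the public query() arguments (a hedged sketch; the deadline value is arbitrary):

from google.cloud.bigquery import retry as bq_retry

job = client.query(
    "SELECT 1;",
    retry=bq_retry.DEFAULT_RETRY.with_deadline(60),  # retries for API calls such as jobs.insert / jobs.get
    job_retry=None,  # do not re-issue the query itself on retryable job errors
)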
+ [jobs_get_path_1, jobs_get_path_2], + ) + assert result.job_id == job_id_2 + def test_query_job_rpc_fail_w_conflict_random_id_job_fetch_succeeds(self): from google.api_core.exceptions import Conflict from google.cloud.bigquery.job import QueryJob @@ -8387,8 +8563,12 @@ def test_load_table_from_dataframe_w_automatic_schema_detection_fails(self): autospec=True, side_effect=google.api_core.exceptions.NotFound("Table not found"), ) + pandas_gbq_patch = mock.patch( + "google.cloud.bigquery._pandas_helpers.pandas_gbq", + new=None, + ) - with load_patch as load_table_from_file, get_table_patch: + with load_patch as load_table_from_file, get_table_patch, pandas_gbq_patch: with warnings.catch_warnings(record=True) as warned: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8444,7 +8624,6 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8456,6 +8635,7 @@ def test_load_table_from_dataframe_w_index_and_auto_schema(self): ] ), ) + with load_patch as load_table_from_file, get_table_patch: client.load_table_from_dataframe( dataframe, self.TABLE_REF, location=self.LOCATION @@ -8576,10 +8756,10 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se client = self._make_client() dataframe = pandas.DataFrame({"x": [1, 2, None, 4]}, dtype="Int64") + load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True ) - get_table_patch = mock.patch( "google.cloud.bigquery.client.Client.get_table", autospec=True, @@ -8608,8 +8788,11 @@ def test_load_table_from_dataframe_w_nullable_int64_datatype_automatic_schema(se sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert tuple(sent_config.schema) == ( - SchemaField("x", "INT64", "NULLABLE", None), + assert ( + # Accept either the GoogleSQL or legacy SQL type name from pandas-gbq. 
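For context on why the assertion above accepts two spellings: pandas-gbq may report legacy SQL type names ("INTEGER", "FLOAT") while the in-library fallback reports GoogleSQL names ("INT64", "FLOAT64"). The resulting SchemaField objects compare unequal on the raw type string but resolve to the same standard SQL type, as this hedged sketch shows:

from google.cloud.bigquery import SchemaField

legacy = SchemaField("x", "INTEGER", "NULLABLE")
googlesql = SchemaField("x", "INT64", "NULLABLE")

assert legacy != googlesql  # equality compares the raw type strings
assert (
    legacy.to_standard_sql().type.type_kind
    == googlesql.to_standard_sql().type.type_kind
)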
+ tuple(sent_config.schema) == (SchemaField("x", "INT64", "NULLABLE", None),) + or tuple(sent_config.schema) + == (SchemaField("x", "INTEGER", "NULLABLE", None),) ) def test_load_table_from_dataframe_struct_fields(self): @@ -8755,7 +8938,7 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): data=records, columns=["float_column", "array_column"] ) - expected_schema = [ + expected_schema_googlesql = [ SchemaField("float_column", "FLOAT"), SchemaField( "array_column", @@ -8763,6 +8946,14 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): mode="REPEATED", ), ] + expected_schema_legacy_sql = [ + SchemaField("float_column", "FLOAT"), + SchemaField( + "array_column", + "INTEGER", + mode="REPEATED", + ), + ] load_patch = mock.patch( "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True @@ -8798,7 +8989,10 @@ def test_load_table_from_dataframe_array_fields_w_auto_schema(self): sent_config = load_table_from_file.mock_calls[0][2]["job_config"] assert sent_config.source_format == job.SourceFormat.PARQUET - assert sent_config.schema == expected_schema + assert ( + sent_config.schema == expected_schema_googlesql + or sent_config.schema == expected_schema_legacy_sql + ) def test_load_table_from_dataframe_w_partial_schema(self): pandas = pytest.importorskip("pandas") @@ -8918,7 +9112,6 @@ def test_load_table_from_dataframe_w_partial_schema_extra_types(self): load_table_from_file.assert_not_called() message = str(exc_context.value) - assert "bq_schema contains fields not present in dataframe" in message assert "unknown_col" in message def test_load_table_from_dataframe_w_schema_arrow_custom_compression(self): diff --git a/tests/unit/test_schema.py b/tests/unit/test_schema.py index efbc5d26f..3f2304a70 100644 --- a/tests/unit/test_schema.py +++ b/tests/unit/test_schema.py @@ -19,6 +19,7 @@ import pytest from google.cloud import bigquery +from google.cloud.bigquery import enums from google.cloud.bigquery.standard_sql import StandardSqlStructType from google.cloud.bigquery import schema from google.cloud.bigquery.schema import PolicyTagList @@ -49,6 +50,8 @@ def test_constructor_defaults(self): self.assertEqual(field.fields, ()) self.assertIsNone(field.policy_tags) self.assertIsNone(field.default_value_expression) + self.assertEqual(field.rounding_mode, None) + self.assertEqual(field.foreign_type_definition, None) def test_constructor_explicit(self): FIELD_DEFAULT_VALUE_EXPRESSION = "This is the default value for this field" @@ -64,6 +67,8 @@ def test_constructor_explicit(self): ) ), default_value_expression=FIELD_DEFAULT_VALUE_EXPRESSION, + rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED, + foreign_type_definition="INTEGER", ) self.assertEqual(field.name, "test") self.assertEqual(field.field_type, "STRING") @@ -80,6 +85,8 @@ def test_constructor_explicit(self): ) ), ) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") + self.assertEqual(field.foreign_type_definition, "INTEGER") def test_constructor_explicit_none(self): field = self._make_one("test", "STRING", description=None, policy_tags=None) @@ -137,8 +144,16 @@ def test_to_api_repr(self): {"names": ["foo", "bar"]}, ) + ROUNDINGMODE = enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED + field = self._make_one( - "foo", "INTEGER", "NULLABLE", description="hello world", policy_tags=policy + "foo", + "INTEGER", + "NULLABLE", + description="hello world", + policy_tags=policy, + rounding_mode=ROUNDINGMODE, + foreign_type_definition=None, ) self.assertEqual( field.to_api_repr(), 
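The new SchemaField keyword arguments exercised above can be summarized with a small hedged sketch (the field name and NUMERIC type are illustrative; rounding_mode also accepts the plain string values):

from google.cloud.bigquery import enums
from google.cloud.bigquery.schema import SchemaField

field = SchemaField(
    "price",
    "NUMERIC",
    rounding_mode=enums.RoundingMode.ROUNDING_MODE_UNSPECIFIED,
    foreign_type_definition=None,  # only used for FOREIGN fields from external catalogs
)

# When set, the rounding mode is serialized as the "roundingMode" API field.
assert field.to_api_repr()["roundingMode"] == "ROUNDING_MODE_UNSPECIFIED"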
@@ -148,6 +163,7 @@ def test_to_api_repr(self): "type": "INTEGER", "description": "hello world", "policyTags": {"names": ["foo", "bar"]}, + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", }, ) @@ -181,6 +197,7 @@ def test_from_api_repr(self): "description": "test_description", "name": "foo", "type": "record", + "roundingMode": "ROUNDING_MODE_UNSPECIFIED", } ) self.assertEqual(field.name, "foo") @@ -192,6 +209,7 @@ def test_from_api_repr(self): self.assertEqual(field.fields[0].field_type, "INTEGER") self.assertEqual(field.fields[0].mode, "NULLABLE") self.assertEqual(field.range_element_type, None) + self.assertEqual(field.rounding_mode, "ROUNDING_MODE_UNSPECIFIED") def test_from_api_repr_policy(self): field = self._get_target_class().from_api_repr( @@ -283,6 +301,28 @@ def test_fields_property(self): schema_field = self._make_one("boat", "RECORD", fields=fields) self.assertEqual(schema_field.fields, fields) + def test_roundingmode_property_str(self): + ROUNDINGMODE = "ROUND_HALF_AWAY_FROM_ZERO" + schema_field = self._make_one("test", "STRING", rounding_mode=ROUNDINGMODE) + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["roundingMode"] = ROUNDINGMODE + self.assertEqual(schema_field.rounding_mode, ROUNDINGMODE) + + def test_foreign_type_definition_property_str(self): + FOREIGN_TYPE_DEFINITION = "INTEGER" + schema_field = self._make_one( + "test", "STRING", foreign_type_definition=FOREIGN_TYPE_DEFINITION + ) + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + + del schema_field + schema_field = self._make_one("test", "STRING") + schema_field._properties["foreignTypeDefinition"] = FOREIGN_TYPE_DEFINITION + self.assertEqual(schema_field.foreign_type_definition, FOREIGN_TYPE_DEFINITION) + def test_to_standard_sql_simple_type(self): examples = ( # a few legacy types @@ -457,6 +497,20 @@ def test_to_standard_sql_unknown_type(self): bigquery.StandardSqlTypeNames.TYPE_KIND_UNSPECIFIED, ) + def test_to_standard_sql_foreign_type_valid(self): + legacy_type = "FOREIGN" + standard_type = bigquery.StandardSqlTypeNames.FOREIGN + foreign_type_definition = "INTEGER" + + field = self._make_one( + "some_field", + field_type=legacy_type, + foreign_type_definition=foreign_type_definition, + ) + standard_field = field.to_standard_sql() + self.assertEqual(standard_field.name, "some_field") + self.assertEqual(standard_field.type.type_kind, standard_type) + def test___eq___wrong_type(self): field = self._make_one("test", "STRING") other = object() @@ -711,27 +765,62 @@ def test__parse_schema_resource_fields_without_mode(self): self._verifySchema(schema, RESOURCE) -class Test_build_schema_resource(unittest.TestCase, _SchemaBase): +class Test_build_schema_resource: + """Tests for the _build_schema_resource function.""" + def _call_fut(self, resource): - from google.cloud.bigquery.schema import _build_schema_resource + return schema._build_schema_resource(resource) + + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} + + @pytest.mark.parametrize( + "schema,expected", 
+ [ + pytest.param([], [], id="empty list"), + pytest.param([FULL_NAME, AGE], LIST_RESOURCE, id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): + result = self._call_fut(schema) + + assert result == expected - return _build_schema_resource(resource) + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid type"), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(TypeError) as e: + self._call_fut(schema) + + # Looking for the first phrase from the string "Schema must be a ..." + assert "Schema must be a " in str(e.value) def test_defaults(self): from google.cloud.bigquery.schema import SchemaField full_name = SchemaField("full_name", "STRING", mode="REQUIRED") age = SchemaField("age", "INTEGER", mode="REQUIRED") + # test with simple list resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == {"name": "age", "type": "INTEGER", "mode": "REQUIRED"} def test_w_description(self): from google.cloud.bigquery.schema import SchemaField @@ -748,25 +837,20 @@ def test_w_description(self): description=None, ) resource = self._call_fut([full_name, age]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - { - "name": "full_name", - "type": "STRING", - "mode": "REQUIRED", - "description": DESCRIPTION, - }, - ) - self.assertEqual( - resource[1], - { - "name": "age", - "type": "INTEGER", - "mode": "REQUIRED", - "description": None, - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + "description": DESCRIPTION, + } + + assert resource[1] == { + "name": "age", + "type": "INTEGER", + "mode": "REQUIRED", + "description": None, + } def test_w_subfields(self): from google.cloud.bigquery.schema import SchemaField @@ -778,49 +862,72 @@ def test_w_subfields(self): "phone", "RECORD", mode="REPEATED", fields=[ph_type, ph_num] ) resource = self._call_fut([full_name, phone]) - self.assertEqual(len(resource), 2) - self.assertEqual( - resource[0], - {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, - ) - self.assertEqual( - resource[1], - { - "name": "phone", - "type": "RECORD", - "mode": "REPEATED", - "fields": [ - {"name": "type", "type": "STRING", "mode": "REQUIRED"}, - {"name": "number", "type": "STRING", "mode": "REQUIRED"}, - ], - }, - ) + assert len(resource) == 2 + assert resource[0] == { + "name": "full_name", + "type": "STRING", + "mode": "REQUIRED", + } + assert resource[1] == { + "name": "phone", + "type": "RECORD", + "mode": "REPEATED", + "fields": [ + {"name": "type", "type": "STRING", "mode": "REQUIRED"}, + {"name": "number", "type": "STRING", "mode": "REQUIRED"}, + ], + } -class Test_to_schema_fields(unittest.TestCase): +class Test_to_schema_fields: + """Tests for the _to_schema_fields function.""" + @staticmethod def _call_fut(schema): from google.cloud.bigquery.schema import _to_schema_fields return _to_schema_fields(schema) - def test_invalid_type(self): - schema = [ - ("full_name", "STRING", "REQUIRED"), - ("address", "STRING", "REQUIRED"), - ] - with self.assertRaises(ValueError): - self._call_fut(schema) - - def 
test_schema_fields_sequence(self): - from google.cloud.bigquery.schema import SchemaField + FULL_NAME = schema.SchemaField( + name="full_name", field_type="STRING", mode="REQUIRED" + ) + AGE = schema.SchemaField(name="age", field_type="INTEGER", mode="REQUIRED") + LIST_RESOURCE = [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ] + FOREIGN_TYPE_INFO = schema.ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED") + FOREIGN_TYPE_INFO_RESOURCE = {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"} - schema = [ - SchemaField("full_name", "STRING", mode="REQUIRED"), - SchemaField("age", "INT64", mode="NULLABLE"), - ] + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param([], [], id="empty list"), + pytest.param((), [], id="empty tuple"), + pytest.param(LIST_RESOURCE, [FULL_NAME, AGE], id="list"), + ], + ) + def test_ctor_valid_input(self, schema, expected): result = self._call_fut(schema) - self.assertEqual(result, schema) + + assert result == expected + + @pytest.mark.parametrize( + "schema,expected", + [ + pytest.param(123, TypeError, id="invalid schema type"), + pytest.param([123, 123], TypeError, id="invalid SchemaField type"), + pytest.param({"fields": 123}, TypeError, id="invalid type, dict"), + pytest.param( + {"fields": 123, "foreignTypeInfo": 123}, + TypeError, + id="invalid type, dict", + ), + ], + ) + def test_ctor_invalid_input(self, schema, expected): + with pytest.raises(expected): + self._call_fut(schema) def test_unknown_properties(self): schema = [ @@ -879,7 +986,7 @@ def test_valid_mapping_representation(self): ] result = self._call_fut(schema) - self.assertEqual(result, expected_schema) + assert result == expected_schema class TestPolicyTags(unittest.TestCase): diff --git a/tests/unit/test_table.py b/tests/unit/test_table.py index de8b331f5..1a3d7ec0f 100644 --- a/tests/unit/test_table.py +++ b/tests/unit/test_table.py @@ -31,6 +31,7 @@ from google.cloud.bigquery import _versions_helpers from google.cloud.bigquery import exceptions from google.cloud.bigquery import external_config +from google.cloud.bigquery import schema from google.cloud.bigquery.table import TableReference from google.cloud.bigquery.dataset import DatasetReference @@ -699,7 +700,7 @@ def test_schema_setter_invalid_field(self): table_ref = dataset.table(self.TABLE_NAME) table = self._make_one(table_ref) full_name = SchemaField("full_name", "STRING", mode="REQUIRED") - with self.assertRaises(ValueError): + with self.assertRaises(TypeError): table.schema = [full_name, object()] def test_schema_setter_valid_fields(self): @@ -1213,6 +1214,83 @@ def test_to_api_repr_w_unsetting_expiration(self): } self.assertEqual(resource, exp_resource) + def test_to_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == table.to_api_repr() + + # update schema (i.e. 
the fields), ensure foreign_type_info is unchanged + table.schema = [] + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + } + assert table.to_api_repr()["schema"] == expected + + # update foreign_type_info, ensure schema (i.e. the fields), is unchanged + table.foreign_type_info = {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"} + expected = { + "fields": [], + "foreignTypeInfo": {"typeSystem": "SCHEMA_SHOULD_NOT_CHANGE"}, + } + assert table.to_api_repr()["schema"] == expected + + def test_from_api_repr_w_schema_and_foreign_type_info(self): + """Tests to ensure that to_api_repr works correctly with + both schema and foreign_type_info fields + """ + + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + SCHEMA = { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreignTypeInfo": FOREIGNTYPEINFO, + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": SCHEMA, + } + + table = self._get_target_class().from_api_repr(API_REPR) + assert table._properties == API_REPR + def test__build_resource_w_custom_field(self): dataset = DatasetReference(self.PROJECT, self.DS_ID) table_ref = dataset.table(self.TABLE_NAME) @@ -1481,6 +1559,33 @@ def test_encryption_configuration_setter(self): table.encryption_configuration = None self.assertIsNone(table.encryption_configuration) + def test_resource_tags_getter_empty(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + self.assertEqual(table.resource_tags, {}) + + def test_resource_tags_update_in_place(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags["123456789012/key"] = "value" + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + table.resource_tags = {"123456789012/key": "value"} + self.assertEqual(table.resource_tags, {"123456789012/key": "value"}) + + def test_resource_tags_setter_bad_value(self): + dataset = DatasetReference(self.PROJECT, self.DS_ID) + table_ref = dataset.table(self.TABLE_NAME) + table = self._make_one(table_ref) + with self.assertRaises(ValueError): + table.resource_tags = 12345 + def test___repr__(self): from google.cloud.bigquery.table import TableReference @@ -5966,6 +6071,99 @@ def test_external_catalog_table_options_from_api_repr(self): assert result == expected +class TestForeignTypeInfo: + PROJECT = "test-project" + DATASET_ID = "test_dataset" + TABLE_ID = "coffee_table" + DATASET = DatasetReference(PROJECT, DATASET_ID) + TABLEREF = DATASET.table(TABLE_ID) + FOREIGNTYPEINFO = { + "typeSystem": "TYPE_SYSTEM_UNSPECIFIED", + } + API_REPR = { + "tableReference": { + "projectId": PROJECT, + "datasetId": DATASET_ID, + "tableId": TABLE_ID, + }, + "schema": { + "fields": [ + {"name": "full_name", "type": "STRING", "mode": "REQUIRED"}, + {"name": "age", "type": "INTEGER", "mode": "REQUIRED"}, + ], + "foreign_info_type": FOREIGNTYPEINFO, + }, + } + + from google.cloud.bigquery.schema import ForeignTypeInfo + + @staticmethod + def 
_get_target_class(self): + from google.cloud.bigquery.table import Table + + return Table + + def _make_one(self, *args, **kw): + return self._get_target_class(self)(*args, **kw) + + def test_foreign_type_info_default_initialization(self): + table = self._make_one(self.TABLEREF) + assert table.foreign_type_info is None + + @pytest.mark.parametrize( + "foreign_type_info, expected", + [ + ( + {"typeSystem": "TYPE_SYSTEM_UNSPECIFIED"}, + "TYPE_SYSTEM_UNSPECIFIED", + ), + (None, None), + ( + ForeignTypeInfo(type_system="TYPE_SYSTEM_UNSPECIFIED"), + "TYPE_SYSTEM_UNSPECIFIED", + ), + ], + ) + def test_foreign_type_info_valid_inputs(self, foreign_type_info, expected): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = foreign_type_info + + if foreign_type_info is None: + result = table.foreign_type_info + else: + result = table.foreign_type_info.type_system + assert result == expected + + def test_foreign_type_info_invalid_inputs(self): + table = self._make_one(self.TABLEREF) + + # invalid on the whole + with pytest.raises(TypeError, match="Pass .*"): + table.foreign_type_info = 123 + + def test_foreign_type_info_to_api_repr(self): + table = self._make_one(self.TABLEREF) + + table.foreign_type_info = self.ForeignTypeInfo( + type_system="TYPE_SYSTEM_UNSPECIFIED", + ) + + result = table.to_api_repr()["schema"]["foreignTypeInfo"] + expected = self.FOREIGNTYPEINFO + assert result == expected + + def test_foreign_type_info_from_api_repr(self): + table = self._make_one(self.TABLEREF) + table.foreign_type_info = self.FOREIGNTYPEINFO + + fti = schema.ForeignTypeInfo.from_api_repr(self.FOREIGNTYPEINFO) + + result = fti.to_api_repr() + expected = self.FOREIGNTYPEINFO + assert result == expected + + @pytest.mark.parametrize( "table_path", (