将数据从谷歌表导入 MySQL 表

ITH*_*Guy 3 javascript mysql jdbc google-apps-script

我正在使用 Google Apps Script 将数据从 Google 表格导入 MySQL 表。我需要把一个非常庞大的数据集从 Google 表格导入数据库表,但遇到了“超出最大执行时间”的异常。是否有其他方法可以加快执行速度?

// Connection settings — replace the placeholders with real values.
var address = 'database_IP_address';
var db = 'database_name';

// Root account name and password.
var root = 'root';
var rootPwd = 'root_password';

// Regular (non-root) account name and password.
var user = 'user_name';
var userPwd = 'user_password';

// JDBC URLs: the server-level URL first, then the URL of the database on it.
var instanceUrl = 'jdbc:mysql://'.concat(address);
var dbUrl = [instanceUrl, db].join('/');

/**
 * Copies every data row of the 'product' sheet into the database by calling
 * the sp_googlesheetstotable stored procedure once per row.
 *
 * Connection settings (dbUrl, root, rootPwd) are read from the file-level
 * variables. The first sheet row (index 0) is skipped; columns 0-3 of each
 * remaining row are bound as RecId, Code, ProductDescription and Price.
 *
 * The callable statement is prepared once and reused for all rows, and both
 * the statement and the connection are closed even when a row fails.
 *
 * @throws Rethrows any error raised by the spreadsheet or JDBC calls.
 */
function googleSheetsToMySQL() {
  var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";

  var dbconnection = Jdbc.getConnection(dbUrl, root, rootPwd);
  try {
    var googlesheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('product');
    var data = googlesheet.getDataRange().getValues();

    // Prepare once; re-preparing on every row leaks one statement per row
    // because only the most recently prepared one would ever be closed.
    var statement = dbconnection.prepareCall(sql);
    try {
      // Start at 1 to skip the first row of the sheet.
      for (var i = 1; i < data.length; i++) {
        var row = data[i];
        statement.setString(1, row[0]); // RecId
        statement.setString(2, row[1]); // Code
        statement.setString(3, row[2]); // ProductDescription
        statement.setString(4, row[3]); // Price
        statement.executeUpdate();
      }
    } finally {
      statement.close();
    }
  } finally {
    dbconnection.close();
  }
}
Run Code Online (Sandbox Code Playgroud)

使用批处理

// Batched variant of the row loop. Turning auto-commit off makes all rows part
// of one transaction that is committed explicitly at the end.
dbconnection.setAutoCommit(false);

// Prepare the call once, outside the loop; only the bound values change per row.
var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";
statement = dbconnection.prepareCall(sql);

// Start at 1 to skip the first row of the sheet.
for (var i = 1; i < data.length; i++) {
  RecId = data[i][0];
  Code = data[i][1];
  ProductDescription = data[i][2];
  Price = data[i][3];

  statement.setString(1, RecId);
  statement.setString(2, Code);
  statement.setString(3, ProductDescription);
  statement.setString(4, Price);

  // Queue this row only. Calling executeBatch() here would send a batch of
  // one row per iteration, which is no faster than executeUpdate().
  statement.addBatch();
}

// Send every queued row together, then commit the transaction.
statement.executeBatch();
dbconnection.commit();
Run Code Online (Sandbox Code Playgroud)

Tod*_*odd 5

我猜您可能早已找到了解决方案,但对于像我一样偶然看到这个问题的人来说,有一种简单的方法可以加快这些请求。OP 其实已经很接近了……

使用提供的代码:

/**
 * Exports every data row of a Google Sheet to the database in batches.
 *
 * The first sheet row (index 0) is skipped; columns 0-3 of each remaining row
 * are bound as RecId, Code, ProductDescription and Price. Rows are queued with
 * addBatch() and sent with executeBatch() every `maxRecordsPerBatch` rows and
 * once more for the final partial batch.
 *
 * Everything runs in one transaction: if any step fails, the error is logged,
 * the transaction is rolled back and the error is rethrown, so a failed batch
 * is never committed alongside the successful ones. The statement and the
 * connection are always closed.
 *
 * @throws {Error} If the sheet does not exist; also rethrows any JDBC error.
 */
function googleSheetsToMySQL() {

  var sheetName = 'name_of_google_sheet';

  var dbAddress = 'database_ip_address';
  var dbUser = 'database_user_name';
  var dbPassword = 'database_user_password';
  var dbName = 'database_name';
  var dbTableName = 'database_table_name';

  var dbURL = 'jdbc:mysql://' + dbAddress + '/' + dbName;

  // Regarding the statement used by the OP, you might find something like....
  //
  // "INSERT INTO " + dbTableName + " (recid, code, product_description, price) VALUES (?, ?, ?, ?);";
  //
  // to be more practical if you're trying to implement the OP's code,
  // as you are unlikely to have a stored procedure named 'sp_googlesheetstotable', or may be more
  // familiar with basic queries like INSERT, UPDATE, or SELECT

  var sql = "{call [dbo].[sp_googlesheetstotable](?,?,?,?)}";

  // The more records you load into one batch, the longer that batch takes to
  // process and the more memory the script holds; smaller batches leave room
  // for post-processing (e.g. recording the last exported row so a trigger can
  // resume the export). Pick a size that fits the data you are working with.

  var maxRecordsPerBatch = 1000;

  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var sheet = spreadsheet.getSheetByName(sheetName);
  if (!sheet) {
    // Fail with a clear message before opening a database connection.
    throw new Error('Sheet `' + sheetName + '` was not found in the active spreadsheet.');
  }

  var sheetData = sheet.getDataRange().getValues();

  var dbConnection = Jdbc.getConnection(dbURL, dbUser, dbPassword);
  var dbStatement = null;

  try {
    // Group all batches into one transaction, committed only at the very end.
    dbConnection.setAutoCommit(false);

    dbStatement = dbConnection.prepareCall(sql);

    var recordCounter = 0;

    // Start at 1 to skip the first row of the sheet.
    for (var i = 1; i < sheetData.length; i++) {

      var row = sheetData[i];
      var lastRow = (i + 1 === sheetData.length);

      dbStatement.setString(1, row[0]); // RecId
      dbStatement.setString(2, row[1]); // Code
      dbStatement.setString(3, row[2]); // ProductDescription
      dbStatement.setString(4, row[3]); // Price

      // Queue the values bound above; nothing is sent to the database yet.
      dbStatement.addBatch();

      recordCounter += 1;

      if (recordCounter === maxRecordsPerBatch || lastRow) {
        dbStatement.executeBatch();

        if (!lastRow) {
          // A fresh statement is used for each batch, as in the original
          // answer (to avoid "No operations allowed after statement closed"
          // errors); the previous one is closed first so it is not leaked.
          dbStatement.close();
          dbStatement = dbConnection.prepareCall(sql);
          recordCounter = 0;
        }
      }
    }

    dbConnection.commit();
  }
  catch (e) {
    console.log('Attempted to update TABLE `' + dbTableName + '` in DB `' + dbName + '`, but the following error was returned: ' + e);

    // Undo the batches that already succeeded so the table is not left
    // half-updated; a rollback failure must not hide the original error.
    try {
      dbConnection.rollback();
    }
    catch (rollbackError) {
      console.log('Rollback failed: ' + rollbackError);
    }

    throw e;
  }
  finally {
    if (dbStatement) {
      dbStatement.close();
    }
    dbConnection.close();
  }
}
Run Code Online (Sandbox Code Playgroud)

OP 可能仍会遇到执行时间限制(我在记录数不到 1 万条时就遇到过),但应避免每个批次只包含一个请求,除非您需要定位出问题的行。

从这个链接

请务必记住,添加到 Statement 或 PreparedStatement 的每个更新都由数据库单独执行。这意味着,其中一些可能会在其中一个失败之前成功。所有成功的语句现在都应用于数据库,但其余的更新可能不会。这可能会导致数据库中的数据不一致。

为避免这种情况,您可以在 JDBC 事务中执行批量更新。在事务中执行时,您可以确保要么执行所有更新,要么都不执行。任何成功的更新都可以回滚,以防其中一个更新失败。

替代方案

如果执行时间限制实在难以绕开,您可以尝试从 Apps Script 外部访问表格中的数据。为便于日后查阅,我把基本步骤复制在下面;如果原链接仍然有效,请直接访问原文。

链接到源

  1. 更新 composer.json,在 require 中加入 "google/apiclient": "^2.0",然后运行 composer update
  2. 在 https://console.developers.google.com/apis/dashboard 上创建项目。
  3. 单击启用 API 并启用 Google Sheets API
  4. 转到凭据,然后单击创建凭据,然后选择服务帐户密钥
  5. 在下拉列表中选择新建服务帐户。给账户起个名字,什么都行。
  6. 对于角色,我选择了 Project -> Service Account Actor
  7. 对于密钥类型,选择 JSON(默认)并下载文件。这个文件包含一个私钥,所以要非常小心,毕竟它是你的凭据
  8. 最后,编辑您要访问的电子表格的共享设置,将其共享给 JSON 文件中的 client_email 地址,并授予查看权限(如果只需读取)或编辑权限(如果需要读写)。
<?php
/*
 * Reads columns A-H of a Google Sheet (starting at row 2) into $data and, for
 * every row read, writes the current date into column I of that row.
 *
 * Configuration comes from two environment variables:
 *   JSON_AUTH      - the service-account credentials, as a JSON string
 *   SPREADSHEET_ID - the ID of the spreadsheet to read and update
 */
require __DIR__ . '/vendor/autoload.php';


/*
 * We need to get a Google_Client object first to handle auth and api calls, etc.
 */
$client = new \Google_Client();
$client->setApplicationName('My PHP App');
$client->setScopes([\Google_Service_Sheets::SPREADSHEETS]);
$client->setAccessType('offline');

/*
 * The JSON auth file can be provided to the Google Client in two ways, one is as a string which is assumed to be the
 * path to the json file. This is a nice way to keep the creds out of the environment.
 *
 * The second option is as an array. For this example I'll pull the JSON from an environment variable, decode it, and
 * pass along.
 */
$jsonAuth = getenv('JSON_AUTH');
$client->setAuthConfig(json_decode($jsonAuth, true));

/*
 * With the Google_Client we can get a Google_Service_Sheets service object to interact with sheets
 */
$sheets = new \Google_Service_Sheets($client);

/*
 * To read data from a sheet we need the spreadsheet ID and the range of data we want to retrieve.
 * Range is defined using A1 notation, see https://developers.google.com/sheets/api/guides/concepts#a1_notation
 */
$data = [];

// The first row contains the column titles, so lets start pulling data from row 2
$currentRow = 2;

// The range of A2:H will get columns A through H and all rows starting from row 2
$spreadsheetId = getenv('SPREADSHEET_ID');
$range = 'A2:H';
$rows = $sheets->spreadsheets_values->get($spreadsheetId, $range, ['majorDimension' => 'ROWS']);
if (isset($rows['values'])) {
    foreach ($rows['values'] as $row) {
        /*
         * If first column is empty, treat it as the end of the data and stop (this is just for example)
         */
        if (empty($row[0])) {
            break;
        }

        /*
         * The API omits trailing empty cells, so a row may have fewer than 8 values.
         * Pad it to 8 so the lookups below never hit an undefined index.
         */
        $row = array_pad($row, 8, '');

        $data[] = [
            'col-a' => $row[0],
            'col-b' => $row[1],
            'col-c' => $row[2],
            'col-d' => $row[3],
            'col-e' => $row[4],
            'col-f' => $row[5],
            'col-g' => $row[6],
            'col-h' => $row[7],
        ];

        /*
         * Now for each row we've seen, lets update the I column with the current date.
         * 'values' must be a two-dimensional array (rows of cells), even for a single cell.
         */
        $updateRange = 'I'.$currentRow;
        $updateBody = new \Google_Service_Sheets_ValueRange([
            'range' => $updateRange,
            'majorDimension' => 'ROWS',
            'values' => [[date('c')]],
        ]);
        $sheets->spreadsheets_values->update(
            $spreadsheetId,
            $updateRange,
            $updateBody,
            ['valueInputOption' => 'USER_ENTERED']
        );

        $currentRow++;
    }
}

print_r($data);
/* Output:
Array
(
    [0] => Array
        (
            [col-a] => 123
            [col-b] => test
            [col-c] => user
            [col-d] => test user
            [col-e] => usertest
            [col-f] => email@domain.com
            [col-g] => yes
            [col-h] => no
        )

    [1] => Array
        (
            [col-a] => 1234
            [col-b] => another
            [col-c] => user
            [col-d] =>
            [col-e] => another
            [col-f] => another@eom.com
            [col-g] => no
            [col-h] => yes
        )

)
 */
Run Code Online (Sandbox Code Playgroud)