Jas*_*man 3 python amazon-web-services amazon-athena
我正在尝试使用 Python 连接 AWS Athena,并打算通过 pyathenajdbc 来实现。问题出在建立连接这一步:运行下面的代码时,会收到一条错误消息,提示找不到 AthenaDriver(java.lang.RuntimeException: Class com.amazonaws.athena.jdbc.AthenaDriver not found)。我确实已经从 AWS 下载了这个驱动文件,并确认它就在代码所指定的目录中。
from mdpbi.rsi.config import *
from mdpbi.tools.functions import mdpLog
from pkg_resources import resource_string
import argparse
import os
import pyathenajdbc
import sys
# Name passed to mdpLog() in main() to identify this script's logger.
SCRIPT_NAME = "Athena_Export"
# Path of the Athena JDBC driver JAR; main() hands it to
# pyathenajdbc.connect() as driver_path.
ATHENA_JDBC_CLASSPATH = "/opt/amazon/athenajdbc/AthenaJDBC41-1.0.0.jar"
# Export file name and its full path under WORKINGDIR. Neither is used by
# the code visible in this script.
# NOTE(review): WORKINGDIR presumably comes from the star import of
# mdpbi.rsi.config -- confirm.
EXPORT_OUTFILE = "RSI_Export.txt"
EXPORT_OUTFILE_PATH = os.path.join(WORKINGDIR, EXPORT_OUTFILE)
def get_arg_parser():
    """Build the command-line argument parser for this script.

    The parser defines no options of its own, so it only provides argparse's
    built-in ``-h/--help`` handling. The module docstring is used as the
    description and is shown verbatim because of
    ``RawDescriptionHelpFormatter``.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    return argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
def main():
    """Run the packaged Athena query and log its result set.

    Loads ``athena.sql`` from the ``mdpbi.rsi.athena.resources`` package,
    opens a pyathenajdbc connection using the JDBC driver JAR at
    ``ATHENA_JDBC_CLASSPATH``, executes the statement, and logs the cursor
    description followed by every fetched row.

    Returns:
        int: always 0; any failure propagates as an exception.
    """
    # Parsed only for its side effects (-h/--help, rejecting unknown
    # arguments); the parser defines no options, so the result is not kept.
    get_arg_parser().parse_args(sys.argv[1:])
    logger = mdpLog(SCRIPT_NAME, LOGDIR)
    sql = resource_string("mdpbi.rsi.athena.resources", "athena.sql")

    # NOTE(review): the "Class com.amazonaws.athena.jdbc.AthenaDriver not
    # found" error reported for this call was resolved by running under
    # Java 8 instead of Java 7 -- confirm which JVM is picked up here.
    conn = pyathenajdbc.connect(
        s3_staging_dir="s3://athena",
        access_key=AWS_ACCESS_KEY_ID,
        secret_key=AWS_SECRET_ACCESS_KEY,
        region_name="us-east-1",
        log_path=LOGDIR,
        driver_path=ATHENA_JDBC_CLASSPATH,
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            logger.info(cursor.description)
            logger.info(cursor.fetchall())
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())
Run Code Online (Sandbox Code Playgroud)
Traceback (most recent call last):
  File "/usr/lib64/python2.7/runpy.py", line 174, in _run_module_as_main
    "__main__", fname, loader, pkg_name)
  File "/usr/lib64/python2.7/runpy.py", line 72, in _run_code
    exec code in run_globals
  File "/home/ec2-user/jason_testing/mdpbi/rsi/athena/__main__.py", line 53, in <module>
    rtn = main()
  File "/home/ec2-user/jason_testing/mdpbi/rsi/athena/__main__.py", line 39, in main
    driver_path=athena_jdbc_driver_path
  File "/opt/mdpbi/Python_Envs/2.7.10/local/lib/python2.7/dist-packages/pyathenajdbc/__init__.py", line 65, in connect
    driver_path, **kwargs)
  File "/opt/mdpbi/Python_Envs/2.7.10/local/lib/python2.7/dist-packages/pyathenajdbc/connection.py", line 68, in __init__
    jpype.JClass(ATHENA_DRIVER_CLASS_NAME)
  File "/opt/mdpbi/Python_Envs/2.7.10/lib64/python2.7/dist-packages/jpype/_jclass.py", line 55, in JClass
    raise _RUNTIMEEXCEPTION.PYEXC("Class %s not found" % name)
该 JDBC 驱动程序需要 Java 8,而我当时运行的是 Java 7。我在 EC2 实例上另外安装了 Java 8,安装步骤参考:
https://tecadmin.net/install-java-8-on-centos-rhel-and-fedora/#
我还必须在代码中通过环境变量(JAVA_HOME、JRE_HOME、PATH)指定要使用的 Java 版本。做了这些更改之后,代码现在可以按预期运行。
from mdpbi.rsi.config import *
from mdpbi.tools.functions import mdpLog
from pkg_resources import resource_string
import argparse
import os
import pyathenajdbc
import sys
# Name passed to mdpLog() in main() to identify this script's logger.
SCRIPT_NAME = "Athena_Export"
def get_arg_parser():
    """Build the command-line argument parser for this script.

    The parser defines no options of its own, so it only provides argparse's
    built-in ``-h/--help`` handling. The module docstring is used as the
    description and is shown verbatim because of
    ``RawDescriptionHelpFormatter``.

    Returns:
        argparse.ArgumentParser: the configured parser.
    """
    return argparse.ArgumentParser(
        description=__doc__,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
def main():
    """Run the packaged Athena query under Java 8 and log its result set.

    Points the process at the JDK 8 installation through ``JAVA_HOME``,
    ``JRE_HOME`` and ``PATH``, loads ``athena.sql`` from the
    ``mdpbi.rsi.athena.resources`` package, executes it through a
    pyathenajdbc connection, and logs the cursor description followed by
    every fetched row.

    Returns:
        int: always 0; any failure propagates as an exception.
    """
    # Parsed only for its side effects (-h/--help, rejecting unknown
    # arguments); the parser defines no options, so the result is not kept.
    get_arg_parser().parse_args(sys.argv[1:])
    logger = mdpLog(SCRIPT_NAME, LOGDIR)
    sql = resource_string("mdpbi.rsi.athena.resources", "athena.sql")

    # Select the Java 8 runtime before the connection is opened.
    os.environ["JAVA_HOME"] = "/opt/jdk1.8.0_121"
    os.environ["JRE_HOME"] = "/opt/jdk1.8.0_121/jre"
    # Prepend the JDK directories instead of replacing PATH outright, so the
    # Java 8 binaries win while the inherited search path stays usable.
    path_entries = ["/opt/jdk1.8.0_121/bin", "/opt/jdk1.8.0_121/jre/bin"]
    inherited_path = os.environ.get("PATH", "")
    if inherited_path:
        path_entries.append(inherited_path)
    os.environ["PATH"] = os.pathsep.join(path_entries)

    conn = pyathenajdbc.connect(
        s3_staging_dir="s3://mdpbi.data.rsi.out/",
        access_key=AWS_ACCESS_KEY_ID,
        secret_key=AWS_SECRET_ACCESS_KEY,
        schema_name="rsi",
        region_name="us-east-1",
    )
    try:
        with conn.cursor() as cursor:
            cursor.execute(sql)
            logger.info(cursor.description)
            logger.info(cursor.fetchall())
    finally:
        # Always release the connection, even if the query fails.
        conn.close()
    return 0
if __name__ == '__main__':
    # Use main()'s return value as the process exit status.
    sys.exit(main())
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
5162 次 |
| 最近记录: |