Sou*_*uad 4 sql json broadcastreceiver amazon-web-services amazon-athena
为了了解每个 EC2 实例的所有者,我通过 Athena 查询存储在 S3 中的 CloudTrail 日志文件。
我在 Athena 有一张表,其结构如下:
-- External table over the CloudTrail log files under the S3 location below,
-- read through the CloudTrail SerDe / input format.
-- Partition columns: account, region, year.
CREATE EXTERNAL TABLE cloudtrail_logs (
    eventversion        STRING,
    -- Nested identity of the caller; sessioncontext carries the session
    -- attributes and the session issuer.
    useridentity        STRUCT<
        type:STRING,
        principalid:STRING,
        arn:STRING,
        accountid:STRING,
        invokedby:STRING,
        accesskeyid:STRING,
        userName:STRING,
        sessioncontext:STRUCT<
            attributes:STRUCT<
                mfaauthenticated:STRING,
                creationdate:STRING
            >,
            sessionissuer:STRUCT<
                type:STRING,
                principalId:STRING,
                arn:STRING,
                accountId:STRING,
                userName:STRING
            >
        >
    >,
    eventtime           STRING,
    eventsource         STRING,
    eventname           STRING,
    awsregion           STRING,
    sourceipaddress     STRING,
    useragent           STRING,
    errorcode           STRING,
    errormessage        STRING,
    -- Request/response payloads are declared as plain strings, so they must be
    -- parsed with JSON functions at query time.
    requestparameters   STRING,
    responseelements    STRING,
    additionaleventdata STRING,
    requestid           STRING,
    eventid             STRING,
    resources           ARRAY<STRUCT<
        ARN:STRING,
        accountId:STRING,
        type:STRING
    >>,
    eventtype           STRING,
    apiversion          STRING,
    readonly            STRING,
    recipientaccountid  STRING,
    serviceeventdetails STRING,
    sharedeventid       STRING,
    vpcendpointid       STRING
)
PARTITIONED BY (
    account STRING,
    region  STRING,
    year    STRING
)
ROW FORMAT SERDE 'com.amazon.emr.hive.serde.CloudTrailSerde'
STORED AS
    INPUTFORMAT  'com.amazon.emr.cloudtrail.CloudTrailInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://<BUCKET>/AWSLogs/';
Run Code Online (Sandbox Code Playgroud)
我想找到启动 EC2 实例的用户身份,因此需要解析 responseelements 字段,并只获取其中包含特定 instanceId 的记录。
responseelements 字段的内容如下:
{
"requestId":"cab34472-31cc-44cd-ae32-a84077e55cb6",
"reservationId":"r-05964c8549788ac50",
"ownerId":"xxxxxxxxxx",
"groupSet":{},
"instancesSet":{
"items":[
{"instanceId":"i-043543cb4c12",
"imageId":"ami-078df974",
"instanceState":{"code":0,"name":"pending"},
"privateDnsName":"ip-444444.eu-west-1.compute.internal",
"keyName":"key-dev","amiLaunchIndex":0,"productCodes":{},
"instanceType":"t2.large",
"launchTime":1488438050000,
"placement":{"availabilityZone":"eu-west-1b","tenancy":"default"},
"monitoring":{"state":"pending"},
"subnetId":"subnet-d8fffff",
"vpcId":"vpc-444435",
"privateIpAddress":"10.0.42.49",
"stateReason":{"code":"pending","message":"pending"},
"architecture":"x86_64",
"rootDeviceType":"ebs",
"rootDeviceName":"/dev/xvda",
"blockDeviceMapping":{},
"virtualizationType":"hvm",
"hypervisor":"xen",
"clientToken":"c6e53004-c561-437d-a642-196489ff297c_subnet-fffffffff",
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"sourceDestCheck":true,
"networkInterfaceSet":{
"items":[
{"networkInterfaceId":"eni-b16b66f0",
"subnetId":"subnet-dffffff",
"vpcId":"vpc-50fffff35",
"ownerId":"xxxxxxxx",
"status":"in-use",
"macAddress":"fdsfdsfsdfqdsf",
"privateIpAddress":"10.0.42.34234213",
"privateDnsName":"ip-1dddddd.eu-west-1.compute.internal",
"sourceDestCheck":true,
"groupSet":{"items":[{"groupId":"sg-64878700","groupName":"MetamSecurityGroup"}]},
"attachment":{"attachmentId":"eni-attach-45619121","deviceIndex":0,"status":"attaching","attachTime":1488438050000,"deleteOnTermination":true},
"privateIpAddressesSet":{"item":[{"privateIpAddress":"10ffffff","privateDnsName":"ip-ffffff.eu-west-1.compute.internal","primary":true}]},
"ipv6AddressesSet":{},
"tagSet":{}}]}
,"iamInstanceProfile":{"arn":"arn:aws:iam::xxxxx:instance-profile/infra-EC2InstanceProfile-1D59C5YR0LIYJ","id":"eeeeeeeeeeeeeeeeee"},
"ebsOptimized":false}
]
},
"requesterId":"226008221399"
}
Run Code Online (Sandbox Code Playgroud)
这是我尝试过的查询:
-- Attempted lookup of who issued RunInstances for one account.
-- NOTE: instance_id is read from the top-level path '$.instanceId'; in the
-- sample payload instanceId is nested under instancesSet.items, so this path
-- does not point at it.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract(responseelements, '$.instanceId') AS instance_id
FROM cloudtrail_logs
WHERE
    account = 'xxxxxxxxxxxxxxx'
    AND eventname = 'RunInstances';
Run Code Online (Sandbox Code Playgroud)
但这样得到的 instance_id 列是空的。如何才能正确地从 responseelements 中只提取 instance_id?
我找到了用于查找 EC2 实例所有者的正确查询,希望对其他人有所帮助!
-- Find who launched a specific EC2 instance from the RunInstances events.
--
-- A single RunInstances call can launch several instances, all listed under
-- responseelements.instancesSet.items. The items array is therefore unnested
-- into one row per launched instance, instead of reading only items[0], so the
-- reported instance_id is always the instance that was searched for.
--
-- json_extract_scalar returns a plain varchar (no surrounding JSON quotes),
-- which allows an exact equality match rather than a substring LIKE over the
-- whole payload.
SELECT DISTINCT
    eventsource,
    eventname,
    useridentity.userName,
    eventtime,
    json_extract_scalar(launched.item, '$.instanceId') AS instance_id
FROM cloudtrail_logs
-- TRY_CAST yields NULL (and UNNEST then yields no rows) when the payload has
-- no items array, e.g. for events whose responseelements is empty.
CROSS JOIN UNNEST(
    TRY_CAST(
        json_extract(responseelements, '$.instancesSet.items') AS ARRAY(JSON)
    )
) AS launched (item)
WHERE account = 'xxxxxxx'
    AND eventname = 'RunInstances'
    AND json_extract_scalar(launched.item, '$.instanceId') = 'i-3434ecb4c12'
;
Run Code Online (Sandbox Code Playgroud)
| 归档时间: |
|
| 查看次数: |
2515 次 |
| 最近记录: |