Skip to content

Commit 8b41c34

Browse files
committed
s3-cf-lambda
1 parent f50943f commit 8b41c34

File tree

1 file changed

+101
-0
lines changed

1 file changed

+101
-0
lines changed

s3-cf-lambda/handler.py

+101
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,101 @@
1+
from urllib.parse import unquote_plus

import boto3
2+
3+
# Module-level S3 client: created once when the module is imported and used by
# lambda_handler for every record it copies.
s3 = boto3.client("s3")
4+
5+
# Key prefix that receives the partitioned copies. Objects already under this
# prefix are never processed again (see the recursion guard in lambda_handler).
_PARTITIONED_PREFIX = "partitioned/"


def _partitioned_key(key):
    """Return the partitioned destination key for *key*, or None if it does not parse.

    Only the file name (the text after the last "/") is inspected. It must look
    like ``<distribution>.<YYYY>-<MM>-<DD>-<HH>.<anything>``, e.g.
    ``E3U1KSE3QN2C8K.2022-10-12-12.05c44449.gz``, which maps to
    ``partitioned/E3U1KSE3QN2C8K/year=2022/month=10/day=12/hour=12/<file name>``.
    """
    filename = key.rsplit("/", 1)[-1]
    name_parts = filename.split(".")
    if len(name_parts) < 2 or not name_parts[0]:
        return None

    distro = name_parts[0]
    stamp = name_parts[1].split("-")
    # Exactly four numeric fields are required: year, month, day, hour.
    if len(stamp) != 4 or not all(field.isdigit() for field in stamp):
        return None

    year, month, day, hour = stamp
    return (
        f"{_PARTITIONED_PREFIX}{distro}/year={year}/month={month}"
        f"/day={day}/hour={hour}/{filename}"
    )


def lambda_handler(event, context):
    """Copy each log object named in an S3 event to a Hive-style partitioned key.

    For every record, the object is copied within the same bucket to
    ``partitioned/<distribution>/year=/month=/day=/hour=/<file name>``. The
    source object is left in place.

    Records are skipped (and logged) instead of failing the whole batch when:
      * the key is already under ``partitioned/`` -- the copy is written to the
        same bucket, so without this guard a notification that is not limited
        by prefix would re-trigger the handler on its own output;
      * the file name does not match the expected log-name pattern.

    Args:
        event: S3 notification payload; ``event["Records"]`` is iterated and an
            event without that key is treated as empty.
        context: Lambda context object (unused).

    Returns:
        A fixed response dict with ``statusCode`` 200, an empty ``body`` and
        JSON/CORS headers.
    """
    for record in event.get("Records", []):
        bucket = record["s3"]["bucket"]["name"]
        # S3 event notifications deliver the object key URL-encoded
        # ("=" as "%3D", space as "+"), so decode it before using it as a key.
        key = unquote_plus(record["s3"]["object"]["key"])

        if key.startswith(_PARTITIONED_PREFIX):
            print(f"skip (already partitioned): s3://{bucket}/{key}")
            continue

        dest = _partitioned_key(key)
        if dest is None:
            print(f"skip (unrecognized key): s3://{bucket}/{key}")
            continue

        print(f"copy: s3://{bucket}/{key} -> s3://{bucket}/{dest}")

        # The dict form of CopySource lets boto3 handle quoting of the key.
        s3.copy_object(
            Bucket=bucket,
            Key=dest,
            CopySource={"Bucket": bucket, "Key": key},
        )
        # NOTE: the source object is not deleted; the original carried a
        # disabled call here: s3.delete_object(Bucket=bucket, Key=key)

    return {
        "statusCode": 200,
        "body": "",
        "headers": {
            "Content-Type": "application/json",
            "Access-Control-Allow-Origin": "*",
        },
    }
27+
28+
"""
29+
{
30+
"Records": [
31+
{
32+
"s3": {
33+
"bucket": {
34+
"name": "irr-static-logs"
35+
},
36+
"object": {
37+
"key": "E3U1KSE3QN2C8K.2022-10-12-12.05c44449.gz"
38+
}
39+
}
40+
}
41+
]
42+
}
43+
44+
{
45+
"Version": "2012-10-17",
46+
"Statement": [
47+
{
48+
"Effect": "Allow",
49+
"Action": [
50+
"s3:*",
51+
"s3-object-lambda:*"
52+
],
53+
"Resource": "*"
54+
}
55+
]
56+
}
57+
58+
59+
CREATE EXTERNAL TABLE IF NOT EXISTS
60+
default.partitioned_cf (
61+
date DATE,
62+
time STRING,
63+
location STRING,
64+
bytes BIGINT,
65+
requestip STRING,
66+
method STRING,
67+
host STRING,
68+
uri STRING,
69+
status INT,
70+
referrer STRING,
71+
useragent STRING,
72+
querystring STRING,
73+
cookie STRING,
74+
resulttype STRING,
75+
requestid STRING,
76+
hostheader STRING,
77+
requestprotocol STRING,
78+
requestbytes BIGINT,
79+
timetaken FLOAT,
80+
xforwardedfor STRING,
81+
sslprotocol STRING,
82+
sslcipher STRING,
83+
responseresulttype STRING,
84+
httpversion STRING,
85+
filestatus STRING,
86+
encryptedfields INT
87+
)
88+
PARTITIONED BY(
89+
year string,
90+
month string,
91+
day string,
92+
hour string )
93+
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
94+
LOCATION 's3://irr-static-logs/partitioned/E3U1KSE3QN2C8K'
95+
TBLPROPERTIES ( 'skip.header.line.count'='2');
96+
97+
msck repair table default.partitioned_cf;
98+
99+
select * from default.partitioned_cf limit 20;
100+
101+
"""

0 commit comments

Comments
 (0)