1
+ import boto3
2
+
3
# S3 client created once at module import and reused by every lambda_handler call.
+ s3 = boto3 .client ("s3" )
4
+
5
def lambda_handler(event, context):
    """Copy each S3 object named in *event* to a Hive-style partitioned key.

    The object's file name is expected to look like
    ``<distro>.<YYYY>-<MM>-<DD>-<HH>.<rest>`` (for example
    ``E3U1KSE3QN2C8K.2022-10-12-12.05c44449.gz``).  Each matching object is
    copied, within the same bucket, to::

        partitioned/<distro>/year=<YYYY>/month=<MM>/day=<DD>/hour=<HH>/<file name>

    The source object is left in place (copy, not move).

    Records are skipped, with a log line, when:

    * the key already lives under ``partitioned/`` -- otherwise a trigger
      that also fires for this function's own output would copy it again
      into ever-deeper ``partitioned/partitioned/...`` keys;
    * the file name does not have the expected ``<distro>.<date-hour>.``
      shape, so one stray object does not abort the rest of the batch.

    Args:
        event: S3 event notification dict with a ``"Records"`` list; each
            record supplies ``["s3"]["bucket"]["name"]`` and
            ``["s3"]["object"]["key"]``.
        context: Lambda context object (unused).

    Returns:
        A dict with ``statusCode`` 200, an empty ``body`` and JSON/CORS
        ``headers``.

    Raises:
        KeyError: if *event* or a record lacks one of the fields above.
    """
    # Local import keeps this block self-contained (stdlib only).
    from urllib.parse import unquote_plus

    for record in event["Records"]:
        bucket = record["s3"]["bucket"]["name"]
        # S3 event notifications URL-encode the object key (e.g. a space
        # arrives as "+"), so decode it before using it as a real key.
        key = unquote_plus(record["s3"]["object"]["key"])

        if key.startswith("partitioned/"):
            print(f"skip: s3://{bucket}/{key} (already partitioned)")
            continue

        # Parse the file name only, so a key prefix ("logs/...") cannot be
        # mistaken for the distribution id or break the date split.
        filename = key.rsplit("/", 1)[-1]
        name_parts = filename.split(".")
        date_parts = name_parts[1].split("-") if len(name_parts) > 1 else []
        if not name_parts[0] or len(date_parts) != 4:
            print(f"skip: s3://{bucket}/{key} (unrecognised log file name)")
            continue

        distro = name_parts[0]
        year, month, day, hour = date_parts
        dest = (
            f"partitioned/{distro}/year={year}/month={month}"
            f"/day={day}/hour={hour}/{filename}"
        )

        print(f"copy: s3://{bucket}/{key} -> s3://{bucket}/{dest}")

        # The dict form of CopySource takes the decoded key as-is; the
        # string form would need the key URL-encoded again.
        s3.copy_object(
            Bucket=bucket,
            Key=dest,
            CopySource={"Bucket": bucket, "Key": key},
        )

    return {
        "statusCode": 200,
        "body": "",
        "headers": {
            "Content-Type": "application/json",
            "Access-Control-Allow-Origin": "*",
        },
    }
27
+
28
+ """
29
+ {
30
+ "Records": [
31
+ {
32
+ "s3": {
33
+ "bucket": {
34
+ "name": "irr-static-logs"
35
+ },
36
+ "object": {
37
+ "key": "E3U1KSE3QN2C8K.2022-10-12-12.05c44449.gz"
38
+ }
39
+ }
40
+ }
41
+ ]
42
+ }
43
+
44
+ {
45
+ "Version": "2012-10-17",
46
+ "Statement": [
47
+ {
48
+ "Effect": "Allow",
49
+ "Action": [
50
+ "s3:*",
51
+ "s3-object-lambda:*"
52
+ ],
53
+ "Resource": "*"
54
+ }
55
+ ]
56
+ }
57
+
58
+
59
+ CREATE EXTERNAL TABLE IF NOT EXISTS
60
+ default.partitioned_cf (
61
+ date DATE,
62
+ time STRING,
63
+ location STRING,
64
+ bytes BIGINT,
65
+ requestip STRING,
66
+ method STRING,
67
+ host STRING,
68
+ uri STRING,
69
+ status INT,
70
+ referrer STRING,
71
+ useragent STRING,
72
+ querystring STRING,
73
+ cookie STRING,
74
+ resulttype STRING,
75
+ requestid STRING,
76
+ hostheader STRING,
77
+ requestprotocol STRING,
78
+ requestbytes BIGINT,
79
+ timetaken FLOAT,
80
+ xforwardedfor STRING,
81
+ sslprotocol STRING,
82
+ sslcipher STRING,
83
+ responseresulttype STRING,
84
+ httpversion STRING,
85
+ filestatus STRING,
86
+ encryptedfields INT
87
+ )
88
+ PARTITIONED BY(
89
+ year string,
90
+ month string,
91
+ day string,
92
+ hour string )
93
+ ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
94
+ LOCATION 's3://irr-static-logs/partitioned/E3U1KSE3QN2C8K'
95
+ TBLPROPERTIES ( 'skip.header.line.count'='2');
96
+
97
+ msck repair table default.partitioned_cf;
98
+
99
+ select * from default.partitioned_cf limit 20;
100
+
101
+ """
0 commit comments