I started using S3 at work.
The S3 keys are stored in our DB, which is usually fine, but it becomes troublesome if the DB and the bucket get out of sync somewhere.
So I used boto to get a list of S3 keys.
I think it's efficient, because it appears that only HEAD requests are sent.
#! /usr/bin/env python
# -*- coding: utf-8 -*-
"""
Output the list of files contained in the target bucket to TSV.
"""
import sys
import os
import csv
from ConfigParser import SafeConfigParser
from getpass import getpass
from boto import connect_s3
# Path of the aws cli config file ('~' expanded to the user's home directory).
# Used as the default config_path of get_aws_config() and checked for
# existence in the __main__ block when no credentials are typed in.
AWS_CLI_CONFIG_PATH = os.path.expanduser('~/.aws/config')
def get_aws_config(config_path=AWS_CLI_CONFIG_PATH):
    """
    Read the AWS credentials from an aws cli config file.

    Parses the file at ``config_path`` and returns the tuple
    ``(aws_access_key_id, aws_secret_access_key)`` taken from its
    ``default`` section.
    """
    parser = SafeConfigParser()
    with open(config_path, 'r') as config_file:
        parser.readfp(config_file)
    access_key_id = parser.get('default', 'aws_access_key_id')
    secret_access_key = parser.get('default', 'aws_secret_access_key')
    return (access_key_id, secret_access_key)
def get_bucket(aws_access_key_id, aws_secret_access_key, bucket_name):
    """
    Connect to S3 with boto and return the bucket named ``bucket_name``.

    When both credentials are empty, they are loaded with get_aws_config()
    before connecting.
    """
    no_credentials = not (aws_access_key_id or aws_secret_access_key)
    if no_credentials:
        aws_access_key_id, aws_secret_access_key = get_aws_config()
    connection = connect_s3(aws_access_key_id, aws_secret_access_key)
    return connection.get_bucket(bucket_name)
def write_tsv(aws_access_key_id, aws_secret_access_key, bucket_name, file_name):
    """
    Write the name of every key in an S3 bucket to a TSV file.

    The output has a single column: a ``key_name`` header row followed by
    one row per key yielded by the bucket's ``list()``.

    Arguments:
    - aws_access_key_id / aws_secret_access_key: passed through to
      get_bucket(), which falls back to the aws cli config when both
      are empty.
    - bucket_name: name of the bucket whose keys are listed.
    - file_name: output path; resolved to an absolute path before it is
      opened. The file is created, or overwritten if it already exists.
    """
    file_path = os.path.abspath(file_name)
    # Resolve the bucket before touching the output file so that a failed
    # connection does not leave a header-only file behind.
    bucket = get_bucket(aws_access_key_id, aws_secret_access_key, bucket_name)

    # Open the file once and truncate it: re-opening in append mode per batch
    # meant a second run added another header and duplicate rows.
    with open(file_path, 'w') as fp:
        writer = csv.writer(fp, dialect='excel-tab')
        writer.writerow(('key_name',))
        for key in bucket.list():
            # A row must be a sequence of fields. Passing the bare string
            # would make csv emit one tab-separated field per character.
            # NOTE(review): under Python 2 the csv module may fail on
            # non-ASCII unicode names - confirm whether key names need to be
            # encoded (e.g. to UTF-8) first.
            writer.writerow((key.name,))
if __name__ == '__main__':
    # Exactly one command-line argument is expected: the output file name.
    if len(sys.argv) != 2:
        print('Please specify output filename.')
        # Exit non-zero on a usage error, matching the missing-credentials
        # path below, so calling shells/scripts can detect the failure.
        sys.exit(1)
    output_file_name = sys.argv[1]

    print('Please input the aws_access_key_id/aws_secret_access_key and a target bucket name.')
    print('If you don\'t input the aws_access_key_id/aws_secret_access_key, then we use awscli config.')
    # getpass keeps the credentials from being echoed to the terminal.
    aws_access_key_id = getpass('aws_access_key_id: ')
    aws_secret_access_key = getpass('aws_secret_access_key: ')
    bucket_name = raw_input('target bucket name: ')

    # With no credentials typed in, the only remaining source is the awscli
    # config file; bail out early if that file does not exist.
    if not aws_access_key_id and not aws_secret_access_key and not os.path.isfile(AWS_CLI_CONFIG_PATH):
        print('Please specify the aws_access_key_id/aws_secret_access_key or create awscli config.')
        sys.exit(1)

    write_tsv(
        aws_access_key_id,
        aws_secret_access_key,
        bucket_name,
        output_file_name)
    print('Output: {}'.format(output_file_name))
**Stealth marketing:** The company I work for seems to be recruiting. If you would like to try writing Python, please apply.
Recommended Posts