[PYTHON] The story of copying data from S3 to Google's TeamDrive

Background

I don't think there is much demand for this, but I set things up so that files stored in S3 are also saved to another vendor's storage and can be shared easily. Using the Google Drive API was not difficult as long as I followed the documentation, but I did hit a few snags when the file size was large. Since I may do a similar implementation again, I'm leaving this as a memo just in case.

What I want to do

Automatically transfer S3 data to Google Drive.

What I did

I migrated the contents of an S3 bucket to a specified Team Drive using Python.

Implementation details

  1. Download the file from S3 to /tmp.
  2. Check whether the file already exists in Google Drive.
  3. If it exists, overwrite it.
  4. If it does not exist, upload it as a new file (a rough sketch of this flow follows below).
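Expressed as code, the flow is roughly the skeleton below. This is only an outline in my own naming, not the actual implementation: the helper functions are placeholders, and the real source appears later in this post.

import os

# Rough outline of the flow only -- the helpers are placeholders,
# not functions from the actual script shown later in this post.
def sync_s3_object_to_drive(bucket: str, key: str, folder_id: str) -> None:
    # 1. Download the file from S3 to /tmp
    file_name = key.split("/")[-1]
    file_path = os.path.join("/tmp", file_name)
    download_from_s3(bucket, key, file_path)

    # 2. Check whether a file with the same name already exists in the Drive folder
    existing_file_id = find_drive_file(folder_id, file_name)

    if existing_file_id:
        # 3. Overwrite the existing file
        overwrite_drive_file(existing_file_id, file_path)
    else:
        # 4. Upload it as a new file
        create_drive_file(folder_id, file_path)

def download_from_s3(bucket: str, key: str, file_path: str) -> None:
    ...  # boto3 download_file, see the full source below

def find_drive_file(folder_id: str, file_name: str):
    ...  # Drive v3 files.list query on name and parents, see the full source below

def overwrite_drive_file(file_id: str, file_path: str) -> None:
    ...  # resumable upload against files/{fileId}, see the full source below

def create_drive_file(folder_id: str, file_path: str) -> None:
    ...  # resumable upload against files, see the full source below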

Advance preparation

- The files to be migrated exist in an S3 bucket
- GoogleClientId and GoogleClientSecret (see reference site)
- GoogleRefreshToken (see reference site)
- FolderId of the destination Google Drive folder
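In the source below these settings are hard-coded for simplicity. As a side note, they could instead be kept out of the code, for example in environment variables; the environment variable names here are my own choice, not something the script requires.

import os

# Hypothetical alternative: read the settings from environment variables
# instead of hard-coding them in the script.
CONTENT_BUCKET_NAME = os.environ["CONTENT_BUCKET_NAME"]
CONTENT_BACKUP_KEY = os.environ["CONTENT_BACKUP_KEY"]
GOOGLE_CLIENT_ID = os.environ["GOOGLE_CLIENT_ID"]
GOOGLE_CLIENT_SECRET = os.environ["GOOGLE_CLIENT_SECRET"]
GOOGLE_REFRESH_TOKEN = os.environ["GOOGLE_REFRESH_TOKEN"]
GOOGLE_FOLDER_ID = os.environ["GOOGLE_FOLDER_ID"]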

Here is the source I actually implemented.

# Download File from S3 to Local tmp Dir
# Upload a file to Google Drive

import os
import boto3
import json
import requests
import magic



## setting info
CONTENT_BUCKET_NAME = 'MY_S3_BUCKET_NAME'
CONTENT_BACKUP_KEY = 'MY_S3_BUCKET_KEY'
GOOGLE_CLIENT_ID = "XXXXXXXXXXXX.apps.googleusercontent.com"
GOOGLE_CLIENT_SECRET = "XXXXXXXXXXXX"
GOOGLE_REFRESH_TOKEN = "XXXXXXXXXXXX"
GOOGLE_FOLDER_ID = 'GOOGLE_FOLDER_ID'


s3 = boto3.resource('s3')
 
# Download the target object from S3 to the local tmp directory
bucket = CONTENT_BUCKET_NAME
key = CONTENT_BACKUP_KEY
file_name = key.split("/")[-1]  # last path component of the key
file_path = os.path.join("/tmp", file_name)
s3.Object(bucket, key).download_file(file_path)
filesize = os.path.getsize(file_path)
fname, extension = os.path.splitext(file_name)

# Exchange the refresh token for a fresh access token
access_token_url = 'https://accounts.google.com/o/oauth2/token'
headers = {"Content-Type": "application/json", "X-Accept": "application/json"}
refresh_token_request = {"grant_type": "refresh_token", "client_id": GOOGLE_CLIENT_ID, "client_secret": GOOGLE_CLIENT_SECRET, "refresh_token": GOOGLE_REFRESH_TOKEN}
token_response = requests.post(access_token_url, headers=headers, data=json.dumps(refresh_token_request))
access_token = token_response.json()['access_token']

# Check whether a file with the same name already exists in the destination folder
listUrl = "https://www.googleapis.com/drive/v3/files"
headers = {
    'Host':'www.googleapis.com',
    'Authorization': 'Bearer ' + access_token,
    'Content-Type':'application/json; charset=UTF-8',
    "X-Accept":"application/json"
}
qs= { "q": "'" + GOOGLE_FOLDER_ID + "' in parents and name='" + file_name + "' and trashed=false",
      "supportsAllDrives": True,
      "includeItemsFromAllDrives": True
    }

fileExistCheck = requests.get(listUrl, params=qs, headers=headers)
responseJsonFiles = fileExistCheck.json()['files']
searchResponseLength = len(responseJsonFiles)

# Detect the MIME type of the downloaded file
mime = magic.Magic(mime=True)
mimeType = mime.from_file(file_path) 

# Headers for initiating the resumable upload session (the body of this request
# is JSON metadata, so requests sets the Content-Length itself)
headers = {
    'Host':'www.googleapis.com',
    'Authorization': 'Bearer ' + access_token,
    'Content-Type':'application/json; charset=UTF-8',
    'X-Upload-Content-Type': mimeType,
    'X-Upload-Content-Length': str(filesize)
}

with open(file_path, 'rb') as data:
  file_name = os.path.basename(file_path)
  metadata = {
    "name": file_name,
    "title": file_name,
    "parents": [GOOGLE_FOLDER_ID],
    'kind': 'drive#permission',
    "permissionDetails": [
      {
        "permissionType": "file",
        "role": "organizer"
      }
    ],
  }
 
  # Headers for sending the actual file content to the resumable session URL
  upload_headers = {
    'Authorization': 'Bearer ' + access_token,
    'Content-Type': mimeType
  }

  # The file does not exist yet: create it with a new resumable upload session.
  if searchResponseLength < 1:
    postUrl = "https://www.googleapis.com/upload/drive/v3/files?uploadType=resumable&supportsAllDrives=true"
    r = requests.post(postUrl, data=json.dumps(metadata), headers=headers)
    r.raise_for_status()
    # Session URL to which the file content is uploaded
    uploadUrl = r.headers['Location']

    r2 = requests.post(uploadUrl, data=data, headers=upload_headers)

  # The file already exists: overwrite its content.
  else:
    fileId = responseJsonFiles[0]['id']
    metadata = {
      "filename": file_name,
      "name": file_name,
      "title": file_name,
      'kind': 'drive#permission',
      "permissionDetails": [
        {
          "permissionType": "file",
          "role": "organizer"
        }
      ]
    }

    patchUrl = "https://www.googleapis.com/upload/drive/v3/files/" + fileId + "?uploadType=resumable&supportsAllDrives=true"
    r = requests.patch(patchUrl, data=json.dumps(metadata), headers=headers)
    r.raise_for_status()
    uploadUrl = r.headers['Location']
    r2 = requests.patch(uploadUrl, data=data, headers=upload_headers)

In closing

I'm sure there is room for improvement, so if you know an easier way to implement this, please leave a comment.
