|
|
# -*- coding: utf-8 -*-
|
|
|
import sys
|
|
|
import collections
|
|
|
import datetime
|
|
|
import calendar
|
|
|
import os
|
|
|
from collections import defaultdict
|
|
|
from openpyxl import load_workbook
|
|
|
import numpy as np
|
|
|
import pandas as pd
|
|
|
from openpyxl import Workbook
|
|
|
from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors
|
|
|
|
|
|
import boto3
|
|
|
import time
|
|
|
_dict = {
|
|
|
# 6910账号aksk
|
|
|
'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY',
|
|
|
'AWS_SECRET_ACCESS_KEY': 'AVfPZ/XryDsoWmYB293eNPOyiO82x9xR62KFKezS',
|
|
|
# 7422账号aksk
|
|
|
'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY",
|
|
|
'GLOBAL_AWS_SECRET_ACCESS_KEY': "o1iQFQFSW+W/GGzjIRcNu1RDO1s72Uv2pJK/zf3z"
|
|
|
}
|
|
|
aws_aksk = {
|
|
|
"089158463905": {
|
|
|
"account_type": "amazon-china",
|
|
|
"aws_access_key_id": "",
|
|
|
"aws_secret_access_key": "",
|
|
|
"bucket": "089158463905billing",
|
|
|
"credential_type": "Amazon IAM Role",
|
|
|
"role_arn": "arn:aws-cn:iam::089158463905:role/089158463905billing",
|
|
|
"external_id": "l2c-1i5u1ds0",
|
|
|
},
|
|
|
}
|
|
|
def auto_download_file_form_s3(file: tuple):
|
|
|
for i in file:
|
|
|
payer_id = i.split('-')[0]
|
|
|
credential = get_credentials(payer_id)
|
|
|
bucket = credential.pop('bucket')
|
|
|
s3_client = boto3.client('s3', **credential)
|
|
|
print('{}开始下载'.format(i))
|
|
|
s3_client.download_file(
|
|
|
bucket, i, i)
|
|
|
print('{}下载完成'.format(i))
|
|
|
def get_credentials(payer_id):
|
|
|
credential = aws_aksk.get(payer_id, None)
|
|
|
if not credential:
|
|
|
return
|
|
|
if credential.get('account_type', None) == 'amazon-china':
|
|
|
region_name = 'cn-northwest-1'
|
|
|
aws_access_key_id = _dict['AWS_ACCESS_KEY_ID']
|
|
|
aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY']
|
|
|
else:
|
|
|
region_name = 'us-east-2'
|
|
|
aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID']
|
|
|
aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY']
|
|
|
if credential.get('credential_type', None) == 'Amazon Access Key':
|
|
|
return {
|
|
|
'aws_access_key_id': credential['aws_access_key_id'],
|
|
|
'aws_secret_access_key': credential['aws_secret_access_key'],
|
|
|
'region_name': region_name,
|
|
|
'bucket': credential['bucket']
|
|
|
}
|
|
|
else:
|
|
|
sts = boto3.client('sts',
|
|
|
region_name=region_name,
|
|
|
aws_access_key_id=aws_access_key_id,
|
|
|
aws_secret_access_key=aws_secret_access_key
|
|
|
)
|
|
|
sts_credential = sts.assume_role(
|
|
|
RoleArn=credential['role_arn'],
|
|
|
RoleSessionName='crawl-billing' + str(time.time()),
|
|
|
DurationSeconds=900,
|
|
|
ExternalId=credential['external_id']
|
|
|
)
|
|
|
return {
|
|
|
'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'],
|
|
|
'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'],
|
|
|
'region_name': region_name,
|
|
|
'aws_session_token': sts_credential['Credentials']['SessionToken'],
|
|
|
'bucket': credential['bucket']
|
|
|
}
|
|
|
|
|
|
month_dict = {
|
|
|
'01': 'Jan.',
|
|
|
'02': 'Feb.',
|
|
|
'03': 'Mar.',
|
|
|
'04': 'April',
|
|
|
'05': 'May',
|
|
|
'06': 'June',
|
|
|
'07': 'July',
|
|
|
'08': 'Aug.',
|
|
|
'09': 'Sep.',
|
|
|
'10': 'Oct.',
|
|
|
'11': 'Nov.',
|
|
|
'12': 'Dec.'
|
|
|
}
|
|
|
|
|
|
class Custom_Excel():
|
|
|
|
|
|
def __init__(self, *args, **kwargs):
|
|
|
self.path = kwargs.get('path', None)
|
|
|
self.prd_li = kwargs.get('prd_li', [])
|
|
|
self.non_prd_li = kwargs.get('non_prd_li', [])
|
|
|
self.name = kwargs.get('name', '')
|
|
|
|
|
|
def get_data(self):
|
|
|
df = pd.DataFrame()
|
|
|
self.month = month_dict.get(self.path[0][-6:-4])
|
|
|
for csv in self.path:
|
|
|
csv_data = pd.read_csv(
|
|
|
csv,
|
|
|
sep=',',
|
|
|
encoding='utf-8',
|
|
|
skiprows=[0, ],
|
|
|
dtype={'LinkedAccountId': np.object},
|
|
|
)
|
|
|
df = pd.concat([df, csv_data], axis=0)
|
|
|
df.reset_index(drop=True)
|
|
|
df['UsageType'] = df['UsageType'].fillna('')
|
|
|
df = df[df['RecordType'] == 'LinkedLineItem']
|
|
|
# total = df['CostBeforeTax'].sum()
|
|
|
EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))]
|
|
|
EBS_data = df.loc[df['UsageType'].str.contains('EBS')]
|
|
|
snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')]
|
|
|
ELB_data = df[df['ProductName'] == 'Elastic Load Balancing']
|
|
|
S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service']
|
|
|
DC_data = df[df['ProductName'] == 'AWS Direct Connect']
|
|
|
other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) &
|
|
|
(~(df['UsageType'].str.contains('EBS'))) &
|
|
|
(df['ProductName'] != 'Amazon Simple Storage Service') &
|
|
|
(df['ProductName'] != 'Elastic Load Balancing') &
|
|
|
(df['ProductName'] != 'Amazon Elastic Compute Cloud') &
|
|
|
(df['ProductName'] != 'AWS Direct Connect') &
|
|
|
(df['ProductName'].notna())]
|
|
|
vat_data = df
|
|
|
snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd
|
|
|
EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd
|
|
|
EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
|
|
|
other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
|
|
|
vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum()
|
|
|
vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum()
|
|
|
workbook = load_workbook(self.name)
|
|
|
sheet = workbook[self.month]
|
|
|
sheet.cell(2, 2).value = EC2_prd
|
|
|
sheet.cell(2, 3).value = EC2_no_prd
|
|
|
sheet.cell(2, 4).value = EC2_prd + EC2_no_prd
|
|
|
sheet.cell(3, 2).value = EBS_prd
|
|
|
sheet.cell(3, 3).value = EBS_no_prd
|
|
|
sheet.cell(3, 4).value = EBS_prd + EBS_no_prd
|
|
|
sheet.cell(4, 2).value = snapshot_prd
|
|
|
sheet.cell(4, 3).value = snapshot_no_prd
|
|
|
sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd
|
|
|
sheet.cell(5, 2).value = ELB_prd
|
|
|
sheet.cell(5, 3).value = ELB_no_prd
|
|
|
sheet.cell(5, 4).value = ELB_prd + ELB_no_prd
|
|
|
sheet.cell(6, 2).value = S3_prd
|
|
|
sheet.cell(6, 3).value = S3_no_prd
|
|
|
sheet.cell(6, 4).value = S3_prd + S3_no_prd
|
|
|
sheet.cell(7, 2).value = DC_prd
|
|
|
sheet.cell(7, 3).value = DC_no_prd
|
|
|
sheet.cell(7, 4).value = DC_prd + DC_no_prd
|
|
|
sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd
|
|
|
sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd
|
|
|
sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd)
|
|
|
sheet.cell(9, 2).value = vat_prd
|
|
|
sheet.cell(9, 3).value = vat_no_prd
|
|
|
sheet.cell(9, 4).value = vat_prd + vat_no_prd
|
|
|
sheet.cell(10, 2).value = other_prd + vat_prd
|
|
|
sheet.cell(10, 3).value = other_no_prd + vat_no_prd
|
|
|
sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd
|
|
|
workbook.save(self.name)
|
|
|
print('数据处理完成')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
# 获取输入执行时输入参数
|
|
|
month = '2023-07'
|
|
|
if len(sys.argv) > 1:
|
|
|
month = sys.argv[1]
|
|
|
print('执行月份:' + month)
|
|
|
|
|
|
csv_list = ['089158463905-aws-cost-allocation-ACTS-'+month+'.csv',
|
|
|
'089158463905-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv']
|
|
|
# 1.1 循环附件名
|
|
|
for csv in csv_list:
|
|
|
# print(os.path.isfile('./'+csv))
|
|
|
# 1.1.1附件不存在,则拉取
|
|
|
if not os.path.isfile('./' + csv):
|
|
|
print('附件不存在:', csv)
|
|
|
auto_download_file_form_s3([csv])
|
|
|
|
|
|
name = 'SONY PSC Monthly Bill Analysis-2023.xlsx'
|
|
|
ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2023.xlsx'
|
|
|
prd_li = ['594359020897']
|
|
|
non_prd_li = ['538753690131', '158810359537']
|
|
|
ecar_sona_prd_li = ['']
|
|
|
ecar_sona = ['244618405305']
|
|
|
Custom_Excel(path=csv_list, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data()
|
|
|
Custom_Excel(path=csv_list, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data() |