You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

209 lines
9.5 KiB

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

# -*- coding: utf-8 -*-
import sys
import collections
import datetime
import calendar
import os
from collections import defaultdict
from openpyxl import load_workbook
import numpy as np
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors
import boto3
import time
_dict = {
# 6910账号aksk
'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY',
'AWS_SECRET_ACCESS_KEY': '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',
# 7422账号aksk
'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY",
'GLOBAL_AWS_SECRET_ACCESS_KEY': "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"
}
aws_aksk = {
"089158463905": {
"account_type": "amazon-china",
"aws_access_key_id": "",
"aws_secret_access_key": "",
"bucket": "089158463905billing",
"credential_type": "Amazon IAM Role",
"role_arn": "arn:aws-cn:iam::089158463905:role/089158463905billing",
"external_id": "l2c-1i5u1ds0",
},
}
def auto_download_file_form_s3(file: tuple):
for i in file:
payer_id = i.split('-')[0]
credential = get_credentials(payer_id)
bucket = credential.pop('bucket')
s3_client = boto3.client('s3', **credential)
print('{}开始下载'.format(i))
s3_client.download_file(
bucket, i, i)
print('{}下载完成'.format(i))
def get_credentials(payer_id):
credential = aws_aksk.get(payer_id, None)
if not credential:
return
if credential.get('account_type', None) == 'amazon-china':
region_name = 'cn-northwest-1'
aws_access_key_id = _dict['AWS_ACCESS_KEY_ID']
aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY']
else:
region_name = 'us-east-2'
aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID']
aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY']
if credential.get('credential_type', None) == 'Amazon Access Key':
return {
'aws_access_key_id': credential['aws_access_key_id'],
'aws_secret_access_key': credential['aws_secret_access_key'],
'region_name': region_name,
'bucket': credential['bucket']
}
else:
sts = boto3.client('sts',
region_name=region_name,
aws_access_key_id=aws_access_key_id,
aws_secret_access_key=aws_secret_access_key
)
sts_credential = sts.assume_role(
RoleArn=credential['role_arn'],
RoleSessionName='crawl-billing' + str(time.time()),
DurationSeconds=900,
ExternalId=credential['external_id']
)
return {
'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'],
'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'],
'region_name': region_name,
'aws_session_token': sts_credential['Credentials']['SessionToken'],
'bucket': credential['bucket']
}
month_dict = {
'01': 'Jan.',
'02': 'Feb.',
'03': 'Mar.',
'04': 'April',
'05': 'May',
'06': 'June',
'07': 'July',
'08': 'Aug.',
'09': 'Sep.',
'10': 'Oct.',
'11': 'Nov.',
'12': 'Dec.'
}
class Custom_Excel():
def __init__(self, *args, **kwargs):
self.path = kwargs.get('path', None)
self.prd_li = kwargs.get('prd_li', [])
self.non_prd_li = kwargs.get('non_prd_li', [])
self.name = kwargs.get('name', '')
def get_data(self):
df = pd.DataFrame()
self.month = month_dict.get(self.path[0][-6:-4])
for csv in self.path:
csv_data = pd.read_csv(
csv,
sep=',',
encoding='utf-8',
skiprows=[0, ],
dtype={'LinkedAccountId': np.object},
)
df = pd.concat([df, csv_data], axis=0)
df.reset_index(drop=True)
df['UsageType'] = df['UsageType'].fillna('')
df = df[df['RecordType'] == 'LinkedLineItem']
# total = df['CostBeforeTax'].sum()
EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))]
EBS_data = df.loc[df['UsageType'].str.contains('EBS')]
snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')]
ELB_data = df[df['ProductName'] == 'Elastic Load Balancing']
S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service']
DC_data = df[df['ProductName'] == 'AWS Direct Connect']
other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) &
(~(df['UsageType'].str.contains('EBS'))) &
(df['ProductName'] != 'Amazon Simple Storage Service') &
(df['ProductName'] != 'Elastic Load Balancing') &
(df['ProductName'] != 'Amazon Elastic Compute Cloud') &
(df['ProductName'] != 'AWS Direct Connect') &
(df['ProductName'].notna())]
vat_data = df
snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd
EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd
EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum()
other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum()
vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum()
vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum()
workbook = load_workbook(self.name)
sheet = workbook[self.month]
sheet.cell(2, 2).value = EC2_prd
sheet.cell(2, 3).value = EC2_no_prd
sheet.cell(2, 4).value = EC2_prd + EC2_no_prd
sheet.cell(3, 2).value = EBS_prd
sheet.cell(3, 3).value = EBS_no_prd
sheet.cell(3, 4).value = EBS_prd + EBS_no_prd
sheet.cell(4, 2).value = snapshot_prd
sheet.cell(4, 3).value = snapshot_no_prd
sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd
sheet.cell(5, 2).value = ELB_prd
sheet.cell(5, 3).value = ELB_no_prd
sheet.cell(5, 4).value = ELB_prd + ELB_no_prd
sheet.cell(6, 2).value = S3_prd
sheet.cell(6, 3).value = S3_no_prd
sheet.cell(6, 4).value = S3_prd + S3_no_prd
sheet.cell(7, 2).value = DC_prd
sheet.cell(7, 3).value = DC_no_prd
sheet.cell(7, 4).value = DC_prd + DC_no_prd
sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd
sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd
sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd)
sheet.cell(9, 2).value = vat_prd
sheet.cell(9, 3).value = vat_no_prd
sheet.cell(9, 4).value = vat_prd + vat_no_prd
sheet.cell(10, 2).value = other_prd + vat_prd
sheet.cell(10, 3).value = other_no_prd + vat_no_prd
sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd
workbook.save(self.name)
print('数据处理完成')
if __name__ == '__main__':
# 获取输入执行时输入参数
month = '2023-07'
if len(sys.argv) > 1:
month = sys.argv[1]
print('执行月份:' + month)
csv_list = ['089158463905-aws-cost-allocation-ACTS-'+month+'.csv',
'089158463905-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv']
# 1.1 循环附件名
for csv in csv_list:
# print(os.path.isfile('./'+csv))
# 1.1.1附件不存在,则拉取
if not os.path.isfile('./' + csv):
print('附件不存在:', csv)
auto_download_file_form_s3([csv])
name = 'SONY PSC Monthly Bill Analysis-2023.xlsx'
ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2023.xlsx'
prd_li = ['594359020897']
non_prd_li = ['538753690131', '158810359537']
ecar_sona_prd_li = ['']
ecar_sona = ['244618405305']
Custom_Excel(path=csv_list, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data()
Custom_Excel(path=csv_list, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data()