# -*- coding: utf-8 -*- import sys import collections import datetime import calendar import os from collections import defaultdict from openpyxl import load_workbook import numpy as np import pandas as pd from openpyxl import Workbook from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors import boto3 import time _dict = { # 6910账号aksk 'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY', 'AWS_SECRET_ACCESS_KEY': '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', # 7422账号aksk 'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY", 'GLOBAL_AWS_SECRET_ACCESS_KEY': "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" } aws_aksk = { "089158463905": { "account_type": "amazon-china", "aws_access_key_id": "", "aws_secret_access_key": "", "bucket": "089158463905billing", "credential_type": "Amazon IAM Role", "role_arn": "arn:aws-cn:iam::089158463905:role/089158463905billing", "external_id": "l2c-1i5u1ds0", }, } def auto_download_file_form_s3(file: tuple): for i in file: payer_id = i.split('-')[0] credential = get_credentials(payer_id) bucket = credential.pop('bucket') s3_client = boto3.client('s3', **credential) print('{}开始下载'.format(i)) s3_client.download_file( bucket, i, i) print('{}下载完成'.format(i)) def get_credentials(payer_id): credential = aws_aksk.get(payer_id, None) if not credential: return if credential.get('account_type', None) == 'amazon-china': region_name = 'cn-northwest-1' aws_access_key_id = _dict['AWS_ACCESS_KEY_ID'] aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY'] else: region_name = 'us-east-2' aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID'] aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY'] if credential.get('credential_type', None) == 'Amazon Access Key': return { 'aws_access_key_id': credential['aws_access_key_id'], 'aws_secret_access_key': credential['aws_secret_access_key'], 'region_name': region_name, 'bucket': credential['bucket'] } else: sts = boto3.client('sts', region_name=region_name, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key ) sts_credential = sts.assume_role( RoleArn=credential['role_arn'], RoleSessionName='crawl-billing' + str(time.time()), DurationSeconds=900, ExternalId=credential['external_id'] ) return { 'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'], 'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'], 'region_name': region_name, 'aws_session_token': sts_credential['Credentials']['SessionToken'], 'bucket': credential['bucket'] } month_dict = { '01': 'Jan.', '02': 'Feb.', '03': 'Mar.', '04': 'April', '05': 'May', '06': 'June', '07': 'July', '08': 'Aug.', '09': 'Sep.', '10': 'Oct.', '11': 'Nov.', '12': 'Dec.' } class Custom_Excel(): def __init__(self, *args, **kwargs): self.path = kwargs.get('path', None) self.prd_li = kwargs.get('prd_li', []) self.non_prd_li = kwargs.get('non_prd_li', []) self.name = kwargs.get('name', '') def get_data(self): df = pd.DataFrame() self.month = month_dict.get(self.path[0][-6:-4]) for csv in self.path: csv_data = pd.read_csv( csv, sep=',', encoding='utf-8', skiprows=[0, ], dtype={'LinkedAccountId': np.object}, ) df = pd.concat([df, csv_data], axis=0) df.reset_index(drop=True) df['UsageType'] = df['UsageType'].fillna('') df = df[df['RecordType'] == 'LinkedLineItem'] # total = df['CostBeforeTax'].sum() EC2_data = df[(df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances') | df['Operation'].str.contains('Hourly'))] EBS_data = df.loc[df['UsageType'].str.contains('EBS')] snapshot_data = EBS_data.loc[EBS_data['ItemDescription'].str.contains('snapshot')] ELB_data = df[df['ProductName'] == 'Elastic Load Balancing'] S3_data = df[df['ProductName'] == 'Amazon Simple Storage Service'] DC_data = df[df['ProductName'] == 'AWS Direct Connect'] other_data = df[(~((df['ProductName'] == 'Amazon Elastic Compute Cloud') & (df['Operation'].str.contains('RunInstances')))) & (~(df['UsageType'].str.contains('EBS'))) & (df['ProductName'] != 'Amazon Simple Storage Service') & (df['ProductName'] != 'Elastic Load Balancing') & (df['ProductName'] != 'Amazon Elastic Compute Cloud') & (df['ProductName'] != 'AWS Direct Connect') & (df['ProductName'].notna())] vat_data = df snapshot_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() snapshot_no_prd = snapshot_data[snapshot_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() EBS_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() - snapshot_prd EBS_no_prd = EBS_data[EBS_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() - snapshot_no_prd EC2_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() EC2_no_prd = EC2_data[EC2_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() ELB_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() ELB_no_prd = ELB_data[ELB_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() S3_prd = S3_data[S3_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() S3_no_prd = S3_data[S3_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() DC_prd = DC_data[DC_data['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() DC_no_prd = DC_data[DC_data['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() other_prd = df[df['LinkedAccountId'].isin(self.prd_li)]['CostBeforeTax'].sum() other_no_prd = df[df['LinkedAccountId'].isin(self.non_prd_li)]['CostBeforeTax'].sum() vat_prd = vat_data[vat_data['LinkedAccountId'].isin(self.prd_li)]['TaxAmount'].sum() vat_no_prd = vat_data[vat_data['LinkedAccountId'].isin(self.non_prd_li)]['TaxAmount'].sum() workbook = load_workbook(self.name) sheet = workbook[self.month] sheet.cell(2, 2).value = EC2_prd sheet.cell(2, 3).value = EC2_no_prd sheet.cell(2, 4).value = EC2_prd + EC2_no_prd sheet.cell(3, 2).value = EBS_prd sheet.cell(3, 3).value = EBS_no_prd sheet.cell(3, 4).value = EBS_prd + EBS_no_prd sheet.cell(4, 2).value = snapshot_prd sheet.cell(4, 3).value = snapshot_no_prd sheet.cell(4, 4).value = snapshot_prd + snapshot_no_prd sheet.cell(5, 2).value = ELB_prd sheet.cell(5, 3).value = ELB_no_prd sheet.cell(5, 4).value = ELB_prd + ELB_no_prd sheet.cell(6, 2).value = S3_prd sheet.cell(6, 3).value = S3_no_prd sheet.cell(6, 4).value = S3_prd + S3_no_prd sheet.cell(7, 2).value = DC_prd sheet.cell(7, 3).value = DC_no_prd sheet.cell(7, 4).value = DC_prd + DC_no_prd sheet.cell(8, 2).value = other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd sheet.cell(8, 3).value = other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd sheet.cell(8, 4).value = (other_prd - EC2_prd - EBS_prd - snapshot_prd - ELB_prd - S3_prd - DC_prd) + (other_no_prd - EC2_no_prd - EBS_no_prd - snapshot_no_prd - ELB_no_prd - S3_no_prd - DC_no_prd) sheet.cell(9, 2).value = vat_prd sheet.cell(9, 3).value = vat_no_prd sheet.cell(9, 4).value = vat_prd + vat_no_prd sheet.cell(10, 2).value = other_prd + vat_prd sheet.cell(10, 3).value = other_no_prd + vat_no_prd sheet.cell(10, 4).value = other_prd + other_no_prd + vat_prd + vat_no_prd workbook.save(self.name) print('数据处理完成') if __name__ == '__main__': # 获取输入执行时输入参数 month = '2023-07' if len(sys.argv) > 1: month = sys.argv[1] print('执行月份:' + month) csv_list = ['089158463905-aws-cost-allocation-ACTS-'+month+'.csv', '089158463905-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv'] # 1.1 循环附件名 for csv in csv_list: # print(os.path.isfile('./'+csv)) # 1.1.1附件不存在,则拉取 if not os.path.isfile('./' + csv): print('附件不存在:', csv) auto_download_file_form_s3([csv]) name = 'SONY PSC Monthly Bill Analysis-2023.xlsx' ecar_sona_name = 'SONY Ecar&SONA Monthly Bill Analysis-2023.xlsx' prd_li = ['594359020897'] non_prd_li = ['538753690131', '158810359537'] ecar_sona_prd_li = [''] ecar_sona = ['244618405305'] Custom_Excel(path=csv_list, prd_li=prd_li, non_prd_li=non_prd_li, name=name).get_data() Custom_Excel(path=csv_list, prd_li=ecar_sona_prd_li, non_prd_li=ecar_sona, name=ecar_sona_name).get_data()