import numpy as np import pandas as pd from openpyxl import Workbook from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors import boto3 import time import os from collections import defaultdict import openpyxl import time import datetime import calendar from calendar import monthrange from docxtpl import DocxTemplate from pandas import Index import shutil import sys _dict = { # 6910账号aksk 'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY', 'AWS_SECRET_ACCESS_KEY': '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', # 7422账号aksk 'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY", 'GLOBAL_AWS_SECRET_ACCESS_KEY': "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" } aws_aksk = { "785781180852": { "account_type": "amazon-china", "aws_access_key_id": "AKIA3N5BVJG2B7BN7D7F", "aws_secret_access_key": "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", "bucket": "785781180852billing", "credential_type": "Amazon Access Key", "role_arn": "", "external_id": "" }, } def auto_download_file_form_s3(file: tuple): for i in file: payer_id = i.split('-')[0] credential = get_credentials(payer_id) bucket = credential.pop('bucket') s3_client = boto3.client('s3', **credential) print('{}开始下载'.format(i)) s3_client.download_file( bucket,i, i) #shutil.copy(i, data_dir+i) #shutil.move(i, os.path.join(data_dir, i)) print('{}下载完成'.format(i)) def get_credentials(payer_id): credential = aws_aksk.get(payer_id, None) if not credential: return if credential.get('account_type', None) == 'amazon-china': region_name = 'cn-northwest-1' aws_access_key_id = _dict['AWS_ACCESS_KEY_ID'] aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY'] else: region_name = 'us-east-2' aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID'] aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY'] if credential.get('credential_type', None) == 'Amazon Access Key': return { 'aws_access_key_id': credential['aws_access_key_id'], 'aws_secret_access_key': credential['aws_secret_access_key'], 'region_name': region_name, 'bucket': credential['bucket'] } else: sts = boto3.client('sts', region_name=region_name, aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key ) sts_credential = sts.assume_role( RoleArn=credential['role_arn'], RoleSessionName='crawl-billing' + str(time.time()), DurationSeconds=900, ExternalId=credential['external_id'] ) return { 'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'], 'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'], 'region_name': region_name, 'aws_session_token': sts_credential['Credentials']['SessionToken'], 'bucket': credential['bucket'] } bwm_map = { # 'LingYue': { # '012281355708', # '356785436510', # '658099132956', # '658759333659', # '266213574481', # '292889508331', # '301085154281', # '128030099971', # '130024587818', # '130277835603', # '927108770652', # '462173214055', # }, 'NSC': { "738984596256", "516494992171", "516559113274", "516718973345", "115197553139", "115888370350", "482015173674", "741600887936", "041339000575", "220276650644", "220254894522", "741612116470", "566166420744", "571369485139", "571453974672", "571620293191", "281283439347", "281409454190", "391370694119", "391652259466", "257098133642", "258662998378", "259918045777", "174766421626", "018419931154", "019468012204", "019616485031", "019628801948", "221737289359", "222086759490", "222704261174", "266213574481", "128030099971", "130024587818", "042047776321", "107974775660", "130772574839", "772981767791", "804270813472", "750594069180", "957524751526", "605745121453", "605427007075", "462729772077", "825059405741", "825754710372", "826223612679", "826726830564", "188665867276", "190967990884", "265795087340", "266066444577", "130349229654", }, 'BCS': { "012281355708", "356785436510", "658099132956", "658759333659", "130277835603", "462173214055", "483213877262", }, 'BBA': { "114925441656", "220347068059", "115245083140", "115814403771", "482112011301", "482174782124", "066200769587", "066546212151", "066999274102", "067021776723", "067030146309", "763476499527", "607562784642", "607580047483", "607962074546", "608034771178", "902176823059", "902161166225", "114643259757", "472159510273", "470697101235", "471757415074", "179496244136", "763781648782", "125559227903", "133017054983", "178547782801", "181289317951", "902120954857", "902219438803", "763854435329", "763918937436", "610756541592", "763933527642", "827982547867", "119959612117", "126654413831", "127966691320", "131447637234", "132225845962", "133703668756", "179751599285", "763669372248", "180763457193", "902123337973", "889045227160", "889312712683", } } csv_columns2 = Index( [ 'InvoiceID', 'PayerAccountId', 'LinkedAccountId', 'RecordType', 'RecordID', 'BillingPeriodStartDate', 'BillingPeriodEndDate', 'InvoiceDate', 'PayerAccountName', 'LinkedAccountName', 'TaxationAddress', 'PayerPONumber', 'ProductCode', 'ProductName', 'SellerOfRecord', 'UsageType', 'Operation', 'AvailabilityZone', 'RateId', 'ItemDescription', 'UsageStartDate', 'UsageEndDate', 'UsageQuantity', 'BlendedRate', 'CurrencyCode', 'CostBeforeTax', 'Credits', 'TaxAmount', 'TaxType', 'TotalCost', 'aws:autoscaling:groupName', 'aws:cloudformation:logical-id', 'aws:cloudformation:stack-id', 'aws:cloudformation:stack-name', 'aws:createdBy', 'aws:ec2launchtemplate:id', 'aws:ec2launchtemplate:version', 'aws:elasticmapreduce:instance-group-role', 'aws:elasticmapreduce:job-flow-id', 'user:APP-ID', 'user:AccountID', 'user:Author', 'user:COST TAG 1', 'user:Cost Center', 'user:DLP', 'user:Description', 'user:Environment', 'user:EnvironmentType', 'user:KubernetesCluster', 'user:ManagedByAmazonSageMakerResource', 'user:NAME', 'user:Name', 'user:Patch Group', 'user:Product ID', 'user:ProjectName', 'user:Responsible', 'user:Role', 'user:Scope', 'user:Stage', 'user:Terraform', 'user:Tool', 'user:createdBy', 'user:data-source', 'user:data-source-billing', 'user:desc', 'user:description', 'user:elasticbeanstalk:environment-id', 'user:elasticbeanstalk:environment-name', 'user:environment', 'user:hub', 'user:k8s.io/cluster-autoscaler/node-template/label/kops.k8s.io/instancegroup', 'user:k8s.io/etcd/events', 'user:k8s.io/etcd/main', 'user:k8s.io/role/master', 'user:k8s.io/role/node', 'user:kubernetes.io/cluster/infotain-e2e.k8s.local', 'user:kubernetes.io/cluster/infotain-prod.k8s.local', 'user:kubernetes.io/created-for/pv/name', 'user:kubernetes.io/created-for/pvc/name', 'user:kubernetes.io/created-for/pvc/namespace', 'user:kubernetes.io/service-name', 'user:name', 'user:namespace', 'user:owner', 'user:project', 'user:resourcePrefix', 'user:role', 'user:server-name', 'user:snsTopicArn', 'user:stage', 'user:sub-product', 'user:workload-type']) data_type = { 'UsageStartDate': object, 'UsageEndDate': object, 'InvoiceID': object, 'ProductName': object, 'PayerAccountId': object, 'LinkedAccountId': object, 'UnBlendedCost': str, 'BlendedCost': str, 'ItemDescription': object, 'RecordType': object } def from_filename_get_datetime(filename: str): dt = filename[-7:] date = datetime.datetime.strptime(dt, "%Y-%m") days = calendar.monthrange(date.year, date.month) later1dt = date + datetime.timedelta(days[1] + 1) later1 = later1dt.strftime("%Y-%m-%d") n_days = calendar.monthrange(later1dt.year, later1dt.month) later2dt = later1dt + datetime.timedelta(n_days[1]) later2 = later2dt.strftime("%Y-%m-%d") return dt, later1, later2 def new_round(_float, _len=2) -> float: """ Parameters ---------- _float: _len: int, 指定四舍五入需要保留的小数点后几位数为_len Returns ------- type ==> float, 返回四舍五入后的值 """ try: if isinstance(_float, float): if 'e' in str(_float).lower(): return round(_float, _len) elif str(_float)[::-1].find('.') <= _len: return _float elif str(_float)[-1] == '5': return round(float(str(_float)[:-1] + '6'), _len) else: return round(_float, _len) elif isinstance(_float, str): _float = _float.replace(',', '') return new_round(float(_float), _len) else: return round(_float, _len) except Exception as e: print(_float, _len, e) alter_csv_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/alter_data/' data_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/' word_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/word/' info_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/info/' class CustomCsvByDay(object): def __init__(self, path, *args, **kwargs): super().__init__(*args, **kwargs) self.csv_list = list(path) self.credit = 0 self.account_dict = defaultdict(float) self.tax_dict = defaultdict(float) self.es_total_dict = dict() self.beforetax = 0 self.credit_dict = dict() self.es_total = 0 self.new_filename = None self.init_csv_list() def init_csv_list(self): self.new_filename = os.path.join( alter_csv_dir, '改_{}'.format( self.csv_list[0])) for i in range(len(self.csv_list)): self.csv_list[i] = os.path.join(data_dir, self.csv_list[i]) def data_mergence(self): df = pd.DataFrame() for _csv in self.csv_list: if _csv: csv_data = pd.read_csv( _csv, sep=',', encoding='utf-8', header=None, names=csv_columns2, dtype=data_type) df = pd.concat([df, csv_data], axis=0) df.reset_index(drop=True) return df @staticmethod def set_deleted_data(): del_dict = { 'RecordType': [ 'AccountTotal', 'StatementTotal', 'Rounding', 'InvoiceTotal'], 'ItemDescription': [ '11-11 promotion for new-signup 3', '11-11 promotion for new-signup', 'PD 7398', 'PD 7180', ], 'ProductName': 'Amazon Premium Support', 'PayerAccountId': 'PayerAccountId' } return del_dict def create_new_csv(self): df = self.data_mergence() # # LinkedLineItem和CBRounding数据 data = df[((df['RecordType'] == 'LinkedLineItem') & (df['ItemDescription'] != '11-11 promotion for new-signup 3') & (df['ItemDescription'] != '11-11 promotion for new-signup') & (df['ItemDescription'] != 'PD 7398') & (df['ItemDescription'] != 'PD 7180') & (df['ProductName'] != 'Amazon Premium Support')) # | (df['RecordType'] == 'CBRounding') ] # 获取AWS Premium Support数据 es_data = df[(df['RecordType'] == 'LinkedLineItem') & (df['ProductName'] == 'Amazon Premium Support')] # 获取credit数据 credit_data = df[(df['RecordType'] == 'LinkedLineItem') & ((df['ItemDescription'] == '11-11 promotion for new-signup 3') | (df['ItemDescription'] == '11-11 promotion for new-signup') # | (df['ItemDescription'] == 'PD 7398') | (df['ItemDescription'] == 'PD 7398')) ] # 获取AccountTotal的CostBeforeTax account_data = df[df['RecordType'] == 'AccountTotal'] # InvoiceTotal 数据 # invoice_data = df[df['RecordType'] == 'InvoiceTotal'] # StatementTotal 数据 statement_data = df[df['RecordType'] == 'StatementTotal'] self.get_credit(credit_data) self.get_account(data) self.get_estotal(es_data) self.get_estotal_dict() # 获取credit_dict self.get_credit_dict() insert_data = self.insert_data_to_csv(data) es_total_data = self.insert_es_data_to_csv(data) tax_data = self.insert_tax_data_to_csv(data) account_data = self.alter_account(account_data) statement_data = self.alter_statement(statement_data) self.save_lineitem_data() self.save_alter_data_to_csv(insert_data) self.save_alter_data_to_csv(es_total_data) self.save_alter_data_to_csv(tax_data) self.save_alter_data_to_csv(account_data) self.save_alter_data_to_csv(statement_data) print('ES总额为{}'.format(self.es_total)) print('Credit总额为{}'.format(self.credit)) print('账号消费总额为{}'.format(self.beforetax)) print('update finish') def get_estotal_dict(self): for k, v in self.account_dict.items(): self.es_total_dict[k] = (v / self.beforetax * self.es_total) def get_estotal(self, data): for i in range(data.shape[0]): self.es_total += float(data.iloc[i]['CostBeforeTax']) def alter_account(self, data): data = data.drop_duplicates( subset=['LinkedAccountId'], keep='first', inplace=False) data = data.reset_index(drop=True) for i in range(data.shape[0]): credit = self.credit_dict.get(data.iloc[i]['LinkedAccountId'], 0) costbeforetax = self.account_dict.get( data.iloc[i]['LinkedAccountId'], 0) es_total = self.es_total_dict.get( data.iloc[i]['LinkedAccountId'], 0) data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format( costbeforetax + es_total) data.loc[i, 'Credits'] = '{:.6f}'.format(new_round(credit, 2)) data.loc[i, 'TaxAmount'] = '{:.6f}'.format( (costbeforetax + credit + es_total) * 0.06) data.loc[i, 'TotalCost'] = '{:.6f}'.format( costbeforetax + credit + es_total + (costbeforetax + credit + es_total) * 0.06) return data def alter_statement(self, data): data = data.drop_duplicates( subset=['PayerAccountId'], keep='first', inplace=False) data = data.reset_index(drop=True) for i in range(data.shape[0]): data.loc[i, 'ItemDescription'] = data.iloc[i]['ItemDescription'] data.loc[i, 'CurrencyCode'] = 'CNY' data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format( self.beforetax + self.es_total) data.loc[i, 'Credits'] = '{:.2f}'.format(self.credit) data.loc[i, 'TaxAmount'] = '{:.6f}'.format( (self.beforetax + self.es_total + self.credit) * 0.06) data.loc[i, 'TotalCost'] = '{:.6f}'.format( (self.beforetax + self.es_total + self.credit) * 1.06) return data @staticmethod def get_fill_data(data): data = data.loc[data['LinkedAccountId'].notna()] linked_set = set(data['LinkedAccountId']) insert_data = defaultdict(list) for i in linked_set: linked_data = data[(data['RecordType'] == 'LinkedLineItem') & ( data['LinkedAccountId'] == i)].iloc[0] insert_data['InvoiceID'].append(linked_data['InvoiceID']) insert_data['PayerAccountId'].append(linked_data['PayerAccountId']) insert_data['LinkedAccountId'].append(i) insert_data['RecordType'].append('LinkedLineItem') insert_data['BillingPeriodStartDate'].append( linked_data['BillingPeriodStartDate']) insert_data['BillingPeriodEndDate'].append( linked_data['BillingPeriodEndDate']) insert_data['InvoiceDate'].append(linked_data['InvoiceDate']) insert_data['UsageStartDate'].append(linked_data['UsageStartDate']) insert_data['UsageEndDate'].append(linked_data['UsageEndDate']) insert_data['CurrencyCode'].append(linked_data['CurrencyCode']) return insert_data def insert_data_to_csv(self, data): fill_data = self.get_fill_data(data) for f in fill_data['LinkedAccountId']: credit = self.credit_dict.get(f, 0) fill_data['CostBeforeTax'].append(0) fill_data['Credits'].append(new_round(credit)) fill_data['TaxAmount'].append(new_round(credit * 0.06)) if new_round(credit * 0.06) == 0: fill_data['TaxType'].append('None') else: fill_data['TaxType'].append('VAT') fill_data['TotalCost'].append(new_round(credit + credit * 0.06)) data = pd.DataFrame(fill_data, columns=list(data.columns)) return data def insert_es_data_to_csv(self, data): fill_data = self.get_fill_data(data) for f in fill_data['LinkedAccountId']: es_total = self.es_total_dict.get(f, 0) fill_data['CostBeforeTax'].append(new_round(es_total)) fill_data['Credits'].append(0) fill_data['TaxAmount'].append(new_round(es_total * 0.06)) if new_round(es_total * 0.06) == 0: fill_data['TaxType'].append('None') else: fill_data['TaxType'].append('VAT') fill_data['TotalCost'].append( new_round(es_total + es_total * 0.06)) data = pd.DataFrame(fill_data, columns=list(data.columns)) return data def insert_tax_data_to_csv(self, data): fill_data = self.get_fill_data(data) for f in fill_data['LinkedAccountId']: tax_total = self.tax_dict.get(f, 0) es_tax = new_round(self.es_total_dict.get(f, 0) * 0.06) credit_tax = new_round(self.credit_dict.get(f, 0) * 0.06) account_tax = new_round((self.account_dict.get(f, 0) + self.credit_dict.get(f, 0) + self.es_total_dict.get(f, 0)) * 0.06, 6) fill_data['CostBeforeTax'].append(0) fill_data['Credits'].append(0) fill_data['TaxAmount'].append( '{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax)) if new_round(account_tax - tax_total - es_tax - credit_tax * 0.06) == 0: fill_data['TaxType'].append('None') else: fill_data['TaxType'].append('VAT') fill_data['TotalCost'].append( '{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax)) data = pd.DataFrame(fill_data, columns=list(data.columns)) return data def get_credit(self, data): for i in range(data.shape[0]): self.credit += float(data.iloc[i]['Credits']) def get_account(self, data): for i in range(data.shape[0]): self.account_dict[data.iloc[i]['LinkedAccountId'] ] += float(data.iloc[i]['CostBeforeTax']) self.tax_dict[data.iloc[i]['LinkedAccountId'] ] += float(data.iloc[i]['TaxAmount']) self.beforetax += float(data.iloc[i]['CostBeforeTax']) def get_credit_dict(self): for k, v in self.account_dict.items(): self.credit_dict[k] = v / self.beforetax * self.credit @staticmethod def save_csv(path, data): data.to_csv(path, mode='a', index=False, encoding='UTF-8', header=0) @staticmethod def save_csv_first(path, data): data.to_csv(path, mode='w', encoding='UTF-8', index=False) def save_lineitem_data(self): del_dict = self.set_deleted_data() with open(self.new_filename, 'w', encoding='UTF-8') as w: w.write('"' + '","'.join(list(csv_columns2)) + '"' + '\n') num = 0 for path in self.csv_list: with open(path, 'r', encoding='UTF-8') as r: while True: line = r.readline() if not line: break data = line.split('","') if len(data) < 2: continue line_dict = dict(zip(list(csv_columns2), data)) # 通过 InvoiceID判断时会有问题 if line_dict['RecordType'] in del_dict['RecordType']: continue elif line_dict['ItemDescription'] in del_dict['ItemDescription']: continue elif line_dict['ProductName'] == del_dict['ProductName']: continue elif line_dict['PayerAccountId'] == del_dict['PayerAccountId']: continue w.write('","'.join(data)) num += 1 # print(num) print('LineItem数据加载完成') def save_alter_data_to_csv(self, alter_data): _data = alter_data.fillna('').to_dict(orient='list') with open(self.new_filename, 'a', encoding='utf-8') as a: dbr_dict = {v: int(k) for k, v in enumerate(list(csv_columns2))} for i in range(len(list(_data.values())[0])): empty_data = ["" for _ in range(len(list(csv_columns2)))] for k, v in _data.items(): empty_data[dbr_dict[k]] = v[i] a.write(','.join('"{}"'.format(x) for x in empty_data) + '\n') class GenerateWord(object): def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs def my_sum(self, data_dict, loc, length=2): # 模板内使用 count = 0.0 for item in data_dict.values(): x = self.reverse_fill(item[loc]) count += x a = new_round(count, length) return a @staticmethod def reverse_fill(value_str: float, length=6): return new_round(value_str, length) @staticmethod def get_region_display(data: dict): for i in data.keys(): if i == 'cn-north-1': data['北京区资源费'] = data.pop(i) elif i == 'cn-northwest-1': data['宁夏区资源费'] = data.pop(i) return data @staticmethod def get_date_array(_date: str): _array = monthrange(int(_date[:4]), int(_date[5:7])) return [ "%s-%s-01" % (_date[:4], _date[5:7]), "%s-%s-%2d" % (_date[:4], _date[5:7], _array[1]), ] @staticmethod def fill(value, length=2): temp = "{:,.%sf}" % length if isinstance(value, str): value = value.replace(',', '') return temp.format(float(value)) elif isinstance(value, int) or isinstance(value, float): return temp.format(value) else: return temp.format(0) @staticmethod def display_time(day_str, str_fmt="%Y-%m-%d %H:%M:%S"): # 设置中文编码格式 # import locale # locale.setlocale(locale.LC_CTYPE, 'chinese') zh_fmt = '%Y年%m月%d日' # en_fmt = '%B %d, %Y' return time.strftime(zh_fmt, time.strptime(str(day_str), str_fmt)) def get_currency(self): if self.kwargs.get("currency") == 'USD': return "$" return "¥" def get_temple_number(self, number): data = [ self.fill(number * 1.06), self.fill(number), self.fill(0), self.fill(number * 0.06), ] return data @staticmethod def check_path_creat(path): if not os.path.exists(path): os.mkdir(path) class GenerateWordFromCSV(GenerateWord): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.csv_list = [] self.total = 0 self.credit_total = 0 self.es_total = 0 self.linked_dict = dict() self.filepath = list(self.kwargs['csv_list'])[0][:-4] self.init_csv_list() def init_csv_list(self): for i in self.kwargs['csv_list']: self.csv_list.append(os.path.join(data_dir, i)) def data_mergence(self): df = pd.DataFrame() for csv in self.csv_list: csv_data = pd.read_csv( csv, sep=',', encoding='utf-8', header=None, names=csv_columns2, dtype=data_type) df = pd.concat([df, csv_data], axis=0) df.reset_index(drop=True) return df def get_data_form_csv(self): df = self.data_mergence() df['CostBeforeTax'] = df['CostBeforeTax'].apply( pd.to_numeric, errors='coerce').fillna(0.0) df['Credits'] = df['Credits'].apply( pd.to_numeric, errors='coerce').fillna(0.0) df['TaxAmount'] = df['TaxAmount'].apply( pd.to_numeric, errors='coerce').fillna(0.0) df['TotalCost'] = df['TotalCost'].apply( pd.to_numeric, errors='coerce').fillna(0.0) lineitem_df = df[((df['RecordType'] == 'LinkedLineItem') & (df['ItemDescription'] != '11-11 promotion for new-signup 3') & (df['ItemDescription'] != '11-11 promotion for new-signup') & (df['ItemDescription'] != 'PD 7180') & (df['ItemDescription'] != 'PD 7398') & (df['ProductName'] != 'Amazon Premium Support'))] es_df = df[(df['RecordType'] == 'LinkedLineItem') & ( df['ProductName'] == 'Amazon Premium Support')] credit_data = df[(df['RecordType'] == 'LinkedLineItem') & ((df['ItemDescription'] == '11-11 promotion for new-signup 3') | (df['ItemDescription'] == '11-11 promotion for new-signup') | (df['ItemDescription'] == 'PD 7180') | (df['ItemDescription'] == 'PD 7398')) ] self.get_es_total(es_df) self.get_credit_total(credit_data) self.get_linkedaccount_dict(lineitem_df) print('消费总额为{}'.format(self.total)) print('Credit总额为{}'.format(self.credit_total)) print('ES总额为{}'.format(self.es_total)) self.save_mapping_date_to_csv() for i in self.linked_dict: self.linked_dict[i]['AWS Support (Enterprise)'] = \ self.linked_dict[i]['TotalCost'] / self.total * self.es_total self.linked_dict[i]['AWS Credits'] = self.linked_dict[i]['TotalCost'] / \ self.total * self.credit_total self.create_word(i) def get_credit_total(self, data): for i in range(data.shape[0]): self.credit_total += float(data.iloc[i]['Credits']) def get_es_total(self, data): for i in range(data.shape[0]): self.es_total += float(data.iloc[i]['CostBeforeTax']) def get_linkedaccount_dict(self, data): linked_set = set(data['LinkedAccountId']) for i in range(data.shape[0]): self.total += float(data.iloc[i]['CostBeforeTax']) for i in linked_set: linked_data = data[data['LinkedAccountId'] == i] product_set = set(data['ProductName']) linked_dict = { 'InvoiceID': linked_data.iloc[0]['InvoiceID'], 'TotalCost': linked_data['CostBeforeTax'].sum(), } for j in product_set: linked_dict[j] = linked_data[linked_data['ProductName'] == j]['CostBeforeTax'].sum() if i in self.linked_dict.keys(): for k, v in linked_dict.items(): if not k == 'InvoiceID': product_num = self.linked_dict[i].get(k, 0) self.linked_dict[i][k] = float(product_num) + float(v) else: self.linked_dict[i] = linked_dict @staticmethod def get_template(): return os.path.join(info_dir, 'china-template-2.docx') def create_word(self, linked): filedate, noticedate, due_data = from_filename_get_datetime(self.filepath) result = self.linked_dict.get(linked, None) if not result: print('{}数据不存在'.format(linked)) invoiceid = result.pop('InvoiceID') totalcost = result.pop('TotalCost') product_code_dict = defaultdict() for k, v in result.items(): product_code_dict[k] = super().get_temple_number(v) totalcost_li = self.get_temple_number(totalcost) date = self.get_date_array(filedate) for i in range(len(date)): date[i] = self.display_time(date[i], str_fmt="%Y-%m-%d") xx = self.get_template() template = DocxTemplate(xx) content = { 'storage': self.get_storage(linked), 'linkedid': linked, 'noticenumber': invoiceid, 'noticedate': noticedate, 'currency': self.get_currency(), 'region_display': '北京区资源费', 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, 'date': date, 'workday': due_data, } file_name = '{}{}_AWS账单{}.docx'.format( self.get_storage(linked), linked, filedate) doc_file_path = os.path.join(word_dir, self.filepath) self.check_path_creat(doc_file_path) doc_file_name = os.path.join(doc_file_path, file_name) template.render(context=content) template.save(doc_file_name) print('{}{}_AWS账单{}.docx 生成完成'.format(self.get_storage(linked), linked, filedate)) @staticmethod def get_storage(linkedid): for k, v in bwm_map.items(): if linkedid in v: return k return '' def get_temple_number(self, num): data = [ self.fill( ((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 1.06), self.fill( (num / self.total * self.es_total) + num), self.fill( num / self.total * self.credit_total), self.fill( ((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 0.06), ] return data def save_mapping_date_to_csv(self): filename = os.path.join( alter_csv_dir, '{}_mapping.csv'.format(self.filepath)) with open(filename, 'w', encoding='utf-8') as w: w.write(','.join('"{}"'.format(x) for x in ['mapping', 'LinkedAccountId', 'UnBlendedCost']) + '\n') for key, value in self.linked_dict.items(): mapping = '' for k, v in bwm_map.items(): if key in v: mapping = k UnBlendedCost = '{:.6f}'.format((value['TotalCost'] * 1.06) + (value['TotalCost'] / self.total * self.es_total) + (value['TotalCost'] / self.total * self.es_total * 0.06) + (value['TotalCost'] / self.total * self.credit_total) + (value['TotalCost'] / self.total * self.credit_total * 0.06)) w.write(','.join('"{}"'.format(x) for x in [mapping, key, UnBlendedCost]) + '\n') if __name__ == '__main__': # 获取输入执行时输入参数 month = '2024-06' if len(sys.argv) > 1: month = sys.argv[1] print('执行月份:' + month) csv_list = ['785781180852-aws-cost-allocation-ACTS-'+month+'.csv', '785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv',] # 1.1 循环附件名 for csv in csv_list: # print(os.path.isfile('./'+csv)) # 1.1.1附件不存在,则拉取 if not os.path.isfile('./' + csv): print('附件不存在:', csv) auto_download_file_form_s3([csv]) csv_typle = ( '785781180852-aws-cost-allocation-ACTS-'+month+'.csv', '785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv', ) CustomCsvByDay(csv_typle).create_new_csv() GenerateWordFromCSV(csv_list=csv_typle).get_data_form_csv()