# -*- coding: utf-8 -*- import os from collections import defaultdict import pandas as pd import numpy as np import openpyxl import time from pandas import Index from docxtpl import DocxTemplate from calendar import monthrange import datetime import calendar import logging import platform plat = platform.system().lower() # -*- 跑2023-01月脚本 02-06需求 -*- ''' 戴姆勒脚本变更代码 1. 处理 ES,EDP 文件; 2. 服务费文件单独生成(原修改部分 excel); 3. DGRC 变更为 MBGC;DGRC-Rollout 变更为 MBGC-Rollout; 4. 原修改部分 excel 命名为:X 月 Sinnet Support-payerID(4 月 Sinnet Support- 563347375094); 5. Payer 账号:010265887827,加一行 EDP “first contract year commitment shortfall”, 金额:¥1,654,160.37 (税前)(参考之前月份加行的逻辑) ''' ############初始化目录地址 alter_dbr_dir = '/server/billing/daimler/alter_data' data_dir = '/server/billing/daimler/data' merge_file_dir = '/server/billing/daimler/merge_data/' word_dir = '/server/billing/daimler/word' info_dir = '/server/billing/daimler/info' costs_dir = '/server/billing/daimler/costs' ############初始化变量参数 #####税前扣除 bs = float(-6621.66) # Refund # 增值税率 rate = float(1.06) # 增值税后金额 金额*rate(税率) bsAmountTax = (float(bs) * float(rate)) # 增值税得出的金额. bsTax = float(bsAmountTax) - bs # 增加金额汇总 taxSum = bsTax addSum = bs amountTaxSum = bsAmountTax # 扣除主账号 editPayerAccountId = '1010101013321312' # 扣除子账号 editLinkedAccountId = '131008855685915' editLinkedAccountId2 = '101010101010' daimler_map = { "MBGC": { '231703577464', '231732518549', '233198924213', '233281076386', '315154767884', '315166436895', '318222460809', '318251735617', '319093137346', '319095105578', '319131705619', '032861711361', '032968694919', '033176839117', '412333074862', '412592672283', '413185959261', '571509464092', '571914751254', '572500455038', '604214497778', '605281233780', '605492383956', '606333019050', '607073490803', '607215769011', '607251498050', '745178511853', '745186985354', '745404617525', '843064179036', '843146470903', '843403612003', '938026107066', '959875115713', '846363994602', '847121021275', '847349672033', '847535715113', '787753559055', '157425357330', '157571289045', '222510028891', '222726652783', '394747274994', '641166177863', '641228128543', '150264918770', '150265876719', '154308107197', '154344974850', '154396341553', '154377832036', # ===========2022-12 '000062495386', '000109320591', '016043598990', '187021522403', '206944717367', '207006135149', # ============'+defaultMonth+' '155851258827' }, "BMBS": { '825095481624', '673531903747', '45016790339', '005952357992', '006618809156', '008855685915', '099609831577', '149729312152', '158107609377', '158515190244', '158636931693', '160478048914', '161725851224', '179818971150', '180063110126', '180280548979', '259625653064', '263249970047', '263902424712', '264554558165', '381109879703', '381965952467', '382093053518', '420190688017', '420235015001', '420347211434', '468973200212', '470975648111', '471722715986', '606431034865', '606635571449', '614449149933', '614450235268', '662090766598', '662661601741', '680011596858', '693339510868', '693619437440', '693700422527', '694079957533', '694096809975', '694098823346', '720819555472', '720826711875', '720959775198', '721033167150', '721152002098', '721305584708', '731145591192', '738568990634', '738993122676', '824698755435', '824850862427', '825095481624', '830723988525', '830765046968', '833723158764', '845876480170', '846152349690', '846800936473', '864364323687', '864518602649', '864710874555', '902795390617', '939002646100', '965407060405', '965453987633', '965517804909', '060167615401', '076513887860', '420626856655', '421314535486', '329076050796', '329159661224', '376544640131', '376560970070', '585581760571', '586490397677', '836190164454', '836212206490', '444387297159', '444466323067', '551526584239', '551543229458', '603217519688', '623235613882', '633869915042', '003518598515', '003606583618', '634001137982', '721172726690', '723527083861', '315650031369', '315666852707', '342121670255', '342146215747', '365617091351', '365678264806', '814940158343', '815152760759', '815217109270', '815218645700', '815470951043', '815623564417', '815665691798', '815773864716', '815919514877', '816040880523', "007826675569", "007933598034", "008072868499", "008177890141", "008228029835", "008283276620", "008287392585", "008473233401", "008516644951", "008528838699", "010265887827", "018669973095", "019723979408", "019784009857", "019866036447", "019962796551", "019997264032", "020047295228", "020339118543", "020498088730", "020596821809", "020868571044", "021034476631", "021164366546", "021202811639", "021577333626", "021667202555", "021821803133", "021908435203", "022176271466", "022248774214", "023812125476", "023901143212", "023945165201", "023962011534", "024002785254", "024107434926", "024208508347", "024236970373", "024253823497", "024313569899", "024359695628", "024412100834", "024483855859", "024563654013", "024633815385", "024667173878", "024830901242", "024838505922", "024853602879", "024874476753", "045025227496" }, "BMBS-Rollout": { '111035368608', '143293953769', '143419428863' }, "MBCL": { '115032441317', '115159631000' }, 'MBGC-Rollout': { '157701174715', '157708043020', '157784268223', '157844458947', '157876968059', '157972004829', '158027931531', '767680705323', '778776044487', '778911054371', '779093734419', '779265673788', '779378744728', '779435304058', '779482782114', '779517320026', '779563825200' } } # csv表头字段 dbr_index = Index(['InvoiceID', 'PayerAccountId', 'LinkedAccountId', 'RecordType', 'RecordId', 'ProductName', 'RateId', 'SubscriptionId', 'PricingPlanId', 'UsageType', 'Operation', 'AvailabilityZone', 'ReservedInstance', 'ItemDescription', 'UsageStartDate', 'UsageEndDate', 'UsageQuantity', 'BlendedRate', 'BlendedCost', 'UnBlendedRate', 'UnBlendedCost', 'ResourceId'], dtype='object') # 数据类型 data_type = { 'UsageStartDate': np.object, 'UsageEndDate': np.object, 'InvoiceID': np.object, 'ProductName': np.object, 'PayerAccountId': np.object, 'LinkedAccountId': np.object, 'UnBlendedCost': str, 'BlendedCost': str, 'ItemDescription': np.object, 'RecordType': np.object } ############方法 def check_path_creat(path): """ 检查路径是否存在,不存在就创建 :param path: 文件路径 :return: """ if not os.path.exists(path): os.mkdir(path) def del_file_form_path(filename: str, path: str): if filename in os.listdir(path): os.remove(os.path.join(merge_file_dir, filename)) def new_round(_float, _len=2) -> float: """ Parameters ---------- _float: _len: int, 指定四舍五入需要保留的小数点后几位数为_len Returns ------- type ==> float, 返回四舍五入后的值 """ try: if isinstance(_float, float): if 'e' in str(_float).lower(): return round(_float, _len) elif str(_float)[::-1].find('.') <= _len: return _float elif str(_float)[-1] == '5': return round(float(str(_float)[:-1] + '6'), _len) else: return round(_float, _len) elif isinstance(_float, str): _float = _float.replace(',', '') return new_round(float(_float), _len) else: return round(_float, _len) except Exception as e: print(_float, _len, e) def from_filename_get_datetime(filename: str): dt = filename[-7:] date = datetime.datetime.strptime(dt, "%Y-%m") days = calendar.monthrange(date.year, date.month) later1dt = date + datetime.timedelta(days[1] + 1) later1 = later1dt.strftime("%Y-%m-%d") n_days = calendar.monthrange(later1dt.year, later1dt.month) later2dt = later1dt + datetime.timedelta(n_days[1]) later2 = later2dt.strftime("%Y-%m-%d") return dt, later1, later2 class GenerateWord(object): def __init__(self, *args, **kwargs): self.args = args self.kwargs = kwargs def my_sum(self, data_dict, loc, length=2): # 模板内使用 count = 0.0 for item in data_dict.values(): x = self.reverse_fill(item[loc]) count += x a = new_round(count, length) return a @staticmethod def reverse_fill(value_str: float, length=6): return new_round(value_str, length) @staticmethod def get_bank(**kwargs): info_map = { 'ch bj': os.path.join(info_dir, 'cn_bank_bj.txt'), 'ch hk': os.path.join(info_dir, 'cn_bank_hk.txt'), 'en bj': os.path.join(info_dir, 'en_bank_bj.txt'), 'en hk': os.path.join(info_dir, 'en_bank_hk.txt') } return info_map.get(kwargs.get('bank')) or info_map['ch bj'] @staticmethod def get_region_display(data: dict): for i in data.keys(): if i == 'cn-north-1': data['北京区资源费'] = data.pop(i) elif i == 'cn-northwest-1': data['宁夏区资源费'] = data.pop(i) return data @staticmethod def get_date_array(_date: str): _array = monthrange(int(_date[:4]), int(_date[5:7])) return [ "%s-%s-01" % (_date[:4], _date[5:7]), "%s-%s-%2d" % (_date[:4], _date[5:7], _array[1]), ] @staticmethod def fill(value, length=2): temp = "{:,.%sf}" % length if isinstance(value, str): value = value.replace(',', '') return temp.format(float(value)) elif isinstance(value, int) or isinstance(value, float): return temp.format(value) else: return temp.format(0) @staticmethod def display_time(day_str, str_fmt="%Y-%m-%d %H:%M:%S"): # 设置中文编码格式 # import locale # locale.setlocale(locale.LC_CTYPE, 'chinese') zh_fmt = '%Y年%m月%d日' # en_fmt = '%B %d, %Y' return time.strftime(zh_fmt, time.strptime(str(day_str), str_fmt)) def get_currency(self): if self.kwargs.get("currency") == 'USD': return "$" return "¥" # 得出模板上的金额 def get_temple_number(self, number): data = [ self.fill(number * 1.06), self.fill(number), self.fill(0), self.fill(number * 0.06), ] return data # def get_temple_number_number2(self, number, number2): data = [ self.fill((number * 1.06) + (number2 * 1.06)), self.fill(number + number2), self.fill(0), self.fill((number * 0.06) + (number2 * 1.06) - number2), ] return data # 需要单独处理 得出模板上的金额 加refund def get_temple_number_refund(self, number): data = [ self.fill((number * 1.06) + amountTaxSum), # 标题行 self.fill(number + addSum), # 服务费 self.fill(0), self.fill((number * 0.06) + taxSum), # 增值税 ] return data @staticmethod def check_path_creat(path): if not os.path.exists(path): os.mkdir(path) class DaimlerGenerateWord(GenerateWord): def __init__(self, linkedid, result, filename, *args, **kwargs): super().__init__(*args, **kwargs) self.linkedid = linkedid self.result = result self.filename = filename self.link_dict = kwargs.get('link_dict', None) # 创建 word文件 def create_word(self): filedate, noticedate, due_data = from_filename_get_datetime(self.filename) noticenumber = self.result.pop('InvoiceID') totalcost = self.result.pop('TotalCost') product_code_dict = defaultdict() # order_dict = dict(sorted(self.result.items(), key=lambda t: t[1], reverse=True)) # for k, v in order_dict.items(): for k, v in self.result.items(): product_code_dict[k] = self.get_temple_number(v) totalcost_li = self.get_temple_number(totalcost) # 脚本特点改动处 self.linkedid == editLinkedAccountId 去把总数的计算 带上refund if self.linkedid == editLinkedAccountId or self.linkedid == editLinkedAccountId2: if editPayerAccountId in self.filename: totalcost_li = self.get_temple_number_refund(totalcost) # 脚本特点改动处 date = self.get_date_array(filedate) for i in range(len(date)): date[i] = self.display_time(date[i], str_fmt="%Y-%m-%d") bank_info_list = [] with open(self.get_bank(), 'r', encoding='utf8') as f: for line in f.readlines(): if "{{company_email}}" in line: line = line.replace( '{{company_email}}', self.kwargs.get( 'company_email', '')) bank_info_list.append(line.strip('\n')) xx = self.get_template() template = DocxTemplate(xx) link_data = [{ 'linkedid': self.linkedid, 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, }, { 'linkedid': self.linkedid, 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, } ] content = { 'storage': self.get_storage(), 'linkedid': self.linkedid, 'noticenumber': noticenumber, 'noticedate': noticedate, 'currency': self.get_currency(), 'region_display': '北京区资源费', 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, 'date': date, 'workday': due_data, 'bank_info_list': bank_info_list, 'link_data': link_data, } file_name = '{}{}_AWS账单{}.docx'.format( self.get_storage(), self.linkedid, filedate) doc_file_path = os.path.join(word_dir, self.filename) self.check_path_creat(doc_file_path) doc_file_name = os.path.join(doc_file_path, file_name) template.render(context=content) template.save(doc_file_name) print( '{}{}_AWS账单{}.docx 生成完成'.format( self.get_storage(), self.linkedid, filedate)) return [doc_file_name] # 创建 word文件(主账号汇总) def create_word_total(self): filedate, noticedate, due_data = from_filename_get_datetime(self.filename) noticenumber = self.result.pop('InvoiceID') totalcost = self.result.pop('TotalCost') product_code_dict = defaultdict() # order_dict = dict(sorted(self.result.items(), key=lambda t: t[1], reverse=True)) # for k, v in order_dict.items(): for k, v in self.result.items(): product_code_dict[k] = self.get_temple_number(v) totalcost_li = self.get_temple_number(totalcost) # 脚本特点改动处 self.linkedid == editLinkedAccountId 去把总数的计算 带上refund if self.linkedid == editLinkedAccountId or self.linkedid == editLinkedAccountId2: if editPayerAccountId in self.filename: totalcost_li = self.get_temple_number_refund(totalcost) # 脚本特点改动处 date = self.get_date_array(filedate) for i in range(len(date)): date[i] = self.display_time(date[i], str_fmt="%Y-%m-%d") bank_info_list = [] with open(self.get_bank(), 'r', encoding='utf8') as f: for line in f.readlines(): if "{{company_email}}" in line: line = line.replace( '{{company_email}}', self.kwargs.get( 'company_email', '')) bank_info_list.append(line.strip('\n')) xx = self.get_template_total() template = DocxTemplate(xx) link_data = [{ 'linkedid': self.linkedid, 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, }, { 'linkedid': self.linkedid, 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, } ] content = { 'storage': self.get_storage(), 'linkedid': self.linkedid, 'noticenumber': noticenumber, 'noticedate': noticedate, 'currency': self.get_currency(), 'region_display': '北京区资源费', 'totalcost': totalcost_li, 'product_code_dict': product_code_dict, 'date': date, 'workday': due_data, 'bank_info_list': bank_info_list, 'link_data': link_data, } file_name = '{}{}_AWS账单{}.docx'.format( self.get_storage(), self.linkedid, filedate) doc_file_path = os.path.join(word_dir) self.check_path_creat(doc_file_path) doc_file_name = os.path.join(doc_file_path, file_name) template.render(context=content) template.save(doc_file_name) print( '{}{}_AWS账单{}.docx 生成完成'.format( self.get_storage(), self.linkedid, filedate)) return [doc_file_name] # 获取word的模板 @staticmethod def get_template(): return os.path.join(info_dir, 'china-template.docx') @staticmethod def get_template_total(): return os.path.join(info_dir, 'china-template_payerTotal.docx') def get_storage(self): for k, v in daimler_map.items(): if self.linkedid in v: return k return '' class CustomCsvByHour(object): def __init__(self, pathlist, *args, **kwargs): super().__init__(*args, **kwargs) self.costs_maps = defaultdict() self.path = [pathlist] self.finally_dict = defaultdict(float) self.sourcename = None self.es_iid = None self.es_total = 0 self.edp_iid = '' self.edp_total = 0 self.exclude_id = list() self.unblendedcost = 0 self.payer = None self.accountdt = pd.DataFrame() self.statementdt = pd.DataFrame() self.name = None self.filename = None self.partname = None self.filepath = None self.word_dict = defaultdict() self.init_edp_dict = defaultdict() self.es_data = None # self.dbr_index = None def set_deleted_data(self): del_dict = { 'InvoiceID': self.exclude_id, 'RecordType': ['AccountTotal', 'StatementTotal', 'Rounding'], 'ItemDescription': 'AWS Solution Provider Program Discount', 'PayerAccountId': 'PayerAccountId' } return del_dict # 初始化导出的文件名称 def init_dbr_fill_data(self, path): self.sourcename = path self.filename = '改_{}'.format(path) self.partname = defaultMonth + '月Sinnet Support-{}'.format(path) def get_dbr_data(self, path, name): self.payer = name.split('-')[0] self.filepath = path self.name = name self.init_dbr_fill_data(name) check_path_creat(os.path.join(alter_dbr_dir, self.sourcename[:-4])) es_rule = 'Enterprise Support for month of' print(path) reader = pd.read_csv( path, iterator=True, sep=',', chunksize=10000, header=None, names=dbr_index, dtype=data_type, low_memory=False) num = 0 for result in reader: word_dict = defaultdict(dict) _edp_dict = defaultdict(dict) num += 1 # if '563646727715' in self.filepath: # result = result[~result['ItemDescription'].str.contains('credit')] # result = result[~result['ItemDescription'].str.contains('Credit')] # result = result[~result['ItemDescription'].str.contains('Credits')] # result = result[~((result['ItemDescription'].astype(str).str.contains('credit') | # result['ItemDescription'].astype(str).str.contains('Credit')))] # '+defaultMonth+'-15 新加,不要dbr里的EDP Discount,RVD Discount. # result = result[result['ProductName'].str.contains('EDP Discount') == False] # result = result[result['ProductName'].str.contains('RVD Discount') == False] result['UnBlendedCost'] = result['UnBlendedCost'].apply(pd.to_numeric, errors='coerce').fillna(0.0) # result['ItemDescription'] = result['ItemDescription'].fillna('') # 不是7827的账号 忽略SPPD # if not '010265887827' in self.filepath: # data = result[~result['ItemDescription'].str.contains('AWS Solution Provider Program Discount')] if '010265887827' in self.filepath: data = result else: # data = result[~result['ItemDescription'].str.contains('AWS Solution Provider Program Discount')] data = result[(~result['ItemDescription'].str.contains('AWS Solution Provider Program Discount')) | (~result['ItemDescription'].str.contains('AWS SOLUTION PROVIDER PROGRAM DISCOUNT'))] # data = data[~data['ItemDescription'].str.contains('credit')] # data 不要 ItemDescription 是 POC 6337的. data = data[(~data['ItemDescription'].str.contains('POC 6337')) | ~data['ItemDescription'].str.contains( 'poc 6337')] # 定义asppd_data 要 ItemDescription 是 AWS Solution Provider Program Discount 或 AWS SOLUTION PROVIDER PROGRAM DISCOUNT 的. asppd_data = result[(result['ItemDescription'].str.contains('AWS Solution Provider Program Discount')) | (result['ItemDescription'].str.contains('AWS SOLUTION PROVIDER PROGRAM DISCOUNT'))] # 定义exclude_data 是 data中 ProductName == Amazon Premium Support 的数据 exclude_data = data[data['ProductName'] == 'Amazon Premium Support'] # 定义spp_data 是 data中 ItemDescription == Billing error - SPP overdiscount issued for November period 的数据 spp_data = data[data['ItemDescription'] == 'Billing error - SPP overdiscount issued for November period'] # self.exclude_id 追加 asppd_data中的InvoiceID. # 不是7827的排除掉asppd_data里的invoiceid if not '010265887827' in self.filepath: self.exclude_id.extend(list(set(asppd_data['InvoiceID']))) # self.exclude_id 追加 exclude_data中的InvoiceID. self.exclude_id.extend(list(exclude_data['InvoiceID'])) # self.exclude_id 追加 spp_data中的InvoiceID. self.exclude_id.extend(list(spp_data['InvoiceID'])) # 取edp # exclude_data中ItemDescription=='Enterprise Program Discount' 不为空 的情况下. if not exclude_data[exclude_data['ItemDescription'] == 'Enterprise Program Discount'].empty: # self.edp_total 是 exclude_data中ItemDescription=='Enterprise Program Discount' 条件下的UnBlendedCost之和 # self.edp_total = exclude_data[exclude_data['ItemDescription'] == 'Enterprise Program Discount']['UnBlendedCost'].sum() self.es_total += exclude_data[exclude_data['ItemDescription'] == 'Enterprise Program Discount'][ 'UnBlendedCost'].sum() self.edp_iid = exclude_data[exclude_data['ItemDescription'] == 'Enterprise Program Discount'].iloc[0][ 'InvoiceID'] # 取es if not exclude_data[exclude_data['ItemDescription'].str.startswith(es_rule)].empty: self.es_total += exclude_data[exclude_data['ItemDescription'].str.startswith(es_rule)][ 'UnBlendedCost'].sum() if not self.es_iid: self.es_iid = exclude_data[exclude_data['ItemDescription'].str.startswith(es_rule)].iloc[0][ 'InvoiceID'] self.es_data = exclude_data[exclude_data['ItemDescription'].str.startswith(es_rule)].iloc[0][ 'ItemDescription'] # 去除不展示的部分 ~isin不包含 dt = data[((data['RecordType'] == 'LineItem') & (data['LinkedAccountId'] != '') & ( ~data['InvoiceID'].isin(self.exclude_id))) | ((data['RecordType'] == 'InvoiceTotal') & (data['LinkedAccountId'] == '') & ( ~data['InvoiceID'].isin(self.exclude_id)))] # 去除不参与计算的部分 # notna 根据缺省值(是否有空的值 None,参考:https://vimsky.com/examples/usage/python-pandas-dataframe-notna.html)返回True,false,这里是取出不为空的数据 np_dt = dt.loc[dt['ProductName'].notna()] # 从dt里取出productName不为空的数据 linkedaccountid_set = set(np_dt['LinkedAccountId']) # 将np_dt里的LinkedAccountId存成一个Set集合 productname_set = set(np_dt['ProductName']) # 将np_dt里的ProductName存成一个Set集合 account_dt = data[data['RecordType'] == 'AccountTotal'] statement_dt = data[data['RecordType'] == 'StatementTotal'] self.unblendedcost += np_dt['UnBlendedCost'].sum() # 将np_dt里的UnBlendedCost总和累加到一起 for i in linkedaccountid_set: # 遍历linkedaccountid_set _linked_data = np_dt[np_dt['LinkedAccountId'] == i] # 从np_dt里取出当前遍历linkedaccountid的对象集合 self.finally_dict[i] += _linked_data[ 'UnBlendedCost'].sum() # 将_linked_data的UnBlendedCost总和累加给finally_dict中的这个linkedaccountid字典 edp_data = _linked_data[ _linked_data['ItemDescription'] == 'Enterprise Program Discount'] # 获取epd描述的数据集合 l_data = _linked_data[ _linked_data['ItemDescription'] != 'Enterprise Program Discount'] ##获取不是epd描述的数据集合 _edp_dict[i] = edp_data['UnBlendedCost'].sum() # epd金额的总和累加给_edp_dict中 for p in productname_set: word_dict[i][p] = l_data[l_data['ProductName'] == p]['UnBlendedCost'].sum() word_dict[i]['TotalCost'] = _linked_data['UnBlendedCost'].sum() word_dict[i]['InvoiceID'] = _linked_data.iloc[0]['InvoiceID'] # word_dict[i]={"ProductName1":100,"ProductName2":200,"totalCost":300,"InvoiceID":"111111"} ,i=linkedAccountId for k, v in word_dict.items(): if k in self.word_dict.keys(): for c_k, c_v in v.items(): if c_k != 'InvoiceID': chunk_c_k = self.word_dict[k].get(c_k, 0) self.word_dict[k][c_k] = chunk_c_k + c_v else: self.word_dict[k] = v for k, v in _edp_dict.items(): if k in self.init_edp_dict.keys(): self.init_edp_dict[k] += v else: self.init_edp_dict[k] = v self.accountdt = pd.concat([self.accountdt, account_dt], axis=0) self.statementdt = pd.concat([self.statementdt, statement_dt], axis=0) # 当前账号下word生成的合计 k_total = 0 k_InvoiceID = '' ####整理word导出的数据 for k, v in self.word_dict.items(): # ================= # v['Enterprise Program Discount'] = self.init_edp_dict.get(k, 0) + v.get('TotalCost', 0) / self.unblendedcost * self.edp_total # ================='+defaultMonth+'-15 v['Enterprise Program Discount'] = float(self.costs_maps.get(k, "0")) self.edp_total += v.get('Enterprise Program Discount', 0) # ================= v['Enterprise Support'] = v.get('TotalCost', 0) / self.unblendedcost * self.es_total # v['TotalCost'] = v.get('TotalCost', 0) + v.get('TotalCost',0) / self.unblendedcost * self.es_total + v.get('TotalCost', 0) / self.unblendedcost * self.edp_total # 不管epd v['TotalCost'] = v.get('TotalCost', 0) + v.get('TotalCost', 0) / self.unblendedcost * self.es_total + v.get( 'Enterprise Program Discount', 0) # + v.get('TotalCost', 0) / self.unblendedcost * self.edp_total # v['TotalCost'] = v.get('TotalCost', 0) + v.get('TotalCost', 0) / self.unblendedcost * self.es_total + self.es_total #+ v.get('TotalCost', 0) / self.unblendedcost * self.edp_total # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get(k, None) if not aws_f == None: v['Sinnet/L2C Support BJS'] = float(aws_f) v['TotalCost'] = v.get('TotalCost', 0) + float(aws_f) # 脚本特点改动处 这个账号的 税前-30000=税后-31800 if (k == editLinkedAccountId) or (k == editLinkedAccountId2): if editPayerAccountId in name: # v['first contract year commitment shortfall'] = float(bs) v['Refund'] = float(bs) ####导出到word文件 k_total += float(v.get('TotalCost', 0)) k_InvoiceID = v.get('InvoiceID', '') # if plat != 'windows': # DaimlerGenerateWord(k, v, name[:-4]).create_word() payerWord = self.word_dict.get(self.payer, 'null') if 1 == 1 and payerWord != 'null': payerWord['TotalCost'] = float(k_total) payerWord['InvoiceID'] = k_InvoiceID print('生成主账号word,payerAccountId:{}'.format(self.payer)) DaimlerGenerateWord(self.payer, payerWord, name[:-4]).create_word_total() self.save_lineitem_data() # 导出改_文件中的 self.init_part_file() # 导出 修改部分_文件的标题 self.save_alter_data() self.save_mapping_date_to_csv() self.save_mapping() # 导出 改_文件 简称导出dbr文件 def save_lineitem_data(self): del_dict = self.set_deleted_data() filepath = os.path.join(alter_dbr_dir, self.sourcename[:-4], self.filename) with open(filepath, 'w', encoding='utf8') as w, open(self.filepath, 'r', encoding='utf8') as r: w.write('"' + '","'.join(list(dbr_index)) + '"' + '\n') # 第一行,标题 num = 0 for line in r: num += 1 # if num % 10000 == 0: # print(num) line_data = line.replace('"', '') line_dict = dict(zip(list(dbr_index), line_data.split(','))) if line_dict['InvoiceID'] in del_dict['InvoiceID']: continue elif line_dict['RecordType'] in del_dict['RecordType']: continue elif line_dict['ItemDescription'].startswith(del_dict['ItemDescription']): continue elif line_dict['ItemDescription'] == 'POC 6337': continue elif line_dict['PayerAccountId'] == del_dict['PayerAccountId']: continue # if '563646727715' in self.filepath: # if 'Credit' in line_dict['ItemDescription'] or 'credit' in line_dict['ItemDescription'] or 'Credits' in line_dict['ItemDescription']: # print('credit:') # print(line_dict) # continue w.write(line) print('LineItem数据加载完成') # # 1.文件合并 # 2.edp,es 比例分摊处理 # 3.es 还是es的,epd还是epd的,消费总额还是消费总额 # 4. # ================= # 1.文件合并 # 2.epd新文件, # 3.es比例分摊 # 4.es 还是es的,epd还是epd的,消费总额还是消费总额 # 5.俩账号credit 计算。-15000 *2 (税前) # 5094 (1468727.30 - 31800 / 1.06) def save_alter_data(self): print('ES总数为{}'.format(self.es_total)) print('EDP总数为{}'.format(self.edp_total)) logging.info('----------------------------------') logging.info('当前账号为{}'.format(self.name)) logging.info('ES总数为{}'.format(self.es_total)) logging.info('EDP总数为{}'.format(self.edp_total)) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get("total", 0) # 脚本特点改动处 # 如果是 editPayerAccountId 账号 则消费总额 调整 if editPayerAccountId in self.name: print('消费总额为{}'.format(self.unblendedcost + (addSum) + float(aws_f))) logging.info('消费总额为{}'.format(self.unblendedcost + (addSum) + float(aws_f))) else: print('消费总额为{}'.format(self.unblendedcost + float(aws_f))) logging.info('消费总额为{}'.format(self.unblendedcost + float(aws_f))) print(self.finally_dict) # end. # print(self.exclude_id) ####导出修改部分_文件数据的过程. fill_data = self.get_fill_data(self.name) # 获取导出LineItem的数据 self.save_data_to_dbr(fill_data) # 写入修改部分_文件 self.add_round_to_dbr() accountdt = self.alter_account_data(self.accountdt) # 获取导出AccountTotal的数据 accountdt_dt = accountdt.to_dict(orient='list') # dict转成list self.save_data_to_dbr(accountdt_dt) statementdt = self.alter_statement_data(self.statementdt) # 获取导出StatementTotal的数据 statementdt_dt = statementdt.to_dict(orient='list') self.save_data_to_dbr(statementdt_dt) ##### print('{}文件修改完成'.format(self.sourcename)) # 获取修改部分_ 要导出的 AccountTotal行 def alter_account_data(self, data): data = data.drop_duplicates( subset=['LinkedAccountId'], keep='first', inplace=False) data = data.reset_index(drop=True) for i in range(data.shape[0]): cost = self.finally_dict.get(data.loc[i, 'LinkedAccountId'], 0) if data.loc[i, 'LinkedAccountId'] == self.payer: data.loc[i, 'UnBlendedCost'] = '{:.6f}'.format((self.finally_dict.get(self.payer, 0) * 1.06) + (cost / self.unblendedcost * self.es_total) + float(self.costs_maps.get(data.loc[i, 'LinkedAccountId'], "0")) * 1.06 # +(cost / self.unblendedcost * self.edp_total) + # (cost / self.unblendedcost * self.edp_total * 0.06) ) else: data.loc[i, 'UnBlendedCost'] = '{:.6f}'.format((cost * 1.06) + (cost / self.unblendedcost * self.es_total) + (cost / self.unblendedcost * self.es_total * 0.06) + float(self.costs_maps.get(data.loc[i, 'LinkedAccountId'], "0")) * 1.06 # + (cost / self.unblendedcost * self.edp_total) + # (cost / self.unblendedcost * self.edp_total * 0.06) ) ###################################### if data.loc[i, 'LinkedAccountId'] == editLinkedAccountId or data.loc[ i, 'LinkedAccountId'] == editLinkedAccountId2: data.loc[i, 'UnBlendedCost'] = float(data.loc[i, 'UnBlendedCost']) + (amountTaxSum) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get(data.loc[i, 'LinkedAccountId'], None) if not aws_f == None: data.loc[i, 'UnBlendedCost'] = float(data.loc[i, 'UnBlendedCost']) + (float(aws_f) * 1.06) return data.fillna('') # 获取修改部分_ 要导出的 StatementTotal行 def alter_statement_data(self, data): data = data.drop_duplicates( subset=['PayerAccountId'], keep='first', inplace=False) data = data.reset_index(drop=True) for i in range(data.shape[0]): # data.loc[i, 'UnBlendedCost'] = (self.unblendedcost + self.es_total + self.edp_total) * 1.06 data.loc[i, 'UnBlendedCost'] = (self.unblendedcost + self.es_total) * 1.06 # '+defaultMonth+'-15 add. for k, v in self.costs_maps.items(): data.loc[i, 'UnBlendedCost'] = (data.loc[i, 'UnBlendedCost']) + (float(v) * 1.06) ###################################### # print('data.loc[i, `UnBlendedCost`] :{}'.format(data.loc[i,'UnBlendedCost'])) if editPayerAccountId in self.filepath: data.loc[i, 'UnBlendedCost'] = float(data.loc[i, 'UnBlendedCost']) + float(amountTaxSum) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get('total', 0) if not aws_f == 0: data.loc[i, 'UnBlendedCost'] = float(data.loc[i, 'UnBlendedCost']) + (float(aws_f) * 1.06) return data.fillna(value='') # 获取修改部分_ 要导出的 LineItem行 def get_fill_data(self, name): payer = name.split('-')[0] self.payer = payer date = name[-11:-4] insert_data = defaultdict(list) startdate, enddate = self.get_month_date(date) for k, v in self.finally_dict.items(): insert_data['InvoiceID'].extend([self.es_iid, self.es_iid]) insert_data['PayerAccountId'].extend([payer, payer]) insert_data['LinkedAccountId'].extend([k, k]) insert_data['RecordType'].extend(['LineItem' for _ in range(2)]) insert_data['ProductName'].extend( ['Amazon Premium Support', '']) insert_data['UsageStartDate'].extend([startdate for _ in range(2)]) insert_data['UsageEndDate'].extend([enddate for _ in range(2)]) insert_data['ItemDescription'].extend( [ self.es_data, '税金 VAT 类型']) insert_data['UnBlendedCost'].extend([ '{:.6f}'.format(v / self.unblendedcost * self.es_total), '{:.6f}'.format(v / self.unblendedcost * self.es_total * 0.06), ]) ####修改部分_ LineItem导出追加 # editLinkedAccountId 账号的加 金额 if k == editLinkedAccountId or k == editLinkedAccountId2: if editPayerAccountId in name: leng = 2 insert_data['InvoiceID'].extend([self.edp_iid for _ in range(leng)]) insert_data['PayerAccountId'].extend([payer for _ in range(leng)]) insert_data['LinkedAccountId'].extend([k for _ in range(leng)]) insert_data['RecordType'].extend(['LineItem' for _ in range(leng)]) insert_data['ProductName'].extend(['Refund', '']) # 改这俩 insert_data['UsageStartDate'].extend([startdate for _ in range(leng)]) insert_data['UsageEndDate'].extend([enddate for _ in range(leng)]) insert_data['ItemDescription'].extend(['Refund', '税金 VAT 类型']) insert_data['UnBlendedCost'].extend(['{:.6f}'.format(bs), '{:.6f}'.format(bsTax)]) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get(k, None) if not aws_f == None: leng = 2 insert_data['InvoiceID'].extend([self.edp_iid for _ in range(leng)]) insert_data['PayerAccountId'].extend([payer for _ in range(leng)]) insert_data['LinkedAccountId'].extend([k for _ in range(leng)]) insert_data['RecordType'].extend(['LineItem' for _ in range(leng)]) insert_data['ProductName'].extend(['Sinnet/L2C Support BJS', '']) # 改这俩 insert_data['UsageStartDate'].extend([startdate for _ in range(leng)]) insert_data['UsageEndDate'].extend([enddate for _ in range(leng)]) insert_data['ItemDescription'].extend(['Sinnet/L2C Support BJS', '税金 VAT 类型']) insert_data['UnBlendedCost'].extend( ['{:.2f}'.format(float(aws_f)), '{:.2f}'.format(float(aws_f) * 0.06)]) # '+defaultMonth+'-15 for k, v in self.costs_maps.items(): insert_data['InvoiceID'].extend([self.es_iid, self.es_iid]) insert_data['PayerAccountId'].extend([payer, payer]) insert_data['LinkedAccountId'].extend([k, k]) insert_data['RecordType'].extend(['LineItem' for _ in range(2)]) insert_data['ProductName'].extend( ['Enterprise Program Discount', '']) insert_data['UsageStartDate'].extend([startdate for _ in range(2)]) insert_data['UsageEndDate'].extend([enddate for _ in range(2)]) insert_data['ItemDescription'].extend( ['Enterprise Program Discount', '税金 VAT 类型']) insert_data['UnBlendedCost'].extend([ '{:.6f}'.format(float(v)), '{:.6f}'.format(float(v) * 0.06), ]) # data = pd.DataFrame(insert_data, columns=list(self.dbr_index)) return insert_data # 请在这里改Rounding。 def get_round_fill_data(self, name): payer = name.split('-')[0] self.payer = payer insert_data = defaultdict(list) linkedaccountid = 843403612003 insert_data['PayerAccountId'].extend([payer for _ in range(6)]) insert_data['LinkedAccountId'].extend( [linkedaccountid for _ in range(6)]) insert_data['RecordType'].extend(['Rounding' for _ in range(6)]) # ======================= insert_data['ItemDescription'].extend( ['Rounding of 563347375094', 'Rounding of 563646727715', 'Rounding of 565095721352', 'Rounding of 565359735310', 'Rounding of 010265887827', 'Rounding of 011562250191']) insert_data['InvoiceID'].extend( ['1341453335', '1341519427', '1341507711', '1341422927', '1341422927', '1341422927']) insert_data['UnBlendedCost'].extend([ '{:.6f}'.format(-31.48), '{:.6f}'.format(-45.36), '{:.6f}'.format(-0.10), '{:.6f}'.format(-0.11), '{:.6f}'.format(-0.09), '{:.6f}'.format(-0.07) ]) # ========================== # 0.20 # 0.05 # 0.01 ''' 10月 '{:.6f}'.format(-19.67), '{:.6f}'.format(-1.89), '{:.6f}'.format(-0.13), '{:.6f}'.format(-0.15) 11月 '{:.6f}'.format(-12.59), '{:.6f}'.format(-15.34), '{:.6f}'.format(-0.12), '{:.6f}'.format(-0.20), '{:.6f}'.format(-0.05), '{:.6f}'.format(0.01) 12月 '{:.6f}'.format(8.73), '{:.6f}'.format(6.50), '{:.6f}'.format(0.15), '{:.6f}'.format(0.23), '{:.6f}'.format(0.04), '{:.6f}'.format(0.05) ''' data = pd.DataFrame(insert_data, columns=list(dbr_index)) return data.fillna('') def save_new_dbr(self, path, data, first=False): filepath = os.path.join(alter_dbr_dir, self.sourcename[:-4], path) if first: data.to_csv(filepath, mode='w', encoding='UTF-8', index=False) else: data.to_csv( filepath, mode='a', index=False, encoding='UTF-8', header=0) @staticmethod def get_month_date(date): start = datetime.datetime.strptime(date, "%Y-%m") month = start.month year = start.year if month == 12: end = datetime.datetime(year + 1, 1, 1) - \ datetime.timedelta(seconds=1) else: end = datetime.datetime(year, month + 1, 1) - \ datetime.timedelta(seconds=1) start_dt = start.strftime("%Y-%m-%d %H:%M:%S") end_dt = end.strftime("%Y-%m-%d %H:%M:%S") return start_dt, end_dt def create_new_csv(self): self.aws_feb = {} self.costs_maps = {} for path in self.path: print(path.split('-')[0]) if path in os.listdir(merge_file_dir): self.daimler_read_edp_costs(path.split('-')[0]) # self.daimler_read_aws_feb_sinnetcostallocation(path.split('-')[0]) self.get_dbr_data(os.path.join(merge_file_dir, path), path) else: print('请合并文件') # partname保存 修改部分_文件,self.filename 改文件 def save_data_to_dbr(self, insert_data): partname = os.path.join(alter_dbr_dir, self.sourcename[:-4], self.partname) filename = os.path.join(alter_dbr_dir, self.sourcename[:-4], self.filename) with open(filename, 'a', encoding='utf-8') as fw, open(partname, 'a', encoding='utf-8') as pw: dbr_dict = {v: int(k) for k, v in enumerate(list(dbr_index))} for i in range(len(list(insert_data.values())[0])): empty_data = ["" for _ in range(len(dbr_index))] for k, v in insert_data.items(): empty_data[dbr_dict[k]] = v[i] # if k == 'LinkedAccountId' and self.is_number(v[i]): # 修改部分写入 pw.write(','.join('"{}"'.format(x) for x in empty_data) + '\n') # 改_文件写入 # if '服务费用' in empty_data[dbr_dict['ProductName']]: # fw.write(','.join('"{}"'.format(x) for x in empty_data) + '\n') fw.write(','.join('"{}"'.format(x) for x in empty_data) + '\n') def is_number(self, s): try: # 如果能运行float(s)语句,返回True(字符串s是浮点数) float(s) return True except ValueError: # ValueError为Python的一种标准异常,表示"传入无效的参数" pass # 如果引发了ValueError这种异常,不做任何事情(pass:不做任何事情,一般用做占位语句) try: import unicodedata # 处理ASCii码的包 unicodedata.numeric(s) # 把一个表示数字的字符串转换为浮点数返回的函数 return True except (TypeError, ValueError): pass return False # 初始化:修改部分_ 文件标题行 def init_part_file(self): partname = os.path.join(alter_dbr_dir, self.sourcename[:-4], self.partname) with open(partname, 'w', encoding='utf-8') as f: f.write(','.join('"{}"'.format(x) for x in list(dbr_index)) + '\n') def add_round_to_dbr(self): payer = self.name.split('-')[0] if payer == '563646727715': round_fill_data = self.get_round_fill_data(self.name) round_data = round_fill_data.to_dict(orient='list') self.save_data_to_dbr(round_data) # 输出_mapping文件 def save_mapping_date_to_csv(self): filename = os.path.join( alter_dbr_dir, self.sourcename[:-4], '{}_mapping.csv'.format(self.payer)) with open(filename, 'w', encoding='utf-8') as w: w.write(','.join('"{}"'.format(x) for x in ['mapping', 'LinkedAccountId', 'UnBlendedCost']) + '\n') for key, value in self.finally_dict.items(): mapping = '' for k, v in daimler_map.items(): if key in v: mapping = k UnBlendedCost = '{:.6f}'.format((value * 1.06) + (value / self.unblendedcost * self.es_total) + (value / self.unblendedcost * self.es_total * 0.06) # +(value / self.unblendedcost * self.edp_total) + # (value / self.unblendedcost * self.edp_total * 0.06) ) # start-createBy:ZhouWenTao. for:2022-12-19 处理12月-30000 ################# mapping_文件 editLinkedAccountId 的 UnBlendedCost -31800 aws_f = 0 # if len(key) == 12: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get(key, "0")) * 1.06) aws_f = self.aws_feb.get(key, 0) if len(key) == 11: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('0' + key, "0")) * 1.06) aws_f = self.aws_feb.get('0' + key, 0) elif len(key) == 10: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('00' + key, "0")) * 1.06) aws_f = self.aws_feb.get('00' + key, 0) elif len(key) == 9: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('000' + key, "0")) * 1.06) aws_f = self.aws_feb.get('000' + key, 0) elif len(key) == 8: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('0000' + key, "0")) * 1.06) aws_f = self.aws_feb.get('0000' + key, 0) elif len(key) == 7: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('00000' + key, "0")) * 1.06) aws_f = self.aws_feb.get('00000' + key, 0) elif len(key) == 6: UnBlendedCost = float(UnBlendedCost) + (float(self.costs_maps.get('000000' + key, "0")) * 1.06) aws_f = self.aws_feb.get('000000' + key, 0) if (key == editLinkedAccountId or key == editLinkedAccountId2) and editPayerAccountId in filename: UnBlendedCost = float(UnBlendedCost) + (amountTaxSum) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 # aws_f = self.aws_feb.get(key, 0) if not aws_f == 0: UnBlendedCost = float(UnBlendedCost) + (float(aws_f) * 1.06) w.write(','.join('"{}"'.format(x) for x in [mapping, key, UnBlendedCost]) + '\n') # end # 输出 核对表 def save_mapping(self): path = '/server/billing/daimler/bill_checklist/Daimler账单核对表.xlsx' # path = '/server/billing/daimler/bill_checklist/Daimler'+defaultMonth+'月账单核对表.xlsx' if not '563347375094' in self.filename: path = '/server/billing/daimler/bill_checklist/Daimler' + defaultMonth + '月账单核对表.xlsx' wb = openpyxl.load_workbook(path) table = wb['Daimler账单核对表'] for row in table.iter_rows(): if str(row[4].value).isdigit(): _total = self.finally_dict.get(str(row[4].value), None) if _total: row[5].value = ((_total * 1.06) + (_total / self.unblendedcost * self.es_total) + (_total / self.unblendedcost * self.es_total * 0.06) + (float(self.costs_maps.get(str(row[4].value), "0")) * 1.06) # +(_total / self.unblendedcost * self.edp_total) + # (_total / self.unblendedcost * self.edp_total * 0.06) ) # 脚本特点改动处- '+defaultMonth+'-14戴姆勒脚本变更代码二101个增值服务 aws_f = self.aws_feb.get(str(row[4].value), 0) if not aws_f == 0: row[5].value = float(row[5].value) + (float(aws_f) * 1.06) # start-createBy:ZhouWenTao. for:2022-12-19 处理12月-30000 if str(row[4].value) == editLinkedAccountId or str(row[4].value) == editLinkedAccountId2: if editPayerAccountId in self.filename: row[5].value = float(row[5].value) + (amountTaxSum) wb.save(path) print('mapping数据处理完成') def daimler_read_edp_costs(self, file): self.costs_maps = {} read_file = os.path.join(costs_dir, file + ".csv") linkaccountIds = [] unBlendedCosts = [] with open(read_file, 'r', encoding='utf-8') as r: # open(alterpath, 'w', encoding='utf-8') as w: num = 0 while True: line = r.readline() if not line: break data = line[1:-2].split('","') num += 1 if ('"关联账户' in data) or '关联账户' in data: linkaccountIds = data if ('"关联账户 总计' in data) or ('关联账户 总计' in data): unBlendedCosts = data person = [] for index, i in enumerate(linkaccountIds): if i == '' or '关联账户' in i: continue self.costs_maps[i] = unBlendedCosts[index] if len(i) == 11: line = ['0' + i, unBlendedCosts[index]] elif len(i) == 10: line = ['00' + i, unBlendedCosts[index]] elif len(i) == 9: line = ['000' + i, unBlendedCosts[index]] elif len(i) == 8: line = ['0000' + i, unBlendedCosts[index]] elif len(i) == 7: line = ['00000' + i, unBlendedCosts[index]] elif len(i) == 6: line = ['000000' + i, unBlendedCosts[index]] person.append(line) def daimler_read_aws_feb_sinnetcostallocation(self, payerId): self.aws_feb = {} read_file = "/server/billing/daimler/config/aws_feb" + defaultMonth + ".csv" if plat == 'windows': read_file = 'E:\\workspace_2\\施耐德\\daimler\\config\\aws_feb' + defaultMonth + '.csv' total = 0 with open(read_file, 'r', encoding='utf-8') as r: # open(alterpath, 'w', encoding='utf-8') as w: num = 0 while True: line = r.readline() if not line: break num += 1 data = line.split(',') payer = data[0].replace(' ', '') linkedAccountId = data[1].replace(' ', '') if len(linkedAccountId) == 10: linkedAccountId = "00" + linkedAccountId elif len(linkedAccountId) == 9: linkedAccountId = "000" + linkedAccountId elif len(linkedAccountId) == 8: linkedAccountId = "0000" + linkedAccountId elif len(linkedAccountId) == 7: linkedAccountId = "00000" + linkedAccountId elif len(linkedAccountId) == 6: linkedAccountId = "000000" + linkedAccountId elif len(linkedAccountId) == 11: linkedAccountId = "0" + linkedAccountId unBlendedCost = data[2].replace('\n', '') if not payer in payerId: continue self.aws_feb[linkedAccountId] = unBlendedCost total += float(unBlendedCost) self.aws_feb["total"] = total print(self.aws_feb) # 合并 def merge_file(filetuple: tuple): for file in filetuple: filename = list(file)[0] del_file_form_path(filename, merge_file_dir) for f in file: filepath = os.path.join(data_dir, f) alterpath = os.path.join(merge_file_dir, filename) with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'a', encoding='utf-8') as a: num = 0 while True: line = r.readline() if not line: break num += 1 a.write(line) if num % 10000 == 0: print(num) print('{}合并完成'.format(f)) # 数据裁剪 def daimler_del_tags_to_dbr(filetuple: tuple): for file in filetuple: filename = list(file)[0] filepath = merge_file_dir + filename # 修改后目录 alterpath = merge_file_dir + filename.split('-')[ 0] + "-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-" + defaultMonth + "-1.csv" with open(filepath, 'r', encoding='utf-8') as r, open(alterpath, 'w', encoding='utf-8') as w: num = 0 is_valid = False columns_index = [] while True: line = r.readline() if not line: break data = line[1:-2].split('","') if len(data) < len(dbr_index): print('无效数据已忽略') continue elif not is_valid: columns_index = [i for i in range(len(data)) if data[i] in dbr_index] is_valid = True target_data = [] for k, v in enumerate(data): if k in columns_index: target_data.append(v) w.write('"' + '","'.join(target_data) + '"' + '\n') num += 1 if num % 10000 == 0: print(num) print('{}数据裁剪完成'.format("010265887827")) # 运行月份 defaultMonth = '2023-07' import sys if __name__ == '__main__': if (plat == 'windows'): # 1==1 本地运行是目录 alter_dbr_dir = 'E:\\workspace_2\\施耐德\\daimler\\alter_data\\' data_dir = 'E:\\workspace_2\\施耐德\\daimler\\data\\' merge_file_dir = 'E:\workspace_2\施耐德\daimler\merge_data\\' word_dir = 'E:\\workspace_2\\施耐德\\daimler\\word\\' info_dir = 'E:\\workspace_2\\施耐德\\daimler\\info\\' costs_dir = 'E:\\workspace_2\\施耐德\\costs\\' dbr_typle = ( #('563347375094-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-' + defaultMonth + '.csv', # '563347375094-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-' + defaultMonth + '.csv',), ('563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-' + defaultMonth + '.csv', '563646727715-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-' + defaultMonth + '.csv',), # ('565095721352-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-'+defaultMonth+'.csv', # '565095721352-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-'+defaultMonth+'.csv',), # ('565359735310-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-'+defaultMonth+'.csv', # '565359735310-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-'+defaultMonth+'.csv',), # ('010265887827-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-'+defaultMonth+'.csv', # '010265887827-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-'+defaultMonth+'.csv',), # ('011562250191-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-'+defaultMonth+'.csv', # '011562250191-aws-billing-detailed-line-items-with-resources-and-tags-ACTS-Ningxia-'+defaultMonth+'.csv'), ) type = '' if len(sys.argv) > 1: type = sys.argv[1] if type == 'merge': # 1.先合并文件 merge_file(dbr_typle) elif type == 'del': daimler_del_tags_to_dbr(dbr_typle) elif type == 'merge_del': # 1.先合并文件 merge_file(dbr_typle) # 2.数据裁剪. daimler_del_tags_to_dbr(dbr_typle) else: # # 1.先合并文件 # merge_file(dbr_typle) # # 2.数据裁剪. # daimler_del_tags_to_dbr(dbr_typle) # 获取当前系统时间的年月日时分秒 import datetime now_time02 = datetime.datetime.now().strftime("%Y_%m_%d-%H_%M_%S") loggerfile = now_time02 + '_logger.log' open(loggerfile, 'w') logging.basicConfig(filename=loggerfile, level=logging.INFO, format='%(message)s') # 3.核心计算 for i in dbr_typle: CustomCsvByHour(i[0]).create_new_csv()