zhangdanjiaoben/other/BMW/main.py

import numpy as np
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import NamedStyle, PatternFill, Font, Alignment, Border, Side, colors
import boto3
import time
import os
from collections import defaultdict
import openpyxl
import time
import datetime
import calendar
from calendar import monthrange
from docxtpl import DocxTemplate
from pandas import Index
import shutil
import sys

_dict = {
    # 6910账号aksk
    'AWS_ACCESS_KEY_ID': 'AKIA4FU4JFDXO5J2ISNY',
    'AWS_SECRET_ACCESS_KEY': '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',
    # 7422账号aksk
    'GLOBAL_AWS_ACCESS_KEY_ID': "AKIAQWIJ3GB7CTO5GAWY",
    'GLOBAL_AWS_SECRET_ACCESS_KEY': "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"
}
aws_aksk = {
     "785781180852": {
        "account_type": "amazon-china",
        "aws_access_key_id": "AKIA3N5BVJG2B7BN7D7F",
        "aws_secret_access_key": "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",
        "bucket": "785781180852billing",
        "credential_type": "Amazon Access Key",
        "role_arn": "",
        "external_id": ""
    },
}
def auto_download_file_form_s3(file: tuple):
    for i in file:
        payer_id = i.split('-')[0]
        credential = get_credentials(payer_id)
        bucket = credential.pop('bucket')
        s3_client = boto3.client('s3', **credential)
        print('{}开始下载'.format(i))
        s3_client.download_file(
            bucket,i, i)
        #shutil.copy(i, data_dir+i)
        #shutil.move(i, os.path.join(data_dir, i))

        print('{}下载完成'.format(i))
def get_credentials(payer_id):
    credential = aws_aksk.get(payer_id, None)
    if not credential:
        return
    if credential.get('account_type', None) == 'amazon-china':
        region_name = 'cn-northwest-1'
        aws_access_key_id = _dict['AWS_ACCESS_KEY_ID']
        aws_secret_access_key = _dict['AWS_SECRET_ACCESS_KEY']
    else:
        region_name = 'us-east-2'
        aws_access_key_id = _dict['GLOBAL_AWS_ACCESS_KEY_ID']
        aws_secret_access_key = _dict['GLOBAL_AWS_SECRET_ACCESS_KEY']
    if credential.get('credential_type', None) == 'Amazon Access Key':
        return {
            'aws_access_key_id': credential['aws_access_key_id'],
            'aws_secret_access_key': credential['aws_secret_access_key'],
            'region_name': region_name,
            'bucket': credential['bucket']
        }
    else:
        sts = boto3.client('sts',
                           region_name=region_name,
                           aws_access_key_id=aws_access_key_id,
                           aws_secret_access_key=aws_secret_access_key
                           )
        sts_credential = sts.assume_role(
            RoleArn=credential['role_arn'],
            RoleSessionName='crawl-billing' + str(time.time()),
            DurationSeconds=900,
            ExternalId=credential['external_id']
        )
        return {
            'aws_access_key_id': sts_credential['Credentials']['AccessKeyId'],
            'aws_secret_access_key': sts_credential['Credentials']['SecretAccessKey'],
            'region_name': region_name,
            'aws_session_token': sts_credential['Credentials']['SessionToken'],
            'bucket': credential['bucket']
        }

bwm_map = {
    # 'LingYue': {
    #     '012281355708',
    #     '356785436510',
    #     '658099132956',
    #     '658759333659',
    #     '266213574481',
    #     '292889508331',
    #     '301085154281',
    #     '128030099971',
    #     '130024587818',
    #     '130277835603',
    #     '927108770652',
    #     '462173214055',
    # },
    'NSC': {
            "738984596256",
            "516494992171",
            "516559113274",
            "516718973345",
            "115197553139",
            "115888370350",
            "482015173674",
            "741600887936",
            "041339000575",
            "220276650644",
            "220254894522",
            "741612116470",
            "566166420744",
            "571369485139",
            "571453974672",
            "571620293191",
            "281283439347",
            "281409454190",
            "391370694119",
            "391652259466",
            "257098133642",
            "258662998378",
            "259918045777",
            "174766421626",
            "018419931154",
            "019468012204",
            "019616485031",
            "019628801948",
            "221737289359",
            "222086759490",
            "222704261174",
            "266213574481",
            "128030099971",
            "130024587818",
            "042047776321",
            "107974775660",
            "130772574839",
            "772981767791",
            "804270813472",
            "750594069180",
            "957524751526",
            "605745121453",
            "605427007075",
            "462729772077",
            "825059405741",
            "825754710372",
            "826223612679",
            "826726830564",
            "188665867276",
            "190967990884",
            "265795087340",
            "266066444577",
            "130349229654",
    },
    'BCS': {
            "012281355708",
            "356785436510",
            "658099132956",
            "658759333659",
            "130277835603",
            "462173214055",
            "483213877262",
    },
    'BBA': {
           "114925441656",
           "220347068059",
           "115245083140",
           "115814403771",
           "482112011301",
           "482174782124",
           "066200769587",
           "066546212151",
           "066999274102",
           "067021776723",
           "067030146309",
           "763476499527",
           "607562784642",
           "607580047483",
           "607962074546",
           "608034771178",
           "902176823059",
           "902161166225",
           "114643259757",
           "472159510273",
           "470697101235",
           "471757415074",
           "179496244136",
           "763781648782",
           "125559227903",
           "133017054983",
           "178547782801",
           "181289317951",
           "902120954857",
           "902219438803",
           "763854435329",
           "763918937436",
           "610756541592",
           "763933527642",
           "827982547867",
           "119959612117",
           "126654413831",
           "127966691320",
           "131447637234",
           "132225845962",
           "133703668756",
           "179751599285",
           "763669372248",
           "180763457193",
           "902123337973",
           "889045227160",
           "889312712683",
    }
}

csv_columns2 = Index(
    [
        'InvoiceID',
        'PayerAccountId',
        'LinkedAccountId',
        'RecordType',
        'RecordID',
        'BillingPeriodStartDate',
        'BillingPeriodEndDate',
        'InvoiceDate',
        'PayerAccountName',
        'LinkedAccountName',
        'TaxationAddress',
        'PayerPONumber',
        'ProductCode',
        'ProductName',
        'SellerOfRecord',
        'UsageType',
        'Operation',
        'AvailabilityZone',
        'RateId',
        'ItemDescription',
        'UsageStartDate',
        'UsageEndDate',
        'UsageQuantity',
        'BlendedRate',
        'CurrencyCode',
        'CostBeforeTax',
        'Credits',
        'TaxAmount',
        'TaxType',
        'TotalCost',
        'aws:autoscaling:groupName',
        'aws:cloudformation:logical-id',
        'aws:cloudformation:stack-id',
        'aws:cloudformation:stack-name',
        'aws:createdBy',
        'aws:ec2launchtemplate:id',
        'aws:ec2launchtemplate:version',
        'aws:elasticmapreduce:instance-group-role',
        'aws:elasticmapreduce:job-flow-id',
        'user:APP-ID',
        'user:AccountID',
        'user:Author',
        'user:COST TAG 1',
        'user:Cost Center',
        'user:DLP',
        'user:Description',
        'user:Environment',
        'user:EnvironmentType',
        'user:KubernetesCluster',
        'user:ManagedByAmazonSageMakerResource',
        'user:NAME',
        'user:Name',
        'user:Patch Group',
        'user:Product ID',
        'user:ProjectName',
        'user:Responsible',
        'user:Role',
        'user:Scope',
        'user:Stage',
        'user:Terraform',
        'user:Tool',
        'user:createdBy',
        'user:data-source',
        'user:data-source-billing',
        'user:desc',
        'user:description',
        'user:elasticbeanstalk:environment-id',
        'user:elasticbeanstalk:environment-name',
        'user:environment',
        'user:hub',
        'user:k8s.io/cluster-autoscaler/node-template/label/kops.k8s.io/instancegroup',
        'user:k8s.io/etcd/events',
        'user:k8s.io/etcd/main',
        'user:k8s.io/role/master',
        'user:k8s.io/role/node',
        'user:kubernetes.io/cluster/infotain-e2e.k8s.local',
        'user:kubernetes.io/cluster/infotain-prod.k8s.local',
        'user:kubernetes.io/created-for/pv/name',
        'user:kubernetes.io/created-for/pvc/name',
        'user:kubernetes.io/created-for/pvc/namespace',
        'user:kubernetes.io/service-name',
        'user:name',
        'user:namespace',
        'user:owner',
        'user:project',
        'user:resourcePrefix',
        'user:role',
        'user:server-name',
        'user:snsTopicArn',
        'user:stage',
        'user:sub-product',
        'user:workload-type'])

data_type = {
    'UsageStartDate': object,
    'UsageEndDate': object,
    'InvoiceID': object,
    'ProductName': object,
    'PayerAccountId': object,
    'LinkedAccountId': object,
    'UnBlendedCost': str,
    'BlendedCost': str,
    'ItemDescription': object,
    'RecordType': object
}

def from_filename_get_datetime(filename: str):
    dt = filename[-7:]
    date = datetime.datetime.strptime(dt, "%Y-%m")
    days = calendar.monthrange(date.year, date.month)
    later1dt = date + datetime.timedelta(days[1] + 1)
    later1 = later1dt.strftime("%Y-%m-%d")
    n_days = calendar.monthrange(later1dt.year, later1dt.month)
    later2dt = later1dt + datetime.timedelta(n_days[1])
    later2 = later2dt.strftime("%Y-%m-%d")
    return dt, later1, later2

def new_round(_float, _len=2) -> float:
    """
    Parameters
    ----------
    _float:
    _len: int, 指定四舍五入需要保留的小数点后几位数为_len

    Returns
    -------
    type ==> float, 返回四舍五入后的值
    """
    try:
        if isinstance(_float, float):
            if 'e' in str(_float).lower():
                return round(_float, _len)
            elif str(_float)[::-1].find('.') <= _len:
                return _float
            elif str(_float)[-1] == '5':
                return round(float(str(_float)[:-1] + '6'), _len)
            else:
                return round(_float, _len)
        elif isinstance(_float, str):
            _float = _float.replace(',', '')
            return new_round(float(_float), _len)
        else:
            return round(_float, _len)
    except Exception as e:
        print(_float, _len, e)

alter_csv_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/alter_data/'
data_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/'
word_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/word/'
info_dir = 'D:\workspace\zhangdanjiaoben\other/BMW/info/'


class CustomCsvByDay(object):

    def __init__(self, path, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.csv_list = list(path)
        self.credit = 0
        self.account_dict = defaultdict(float)
        self.tax_dict = defaultdict(float)
        self.es_total_dict = dict()
        self.beforetax = 0
        self.credit_dict = dict()
        self.es_total = 0
        self.new_filename = None
        self.init_csv_list()

    def init_csv_list(self):
        self.new_filename = os.path.join(
            alter_csv_dir, '改_{}'.format(
                self.csv_list[0]))
        for i in range(len(self.csv_list)):
            self.csv_list[i] = os.path.join(data_dir, self.csv_list[i])

    def data_mergence(self):
        df = pd.DataFrame()
        for _csv in self.csv_list:
            if _csv:
                csv_data = pd.read_csv(
                    _csv,
                    sep=',',
                    encoding='utf-8',
                    header=None,
                    names=csv_columns2,
                    dtype=data_type)
                df = pd.concat([df, csv_data], axis=0)
        df.reset_index(drop=True)
        return df

    @staticmethod
    def set_deleted_data():
        del_dict = {
            'RecordType': [
                'AccountTotal',
                'StatementTotal',
                'Rounding',
                'InvoiceTotal'],
            'ItemDescription': [
                '11-11 promotion for new-signup 3',
                '11-11 promotion for new-signup',
                'PD 7398',
                'PD 7180',
            ],
            'ProductName': 'Amazon Premium Support',
            'PayerAccountId': 'PayerAccountId'
        }
        return del_dict

    def create_new_csv(self):
        df = self.data_mergence()
        # # LinkedLineItem和CBRounding数据
        data = df[((df['RecordType'] == 'LinkedLineItem')
                   & (df['ItemDescription'] != '11-11 promotion for new-signup 3')
                   & (df['ItemDescription'] != '11-11 promotion for new-signup')
                   & (df['ItemDescription'] != 'PD 7398')
                   & (df['ItemDescription'] != 'PD 7180')
                   & (df['ProductName'] != 'Amazon Premium Support'))
                  # | (df['RecordType'] == 'CBRounding')
                  ]
        # 获取AWS Premium Support数据
        es_data = df[(df['RecordType'] == 'LinkedLineItem') &
                     (df['ProductName'] == 'Amazon Premium Support')]
        # 获取credit数据
        credit_data = df[(df['RecordType'] == 'LinkedLineItem')
                         & ((df['ItemDescription'] == '11-11 promotion for new-signup 3')
                            | (df['ItemDescription'] == '11-11 promotion for new-signup')
                            # | (df['ItemDescription'] == 'PD 7398')
                            | (df['ItemDescription'] == 'PD 7398'))
                         ]
        # 获取AccountTotal的CostBeforeTax
        account_data = df[df['RecordType'] == 'AccountTotal']
        # InvoiceTotal 数据
        # invoice_data = df[df['RecordType'] == 'InvoiceTotal']
        # StatementTotal 数据
        statement_data = df[df['RecordType'] == 'StatementTotal']
        self.get_credit(credit_data)
        self.get_account(data)
        self.get_estotal(es_data)
        self.get_estotal_dict()
        # 获取credit_dict
        self.get_credit_dict()
        insert_data = self.insert_data_to_csv(data)
        es_total_data = self.insert_es_data_to_csv(data)
        tax_data = self.insert_tax_data_to_csv(data)
        account_data = self.alter_account(account_data)
        statement_data = self.alter_statement(statement_data)
        self.save_lineitem_data()
        self.save_alter_data_to_csv(insert_data)
        self.save_alter_data_to_csv(es_total_data)
        self.save_alter_data_to_csv(tax_data)
        self.save_alter_data_to_csv(account_data)
        self.save_alter_data_to_csv(statement_data)
        print('ES总额为{}'.format(self.es_total))
        print('Credit总额为{}'.format(self.credit))
        print('账号消费总额为{}'.format(self.beforetax))
        print('update finish')

    def get_estotal_dict(self):
        for k, v in self.account_dict.items():
            self.es_total_dict[k] = (v / self.beforetax * self.es_total)

    def get_estotal(self, data):
        for i in range(data.shape[0]):
            self.es_total += float(data.iloc[i]['CostBeforeTax'])

    def alter_account(self, data):
        data = data.drop_duplicates(
            subset=['LinkedAccountId'],
            keep='first',
            inplace=False)
        data = data.reset_index(drop=True)
        for i in range(data.shape[0]):
            credit = self.credit_dict.get(data.iloc[i]['LinkedAccountId'], 0)
            costbeforetax = self.account_dict.get(
                data.iloc[i]['LinkedAccountId'], 0)
            es_total = self.es_total_dict.get(
                data.iloc[i]['LinkedAccountId'], 0)
            data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format(
                costbeforetax + es_total)
            data.loc[i, 'Credits'] = '{:.6f}'.format(new_round(credit, 2))
            data.loc[i, 'TaxAmount'] = '{:.6f}'.format(
                (costbeforetax + credit + es_total) * 0.06)
            data.loc[i, 'TotalCost'] = '{:.6f}'.format(
                costbeforetax + credit + es_total + (costbeforetax + credit + es_total) * 0.06)
        return data

    def alter_statement(self, data):
        data = data.drop_duplicates(
            subset=['PayerAccountId'],
            keep='first',
            inplace=False)
        data = data.reset_index(drop=True)
        for i in range(data.shape[0]):
            data.loc[i, 'ItemDescription'] = data.iloc[i]['ItemDescription']
            data.loc[i, 'CurrencyCode'] = 'CNY'
            data.loc[i, 'CostBeforeTax'] = '{:.6f}'.format(
                self.beforetax + self.es_total)
            data.loc[i, 'Credits'] = '{:.2f}'.format(self.credit)
            data.loc[i, 'TaxAmount'] = '{:.6f}'.format(
                (self.beforetax + self.es_total + self.credit) * 0.06)
            data.loc[i, 'TotalCost'] = '{:.6f}'.format(
                (self.beforetax + self.es_total + self.credit) * 1.06)
        return data

    @staticmethod
    def get_fill_data(data):
        data = data.loc[data['LinkedAccountId'].notna()]
        linked_set = set(data['LinkedAccountId'])
        insert_data = defaultdict(list)
        for i in linked_set:
            linked_data = data[(data['RecordType'] == 'LinkedLineItem') & (
                data['LinkedAccountId'] == i)].iloc[0]
            insert_data['InvoiceID'].append(linked_data['InvoiceID'])
            insert_data['PayerAccountId'].append(linked_data['PayerAccountId'])
            insert_data['LinkedAccountId'].append(i)
            insert_data['RecordType'].append('LinkedLineItem')
            insert_data['BillingPeriodStartDate'].append(
                linked_data['BillingPeriodStartDate'])
            insert_data['BillingPeriodEndDate'].append(
                linked_data['BillingPeriodEndDate'])
            insert_data['InvoiceDate'].append(linked_data['InvoiceDate'])
            insert_data['UsageStartDate'].append(linked_data['UsageStartDate'])
            insert_data['UsageEndDate'].append(linked_data['UsageEndDate'])
            insert_data['CurrencyCode'].append(linked_data['CurrencyCode'])
        return insert_data

    def insert_data_to_csv(self, data):
        fill_data = self.get_fill_data(data)
        for f in fill_data['LinkedAccountId']:
            credit = self.credit_dict.get(f, 0)
            fill_data['CostBeforeTax'].append(0)
            fill_data['Credits'].append(new_round(credit))
            fill_data['TaxAmount'].append(new_round(credit * 0.06))
            if new_round(credit * 0.06) == 0:
                fill_data['TaxType'].append('None')
            else:
                fill_data['TaxType'].append('VAT')
            fill_data['TotalCost'].append(new_round(credit + credit * 0.06))
        data = pd.DataFrame(fill_data, columns=list(data.columns))
        return data

    def insert_es_data_to_csv(self, data):
        fill_data = self.get_fill_data(data)
        for f in fill_data['LinkedAccountId']:
            es_total = self.es_total_dict.get(f, 0)
            fill_data['CostBeforeTax'].append(new_round(es_total))
            fill_data['Credits'].append(0)
            fill_data['TaxAmount'].append(new_round(es_total * 0.06))
            if new_round(es_total * 0.06) == 0:
                fill_data['TaxType'].append('None')
            else:
                fill_data['TaxType'].append('VAT')
            fill_data['TotalCost'].append(
                new_round(es_total + es_total * 0.06))
        data = pd.DataFrame(fill_data, columns=list(data.columns))
        return data

    def insert_tax_data_to_csv(self, data):
        fill_data = self.get_fill_data(data)
        for f in fill_data['LinkedAccountId']:
            tax_total = self.tax_dict.get(f, 0)
            es_tax = new_round(self.es_total_dict.get(f, 0) * 0.06)
            credit_tax = new_round(self.credit_dict.get(f, 0) * 0.06)
            account_tax = new_round((self.account_dict.get(f, 0) +
                                     self.credit_dict.get(f, 0) + self.es_total_dict.get(f, 0)) * 0.06, 6)
            fill_data['CostBeforeTax'].append(0)
            fill_data['Credits'].append(0)
            fill_data['TaxAmount'].append(
                '{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax))
            if new_round(account_tax - tax_total - es_tax - credit_tax * 0.06) == 0:
                fill_data['TaxType'].append('None')
            else:
                fill_data['TaxType'].append('VAT')
            fill_data['TotalCost'].append(
                '{:.2f}'.format(account_tax - tax_total - es_tax - credit_tax))
        data = pd.DataFrame(fill_data, columns=list(data.columns))
        return data

    def get_credit(self, data):
        for i in range(data.shape[0]):
            self.credit += float(data.iloc[i]['Credits'])

    def get_account(self, data):
        for i in range(data.shape[0]):
            self.account_dict[data.iloc[i]['LinkedAccountId']
                              ] += float(data.iloc[i]['CostBeforeTax'])
            self.tax_dict[data.iloc[i]['LinkedAccountId']
                          ] += float(data.iloc[i]['TaxAmount'])
            self.beforetax += float(data.iloc[i]['CostBeforeTax'])

    def get_credit_dict(self):
        for k, v in self.account_dict.items():
            self.credit_dict[k] = v / self.beforetax * self.credit

    @staticmethod
    def save_csv(path, data):
        data.to_csv(path, mode='a', index=False, encoding='UTF-8', header=0)

    @staticmethod
    def save_csv_first(path, data):
        data.to_csv(path, mode='w', encoding='UTF-8', index=False)

    def save_lineitem_data(self):
        del_dict = self.set_deleted_data()
        with open(self.new_filename, 'w', encoding='UTF-8') as w:
            w.write('"' + '","'.join(list(csv_columns2)) + '"' + '\n')
            num = 0
            for path in self.csv_list:
                with open(path, 'r', encoding='UTF-8') as r:
                    while True:
                        line = r.readline()
                        if not line:
                            break
                        data = line.split('","')
                        if len(data) < 2:
                            continue
                        line_dict = dict(zip(list(csv_columns2), data))
                        # 通过 InvoiceID判断时会有问题
                        if line_dict['RecordType'] in del_dict['RecordType']:
                            continue
                        elif line_dict['ItemDescription'] in del_dict['ItemDescription']:
                            continue
                        elif line_dict['ProductName'] == del_dict['ProductName']:
                            continue
                        elif line_dict['PayerAccountId'] == del_dict['PayerAccountId']:
                            continue
                        w.write('","'.join(data))
                        num += 1
                        # print(num)
        print('LineItem数据加载完成')

    def save_alter_data_to_csv(self, alter_data):
        _data = alter_data.fillna('').to_dict(orient='list')
        with open(self.new_filename, 'a', encoding='utf-8') as a:
            dbr_dict = {v: int(k) for k, v in enumerate(list(csv_columns2))}
            for i in range(len(list(_data.values())[0])):
                empty_data = ["" for _ in range(len(list(csv_columns2)))]
                for k, v in _data.items():
                    empty_data[dbr_dict[k]] = v[i]
                a.write(','.join('"{}"'.format(x) for x in empty_data) + '\n')

class GenerateWord(object):

    def __init__(self, *args, **kwargs):
        self.args = args
        self.kwargs = kwargs

    def my_sum(self, data_dict, loc, length=2):
        # 模板内使用
        count = 0.0
        for item in data_dict.values():
            x = self.reverse_fill(item[loc])
            count += x
        a = new_round(count, length)
        return a

    @staticmethod
    def reverse_fill(value_str: float, length=6):
        return new_round(value_str, length)

    @staticmethod
    def get_region_display(data: dict):
        for i in data.keys():
            if i == 'cn-north-1':
                data['北京区资源费'] = data.pop(i)
            elif i == 'cn-northwest-1':
                data['宁夏区资源费'] = data.pop(i)
        return data

    @staticmethod
    def get_date_array(_date: str):
        _array = monthrange(int(_date[:4]), int(_date[5:7]))
        return [
            "%s-%s-01" % (_date[:4], _date[5:7]),
            "%s-%s-%2d" % (_date[:4], _date[5:7], _array[1]),
        ]

    @staticmethod
    def fill(value, length=2):
        temp = "{:,.%sf}" % length
        if isinstance(value, str):
            value = value.replace(',', '')
            return temp.format(float(value))
        elif isinstance(value, int) or isinstance(value, float):
            return temp.format(value)
        else:
            return temp.format(0)

    @staticmethod
    def display_time(day_str, str_fmt="%Y-%m-%d %H:%M:%S"):
        # 设置中文编码格式
        # import locale
        # locale.setlocale(locale.LC_CTYPE, 'chinese')
        zh_fmt = '%Y年%m月%d日'
        # en_fmt = '%B %d, %Y'
        return time.strftime(zh_fmt, time.strptime(str(day_str), str_fmt))

    def get_currency(self):
        if self.kwargs.get("currency") == 'USD':
            return "$"
        return "￥"

    def get_temple_number(self, number):
        data = [
            self.fill(number * 1.06),
            self.fill(number),
            self.fill(0),
            self.fill(number * 0.06),
        ]
        return data

    @staticmethod
    def check_path_creat(path):
        if not os.path.exists(path):
            os.mkdir(path)

class GenerateWordFromCSV(GenerateWord):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.csv_list = []
        self.total = 0
        self.credit_total = 0
        self.es_total = 0
        self.linked_dict = dict()
        self.filepath = list(self.kwargs['csv_list'])[0][:-4]
        self.init_csv_list()

    def init_csv_list(self):
        for i in self.kwargs['csv_list']:
            self.csv_list.append(os.path.join(data_dir, i))

    def data_mergence(self):
        df = pd.DataFrame()
        for csv in self.csv_list:
            csv_data = pd.read_csv(
                csv,
                sep=',',
                encoding='utf-8',
                header=None,
                names=csv_columns2,
                dtype=data_type)
            df = pd.concat([df, csv_data], axis=0)
        df.reset_index(drop=True)
        return df

    def get_data_form_csv(self):
        df = self.data_mergence()
        df['CostBeforeTax'] = df['CostBeforeTax'].apply(
            pd.to_numeric, errors='coerce').fillna(0.0)
        df['Credits'] = df['Credits'].apply(
            pd.to_numeric, errors='coerce').fillna(0.0)
        df['TaxAmount'] = df['TaxAmount'].apply(
            pd.to_numeric, errors='coerce').fillna(0.0)
        df['TotalCost'] = df['TotalCost'].apply(
            pd.to_numeric, errors='coerce').fillna(0.0)
        lineitem_df = df[((df['RecordType'] == 'LinkedLineItem')
                          & (df['ItemDescription'] != '11-11 promotion for new-signup 3')
                          & (df['ItemDescription'] != '11-11 promotion for new-signup')
                          & (df['ItemDescription'] != 'PD 7180')
                          & (df['ItemDescription'] != 'PD 7398')
                          & (df['ProductName'] != 'Amazon Premium Support'))]
        es_df = df[(df['RecordType'] == 'LinkedLineItem') & (
            df['ProductName'] == 'Amazon Premium Support')]
        credit_data = df[(df['RecordType'] == 'LinkedLineItem')
                         & ((df['ItemDescription'] == '11-11 promotion for new-signup 3')
                            | (df['ItemDescription'] == '11-11 promotion for new-signup')
                            | (df['ItemDescription'] == 'PD 7180')
                            | (df['ItemDescription'] == 'PD 7398'))
                         ]
        self.get_es_total(es_df)
        self.get_credit_total(credit_data)
        self.get_linkedaccount_dict(lineitem_df)
        print('消费总额为{}'.format(self.total))
        print('Credit总额为{}'.format(self.credit_total))
        print('ES总额为{}'.format(self.es_total))
        self.save_mapping_date_to_csv()
        for i in self.linked_dict:
            self.linked_dict[i]['AWS Support (Enterprise)'] = \
                self.linked_dict[i]['TotalCost'] / self.total * self.es_total
            self.linked_dict[i]['AWS Credits'] = self.linked_dict[i]['TotalCost'] / \
                self.total * self.credit_total
            self.create_word(i)

    def get_credit_total(self, data):
        for i in range(data.shape[0]):
            self.credit_total += float(data.iloc[i]['Credits'])

    def get_es_total(self, data):
        for i in range(data.shape[0]):
            self.es_total += float(data.iloc[i]['CostBeforeTax'])

    def get_linkedaccount_dict(self, data):
        linked_set = set(data['LinkedAccountId'])
        for i in range(data.shape[0]):
            self.total += float(data.iloc[i]['CostBeforeTax'])
        for i in linked_set:
            linked_data = data[data['LinkedAccountId'] == i]
            product_set = set(data['ProductName'])
            linked_dict = {
                'InvoiceID': linked_data.iloc[0]['InvoiceID'],
                'TotalCost': linked_data['CostBeforeTax'].sum(),
            }
            for j in product_set:
                linked_dict[j] = linked_data[linked_data['ProductName']
                                             == j]['CostBeforeTax'].sum()
            if i in self.linked_dict.keys():
                for k, v in linked_dict.items():
                    if not k == 'InvoiceID':
                        product_num = self.linked_dict[i].get(k, 0)
                        self.linked_dict[i][k] = float(product_num) + float(v)
            else:
                self.linked_dict[i] = linked_dict

    @staticmethod
    def get_template():
        return os.path.join(info_dir, 'china-template-2.docx')

    def create_word(self, linked):
        filedate, noticedate, due_data = from_filename_get_datetime(self.filepath)
        result = self.linked_dict.get(linked, None)
        if not result:
            print('{}数据不存在'.format(linked))
        invoiceid = result.pop('InvoiceID')
        totalcost = result.pop('TotalCost')
        product_code_dict = defaultdict()
        for k, v in result.items():
            product_code_dict[k] = super().get_temple_number(v)
        totalcost_li = self.get_temple_number(totalcost)
        date = self.get_date_array(filedate)
        for i in range(len(date)):
            date[i] = self.display_time(date[i], str_fmt="%Y-%m-%d")
        xx = self.get_template()
        template = DocxTemplate(xx)
        content = {
            'storage': self.get_storage(linked),
            'linkedid': linked,
            'noticenumber': invoiceid,
            'noticedate': noticedate,
            'currency': self.get_currency(),
            'region_display': '北京区资源费',
            'totalcost': totalcost_li,
            'product_code_dict': product_code_dict,
            'date': date,
            'workday': due_data,
        }
        file_name = '{}{}_AWS账单{}.docx'.format(
            self.get_storage(linked), linked, filedate)
        doc_file_path = os.path.join(word_dir, self.filepath)
        self.check_path_creat(doc_file_path)
        doc_file_name = os.path.join(doc_file_path, file_name)
        template.render(context=content)
        template.save(doc_file_name)
        print('{}{}_AWS账单{}.docx  生成完成'.format(self.get_storage(linked), linked, filedate))

    @staticmethod
    def get_storage(linkedid):
        for k, v in bwm_map.items():
            if linkedid in v:
                return k
        return ''

    def get_temple_number(self, num):
        data = [
            self.fill(
                ((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 1.06),
            self.fill(
                (num / self.total * self.es_total) + num),
            self.fill(
                num / self.total * self.credit_total),
            self.fill(
                ((num / self.total * self.es_total) + num + num / self.total * self.credit_total) * 0.06),
        ]
        return data

    def save_mapping_date_to_csv(self):
        filename = os.path.join(
            alter_csv_dir, '{}_mapping.csv'.format(self.filepath))
        with open(filename, 'w', encoding='utf-8') as w:
            w.write(','.join('"{}"'.format(x) for x in ['mapping', 'LinkedAccountId', 'UnBlendedCost']) + '\n')
            for key, value in self.linked_dict.items():
                mapping = ''
                for k, v in bwm_map.items():
                    if key in v:
                        mapping = k
                UnBlendedCost = '{:.6f}'.format((value['TotalCost'] * 1.06) +
                                                (value['TotalCost'] / self.total * self.es_total) +
                                                (value['TotalCost'] / self.total * self.es_total * 0.06) +
                                                (value['TotalCost'] / self.total * self.credit_total) +
                                                (value['TotalCost'] / self.total * self.credit_total * 0.06))
                w.write(','.join('"{}"'.format(x) for x in [mapping, key, UnBlendedCost]) + '\n')


if __name__ == '__main__':
    # 获取输入执行时输入参数
    month = '2024-06'
    if len(sys.argv) > 1:
        month = sys.argv[1]
    print('执行月份:' + month)
    csv_list = ['785781180852-aws-cost-allocation-ACTS-'+month+'.csv',
        '785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv',]
    # 1.1 循环附件名
    for csv in csv_list:
        # print(os.path.isfile('./'+csv))
        # 1.1.1附件不存在，则拉取
        if not os.path.isfile('./' + csv):
            print('附件不存在:', csv)
            auto_download_file_form_s3([csv])

    csv_typle = (
        '785781180852-aws-cost-allocation-ACTS-'+month+'.csv',
        '785781180852-aws-cost-allocation-ACTS-Ningxia-'+month+'.csv',
    )
    CustomCsvByDay(csv_typle).create_new_csv()
    GenerateWordFromCSV(csv_list=csv_typle).get_data_form_csv()