import requests
from flask import jsonify, current_app
from lxml import etree
from apps.models import *
from apps.utils.response_code import RET
from apps import db
from apps.view_xiaocx import api_xiaocx


# Data notifications (数据通报)
def _fetch_notification_tree(url, headers, timeout=10):
    """Download ``url`` and parse the body into an lxml HTML tree.

    Args:
        url: Absolute URL of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before ``requests`` gives up;
            without it a stalled server would block the request indefinitely.

    Returns:
        The parsed tree, or ``None`` (after logging a warning) when the server
        does not answer with HTTP 200 or no tree could be built, so the caller
        can skip the page instead of calling ``.xpath`` on a non-tree value.

    Raises:
        Whatever ``requests.get`` / ``etree.HTML`` raise; the caller handles it.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        current_app.logger.warning("skip %s: HTTP %s", url, response.status_code)
        return None
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    if tree is None:
        current_app.logger.warning("skip %s: no HTML tree could be parsed", url)
    return tree


@api_xiaocx.route("/DataNotification", methods=["GET"])
def data_notification():
    """Scrape the data-notification list pages and upsert them into the DB.

    For each list page, the article titles, links and dates are read; every
    article page is then downloaded and one of its ``<table>`` elements is
    serialised as the stored content. Rows are matched on ``flag=2`` plus the
    article title: an existing row only gets its ``content`` refreshed, a
    missing one is inserted.

    Pages or articles that cannot be downloaded with HTTP 200, or whose
    expected table is missing, are logged and skipped.

    Returns:
        A JSON response: ``RET.OK`` with the scraped items on success, or
        ``RET.DBERR`` when scraping raises or the database write fails.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    list_base_url = "http://zsj.jcgov.gov.cn/sjtb/sjtb/"
    page_html_list = [
        "index.shtml",  # first page
        "index_1.shtml",  # page 2
        "index_2.shtml",  # page 3
        "index_3.shtml",  # page 4
        "index_4.shtml",  # page 5
        "index_5.shtml",  # page 6
        "index_6.shtml",  # page 7
    ]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54'}

    data_list = []
    try:
        for page_index in page_html_list:
            url = list_base_url + page_index
            html_etree = _fetch_notification_tree(url, headers)
            if html_etree is None:
                continue

            content_name_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[@class="list_newstitle"]/a/text()')
            content_url_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[@class="list_newstitle"]/a/@href')
            time_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[2]/text()')
            if not (len(content_name_list) == len(content_url_list) == len(time_list)):
                # The three lists are paired by position; a length mismatch
                # means some entries may be paired with the wrong link/date.
                current_app.logger.warning(
                    "%s: %d titles, %d links, %d dates - lists are not aligned",
                    url, len(content_name_list), len(content_url_list), len(time_list))

            for content_name, href, raw_time in zip(content_name_list, content_url_list, time_list):
                # urljoin resolves "./x" the same way the old string replace
                # did and also copes with "../x" and absolute links.
                content_url = urljoin(url, href)
                detail_tree = _fetch_notification_tree(content_url, headers)
                if detail_tree is None:
                    continue

                # '//table[1]' matches every table that is the first <table>
                # child of its parent; the third match is the one stored.
                # NOTE(review): presumably the article body table - confirm.
                tables = detail_tree.xpath('//table[1]')
                if len(tables) < 3:
                    current_app.logger.warning(
                        "skip %s: expected content table not found", content_url)
                    continue
                result = etree.tostring(tables[2], encoding='utf-8').decode()

                data_list.append({
                    "content_name": content_name,
                    "content": result,
                    # Drops the first and last character of the scraped text
                    # (presumably surrounding brackets - TODO confirm).
                    "time": raw_time[1:-1],
                })
    except Exception:
        current_app.logger.exception("scraping data notifications failed")
        # NOTE(review): code/message kept as before for client compatibility,
        # although failures here are scraping errors rather than DB errors.
        return jsonify(code=RET.DBERR, msg="数据库错误")

    # Persist to the database.
    try:
        for data in data_list:
            inform_obj = InvestmentInformation.query.filter_by(
                flag=2, name=data['content_name']).first()
            if inform_obj:
                # Existing row: only the content is refreshed.
                inform_obj.content = data['content']
            else:
                inform_obj = InvestmentInformation()
                inform_obj.name = data['content_name']
                inform_obj.flag = 2
                inform_obj.content = data['content']
                inform_obj.time = data['time']
                inform_obj.source = '晋城市投资促进中心'
                inform_obj.info = ''
                db.session.add(inform_obj)
            db.session.commit()
    except Exception:
        # Leave the session usable for later requests.
        db.session.rollback()
        current_app.logger.exception("saving data notifications failed")
        return jsonify(code=RET.DBERR, msg="数据库错误")
    return jsonify(code=RET.OK, msg="入库成功", data=data_list)


# Work updates (工作动态)
def _fetch_trend_tree(url, headers, timeout=10):
    """Download ``url`` and parse the body into an lxml HTML tree.

    Args:
        url: Absolute URL of the page to download.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before ``requests`` gives up;
            without it a stalled server would block the request indefinitely.

    Returns:
        The parsed tree, or ``None`` (after logging a warning) when the server
        does not answer with HTTP 200 or no tree could be built.

    Raises:
        Whatever ``requests.get`` / ``etree.HTML`` raise; the caller handles it.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code != 200:
        current_app.logger.warning("skip %s: HTTP %s", url, response.status_code)
        return None
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    if tree is None:
        current_app.logger.warning("skip %s: no HTML tree could be parsed", url)
    return tree


def _extract_trend_content(html_tree, page_url):
    """Return the list of text fragments that make up an article body.

    Two known wrapper ``div`` class combinations are tried in turn for text
    nodes. If neither yields text, the links inside a third wrapper class are
    read instead and, when the first link is relative (``./``) or absolute
    (``http``), a single "copy this link" message is returned for it.

    Args:
        html_tree: Parsed article page.
        page_url: URL the article was downloaded from; relative links are
            resolved against it.

    Returns:
        A list of strings (possibly empty).
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    fragments = html_tree.xpath(
        '//div[@class="view TRS_UEDITOR trs_paper_default trs_web"]//text()')
    if fragments:
        return fragments

    fragments = html_tree.xpath(
        '//div[@class="view TRS_UEDITOR trs_paper_default trs_word trs_web"]//text()')
    if fragments:
        return fragments

    hrefs = html_tree.xpath(
        '//div[@class="view TRS_UEDITOR trs_paper_default"]//@href')
    if not hrefs:
        return hrefs

    first_href = hrefs[0]
    if first_href.startswith('./'):
        # Resolve against the article's own URL rather than a fixed month
        # directory, so links of articles from any month point to the right place.
        return ['请复制链接到浏览器查看：' + urljoin(page_url, first_href)]
    if first_href.startswith('http'):
        return ['请复制链接到浏览器查看：' + first_href]
    return hrefs


@api_xiaocx.route("/WorkTrend", methods=["GET"])
def work_trend():
    """Scrape the work-update list pages and upsert them into the DB.

    The first list page plus pages ``index_1`` .. ``index_24`` are read. For
    each listed article the page is downloaded and its text fragments are
    joined, each prefixed with ``<br>``. Rows are matched on ``flag=1`` plus
    the article title: an existing row only gets its ``content`` refreshed, a
    missing one is inserted.

    List pages or articles that cannot be downloaded with HTTP 200 are logged
    and skipped; skipping an article does not shift the title/link/date
    pairing of the articles that follow it.

    Returns:
        A JSON response: ``RET.OK`` with the scraped items on success, or
        ``RET.DBERR`` when scraping raises or the database write fails.
    """
    list_base_url = "http://zsj.jcgov.gov.cn/xwdt/zhxx/"
    page_html_list = ["index.shtml"] + ['index_{}.shtml'.format(n) for n in range(1, 25)]

    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36 Edg/108.0.1462.54'}

    data_list = []
    try:
        for page_index in page_html_list:
            url = list_base_url + page_index
            html_etree = _fetch_trend_tree(url, headers)
            if html_etree is None:
                # Never fall through with a tree left over from another page.
                continue

            content_name_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[@class="list_newstitle"]/a/text()')
            content_url_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[@class="list_newstitle"]/a/@href')
            time_list = html_etree.xpath('//div[@class="newslist newslistdixx"]//span[2]/text()')
            if not (len(content_name_list) == len(content_url_list) == len(time_list)):
                # The three lists are paired by position; a length mismatch
                # means some entries may be paired with the wrong link/date.
                current_app.logger.warning(
                    "%s: %d titles, %d links, %d dates - lists are not aligned",
                    url, len(content_name_list), len(content_url_list), len(time_list))

            # zip keeps title, link and date in step even when an article is
            # skipped below.
            for content_name, href, raw_time in zip(content_name_list, content_url_list, time_list):
                content_url = href.replace('./', list_base_url)
                detail_tree = _fetch_trend_tree(content_url, headers)
                if detail_tree is None:
                    continue

                content_info_list = _extract_trend_content(detail_tree, content_url)
                data_list.append({
                    "content_name": content_name,
                    # Every fragment is preceded by a line break.
                    "content_info": ''.join('<br>' + fragment for fragment in content_info_list),
                    # Drops the first and last character of the scraped text
                    # (presumably surrounding brackets - TODO confirm).
                    "time": raw_time[1:-1],
                })
    except Exception:
        current_app.logger.exception("scraping work updates failed")
        # NOTE(review): code/message kept as before for client compatibility,
        # although failures here are scraping errors rather than DB errors.
        return jsonify(code=RET.DBERR, msg="数据库错误")

    # Persist to the database.
    try:
        for data in data_list:
            inform_obj = InvestmentInformation.query.filter_by(
                flag=1, name=data['content_name']).first()
            if inform_obj:
                # Existing row: only the content is refreshed.
                inform_obj.content = data['content_info']
            else:
                inform_obj = InvestmentInformation()
                inform_obj.name = data['content_name']
                inform_obj.flag = 1
                inform_obj.time = data['time']
                inform_obj.source = '晋城市投资促进中心'
                inform_obj.content = data['content_info']
                db.session.add(inform_obj)
            db.session.commit()
    except Exception:
        # Leave the session usable for later requests.
        db.session.rollback()
        current_app.logger.exception("saving work updates failed")
        return jsonify(code=RET.DBERR, msg="数据库错误")
    return jsonify(code=RET.OK, msg="入库成功", data=data_list)

