Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
M
mancheng
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Administrator
mancheng
Commits
75213564
Commit
75213564
authored
Mar 18, 2024
by
dong
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix
parent
4c111fde
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
143 additions
and
125 deletions
+143
-125
apps/models.py
+15
-0
apps/utils/industry_enterprise_num.py
+1
-1
test.py
+127
-124
No files found.
apps/models.py
View file @
75213564
...
@@ -678,6 +678,21 @@ class City(BaseModel, db.Model):
...
@@ -678,6 +678,21 @@ class City(BaseModel, db.Model):
flag
=
db
.
Column
(
db
.
Integer
,
comment
=
'数据标识 1整年数据,2阶段数据'
)
# 人口
flag
=
db
.
Column
(
db
.
Integer
,
comment
=
'数据标识 1整年数据,2阶段数据'
)
# 人口
# Investment-promotion dashboard: economic indicators of development zones
class KfqEco(BaseModel, db.Model):
    """ORM model for the ``kfq_eco`` table (development-zone economic indicators).

    Every column carries its own database-level ``comment``; the trailing
    English comments below are translations of those texts.
    """
    __tablename__ = "kfq_eco"
    __table_args__ = ({'comment': '招商驾驶舱-开发区经济指标表'})  # table-level comment
    id = db.Column(db.Integer, primary_key=True, autoincrement=True, comment='经济指标主键id')  # surrogate primary key
    xzqsy = db.Column(db.Float, comment='新增企事业单位中长期贷款增速:%')  # growth rate of new medium/long-term loans to enterprises and institutions, %
    gyydlzs = db.Column(db.Float, comment='工业用电量增速:%')  # industrial electricity consumption growth rate, %
    yysrzs = db.Column(db.Float, comment='营业收入增速:%')  # operating revenue growth rate, %
    yysrzl = db.Column(db.Float, comment='营业收入增量:万元')  # operating revenue increment, in units of 10,000 yuan
    jckzs = db.Column(db.Float, comment='进出口总值增速:%')  # total import/export value growth rate, %
    # NOTE(review): the only indicator stored as String(10) rather than Float,
    # although its comment describes a score -- confirm this is intended.
    sjsyxmff = db.Column(db.String(10), comment='实际使用内资项目赋分:分值')  # score awarded for projects actually using domestic capital
    sjsywzzs = db.Column(db.Float, comment='实际使用外资增速:%')  # growth rate of foreign capital actually used, %
    sjsywzzl = db.Column(db.Float, comment='实际使用外资增量:万元')  # increment of foreign capital actually used, in units of 10,000 yuan
# 招商驾驶舱-各区县视频
# 招商驾驶舱-各区县视频
class
Video
(
db
.
Model
):
class
Video
(
db
.
Model
):
__tablename__
=
"video"
__tablename__
=
"video"
...
...
apps/utils/industry_enterprise_num.py
View file @
75213564
...
@@ -109,7 +109,7 @@ def get_product_li(product, chain_id):
...
@@ -109,7 +109,7 @@ def get_product_li(product, chain_id):
def
handle
():
def
handle
():
with
app
.
app_context
():
with
app
.
app_context
():
# industry_obj_list = IndustryChain.query.all()
# industry_obj_list = IndustryChain.query.all()
industry_obj_list
=
IndustryChain
.
query
.
filter
(
IndustryChain
.
chain_id
==
9
)
.
all
()
industry_obj_list
=
IndustryChain
.
query
.
filter
(
IndustryChain
.
chain_id
==
1
)
.
all
()
# print(industry_obj_list)
# print(industry_obj_list)
# 读取industry_chain数据表的产业名称
# 读取industry_chain数据表的产业名称
for
industry_obj
in
industry_obj_list
:
for
industry_obj
in
industry_obj_list
:
...
...
test.py
View file @
75213564
import
pandas
as
pd
# import pandas as pd
# 链接数据库
# # 链接数据库
import
pymysql
# import pymysql
"""# 第一列是产业链名称和id单独保存,用作匹配完成后的赋值
# """# 第一列是产业链名称和id单独保存,用作匹配完成后的赋值
# 第二列读取到二级产业,然后遍历企业表去匹配企业“企业名称”、“所属行业”、“经营范围”、“企业简介”、“主营产品”
# # 第二列读取到二级产业,然后遍历企业表去匹配企业“企业名称”、“所属行业”、“经营范围”、“企业简介”、“主营产品”
# 如果匹配到了,就把二级产业进行保存,然后读取第三列的三级产业去匹配,以此类推
# # 如果匹配到了,就把二级产业进行保存,然后读取第三列的三级产业去匹配,以此类推
# 如果为空pass,读取下一个产业链数据
# # 如果为空pass,读取下一个产业链数据
# 如果有值把产业环节列表的数据填充到product_all,把产业链id保存到列表里"""
# # 如果有值把产业环节列表的数据填充到product_all,把产业链id保存到列表里"""
#
#
class ChainTag:
    """Tag enterprises with the industry chains whose segment names occur in their text.

    For every keyword workbook the rows are scanned top to bottom.  The
    "一级实体" column names the industry chain (resolved to an id through the
    ``navigation`` table); the "二级实体" .. "五级实体" columns name segments of
    that chain.  The first segment name found as a substring of the company
    text records the chain id and ends the scan of that workbook.  After all
    workbooks are scanned the collected ids are written to
    ``enterprise.indu_id_list``.
    """

    # Keyword workbooks, scanned in this order by make_tag().
    EXCEL_PATHS = ('产业链文件/硬质合金(处理后).xlsx', '产业链文件/不锈钢(处理后).xlsx')
    # Segment-name columns, tried left to right for every row.
    LEVEL_COLUMNS = ("二级实体", "三级实体", "四级实体", "五级实体")

    def __init__(self):
        """Open the MySQL connection and a cursor shared by all methods."""
        # Local import so the block does not depend on a file-level import.
        import os

        # SECURITY NOTE(review): the fallback values below are credentials that
        # are committed to source control.  They are kept only so existing
        # invocations keep working; rotate them and supply the real values via
        # the CHAIN_DB_* environment variables instead.
        self.db = pymysql.connect(
            host=os.environ.get(
                'CHAIN_DB_HOST',
                'rm-8vbn50m65w332c23aso.mysql.zhangbei.rds.aliyuncs.com',
            ),
            user=os.environ.get('CHAIN_DB_USER', 'root'),
            password=os.environ.get('CHAIN_DB_PASSWORD', 'Root@2020'),
            db='industry_chain_online',
            charset='utf8',
        )
        self.cur = self.db.cursor()

    def get_chain_id(self, chain_name):
        """Return ``navigation.nid`` for *chain_name*, or ``''`` when not found.

        A database error is also reported as ``''`` (best-effort lookup).
        """
        try:
            # Parameterized: the name comes from a spreadsheet cell and may
            # contain quotes.
            self.cur.execute(
                "SELECT nid FROM navigation WHERE name=%s;", (chain_name,)
            )
            row = self.cur.fetchone()
        except pymysql.MySQLError:
            return ''
        return row[0] if row else ''

    def get_company_info(self, com_id):
        """Return the concatenated searchable text of enterprise *com_id*.

        The text is company_name + company_industry + business_scope +
        short_info.  NULL columns count as empty text, so one missing field no
        longer discards the whole company.  Returns ``''`` when the row does
        not exist or the query fails.
        """
        try:
            self.cur.execute(
                'SELECT company_name,company_industry,business_scope,short_info '
                'FROM `enterprise` WHERE id=%s;',
                (com_id,),
            )
            row = self.cur.fetchone()
        except pymysql.MySQLError:
            return ''
        if not row:
            return ''
        return ''.join(str(field) for field in row if field is not None)

    def match_indu(self, indu_li, chain_id_li, indu_name, company_info, chain_id):
        """Record a hit when *indu_name* is a substring of *company_info*.

        On a hit, *indu_name* is appended to *indu_li* and *chain_id* is added
        to *chain_id_li* if not already present (both lists are mutated in
        place).

        Returns:
            True on a hit, False otherwise.  Empty or non-string names (for
            example the NaN pandas yields for a blank cell) never match.
        """
        if not isinstance(indu_name, str) or not indu_name:
            return False
        try:
            found = indu_name in company_info
        except TypeError:
            # company_info does not support substring tests (e.g. None).
            return False
        if not found:
            return False
        indu_li.append(indu_name)
        if chain_id not in chain_id_li:
            chain_id_li.append(chain_id)
        return True

    def read_excel(self, i):
        """Load the *i*-th keyword workbook of ``EXCEL_PATHS`` as a DataFrame."""
        return pd.read_excel(self.EXCEL_PATHS[i])

    def _match_workbook(self, data, company_info, chain_id_li, chain_ids):
        """Scan one workbook; stop at the first matching segment or unknown chain."""
        indu_li = []  # segment names matched in this workbook
        for row in data.to_dict('records'):
            chain_name = row["一级实体"]
            if chain_name not in chain_ids:
                chain_id = self.get_chain_id(chain_name)
                if not chain_id:
                    print('{} 产业链不存在,请核实!'.format(chain_name))
                    return
                # Cache only successful lookups, keyed by name, so a chain id
                # can never be paired with the wrong workbook.
                chain_ids[chain_name] = chain_id
            chain_id = chain_ids[chain_name]
            for column in self.LEVEL_COLUMNS:
                if self.match_indu(indu_li, chain_id_li, row[column],
                                   company_info, chain_id):
                    return

    def make_tag(self, company_info, com_id=None):
        """Match *company_info* against every workbook and store the result.

        Args:
            company_info: searchable company text (see get_company_info).
            com_id: id of the enterprise row to update.  When omitted, the
                module-level ``com_id`` variable is used, which is what
                single-argument callers relied on.

        Returns:
            The list of matched chain ids.  Its ``str()`` form is what gets
            written to ``enterprise.indu_id_list``; the row is updated even
            when the list is empty.

        Raises:
            ValueError: no *com_id* was given and no module-level one exists.
        """
        if com_id is None:
            com_id = globals().get('com_id')
            if com_id is None:
                raise ValueError('make_tag() needs the enterprise id (com_id)')

        chain_id_li = []
        chain_ids = {}  # chain name -> id, shared across workbooks
        # Each workbook is visited exactly once, including after an
        # unknown-chain abort in the previous one.
        for excel_id in range(len(self.EXCEL_PATHS)):
            data = self.read_excel(excel_id)
            self._match_workbook(data, company_info, chain_id_li, chain_ids)

        self.cur.execute(
            "UPDATE `enterprise` SET indu_id_list=%s WHERE id=%s;",
            (str(chain_id_li), com_id),
        )
        self.db.commit()
        return chain_id_li
#
#
# Script entry point: tag every enterprise row from id 17 up to the current
# maximum id.
if __name__ == '__main__':
    chain = ChainTag()
    try:
        # Bound the scan by the largest existing id so the loop terminates;
        # ids without a row (or without usable text) are skipped below.
        chain.cur.execute("SELECT MAX(id) FROM `enterprise`;")
        max_row = chain.cur.fetchone()
        max_id = max_row[0] if max_row and max_row[0] is not None else 0

        # NOTE(review): the original counter started at 16 and was incremented
        # before use, so 17 is the first id processed -- confirm this offset
        # is still wanted.
        # `com_id` is deliberately a module-level name: make_tag() falls back
        # to it when it is called with the company text only.
        for com_id in range(17, max_id + 1):
            # The text comes from the database only; it is not replaced by a
            # fixed sample string.
            company_info = chain.get_company_info(com_id)
            if not company_info:
                continue
            chain.make_tag(company_info)
    finally:
        # Release the cursor and connection even if tagging fails midway.
        chain.cur.close()
        chain.db.close()
# SELECT company_name, status, legal, capital, capital_nums, capital_id, build_date, yearid, province, city, district, lng, lat, c_lng, c_lat, d_lng, d_lat, address, telephone, telephone_more, email, social_code, tax_code, register_code, company_code, bao_num, entype, entypeid, company_industry, web_site, business_scope, register_org, money_type, money_type_id, high_new, parti_year, tbe, tbe_sjmy, fianacing, fianacing_rounds, roundid, financing_amount, software_copyright, num_software, public_sector, quoted_company, public_id, foreign_investment, patent, num_patent, company_info, unicorn, isfive, takingn, product_all, scale, serve, c_name, c_type, f_name, f_type, hots, zjtg, zjtg_gjjxjr, zjtg_sjxjr
# FROM company WHERE c_type=1;
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment