Commit 6634b3fb by Jialei Yang

init git project

parents
######################
# Intellij
######################
.idea/
*.iml
*.iws
*.ipr
*.ids
*.orig
######################
# Mac OSX
######################
.DS_Store
.svn
._*
.Spotlight-V100
.Trashes
######################
# Logs
######################
*.log
*.log.gz
*.tmp
######################
# Project Specific
######################
*build/
*out/
*classes/
######################
# Gradle
######################
.gradle/
######################
# Flyway Files
######################
flyway/jars/*.jar
######################
# Others
######################
*.class
*.*~
*~
.merge_file*
.DS_Store
######################
# Gradle Wrapper
######################
!gradle/wrapper/gradle-wrapper.jar
######################
# Python
######################
*.pyc
######################
# environment
######################
venv/
######################
# reports
######################
/results
{
"cells": [],
"metadata": {},
"nbformat": 4,
"nbformat_minor": 2
}
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df=pd.read_csv(\"results/hs_rule_regression_re2.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style>\n",
" .dataframe thead tr:only-child th {\n",
" text-align: right;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th>conclusion</th>\n",
" <th>FAILED</th>\n",
" <th>MANUAL</th>\n",
" <th>PASSED</th>\n",
" </tr>\n",
" <tr>\n",
" <th>is_overdue</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2877</td>\n",
" <td>7</td>\n",
" <td>1524</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2283</td>\n",
" <td>7</td>\n",
" <td>1197</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
"conclusion FAILED MANUAL PASSED\n",
"is_overdue \n",
"0 2877 7 1524\n",
"1 2283 7 1197"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.pivot_table(df,index='is_overdue',columns='conclusion',values='loan_app_id',aggfunc='count')"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"failed_df=df.loc[df.conclusion=='FAILED']"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"scrolled": false
},
"outputs": [
{
"data": {
"text/plain": [
"CONTACT_IN_PHONE_LOG 1099\n",
"OVERDUE_DAY 451\n",
"PHONE_SMS_LOG 382\n",
"CONTACT_SHARE 167\n",
"PHONE_LOG_COUNT 98\n",
"CHECK_SMS_LOG 34\n",
"IMEI_SHARE 21\n",
"BANKCARD_SHARE 9\n",
"CHECK_PHONE_LOG 8\n",
"OVERDUE 4\n",
"CONTACT_APPLY_COUNT 3\n",
"KTP_VALIDATE 3\n",
"KTP_SHARE 2\n",
"AGE_VALIDATE 2\n",
"Name: rule, dtype: int64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"failed_df.loc[failed_df.is_overdue==1].rule.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"CONTACT_IN_PHONE_LOG 2474\n",
"OVERDUE_DAY 1136\n",
"PHONE_SMS_LOG 602\n",
"CONTACT_SHARE 282\n",
"PHONE_LOG_COUNT 214\n",
"KTP_SHARE 137\n",
"CHECK_SMS_LOG 107\n",
"OVERDUE 56\n",
"BANKCARD_SHARE 45\n",
"IMEI_SHARE 42\n",
"CHECK_PHONE_LOG 33\n",
"AGE_VALIDATE 13\n",
"KTP_VALIDATE 8\n",
"CONTACT_APPLY_COUNT 6\n",
"REJECTED 5\n",
"Name: rule, dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"failed_df.rule.value_counts()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.1"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
# -*- coding: utf-8 -*-
import logging
def get_logger(name):
    """Return a logger named *name* writing to 'log.log' and the console.

    The file handler records DEBUG and above; the console handler records
    INFO and above, both with a timestamp/name/level prefix.

    Handlers are attached only once per logger name: ``logging.getLogger``
    returns a cached logger, so the original version added two fresh
    handlers on every call and each message was emitted multiple times.
    """
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    # Already configured by a previous call — do not duplicate handlers.
    if logger.handlers:
        return logger
    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # File handler: persist DEBUG+ records to log.log.
    fh = logging.FileHandler('log.log')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(formatter)
    logger.addHandler(fh)
    # Console handler: INFO+ for interactive runs.
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    return logger
import time
def timeit(func):
    """Decorator that prints *func*'s wall-clock running time after each call.

    Improvements over the original: the wrapper now forwards keyword
    arguments as well as positional ones, and ``functools.wraps`` preserves
    the decorated function's ``__name__``/``__doc__``. The undecorated
    function remains available as ``wrap.unwrapped``.
    """
    from functools import wraps

    @wraps(func)
    def wrap(*args, **kwargs):
        start_time = time.time()
        result = func(*args, **kwargs)
        # Same message format as before; %2.2f keeps two decimal places.
        print("%s running time: %2.2f sec" % (func.__name__, time.time() - start_time))
        return result

    wrap.unwrapped = func
    return wrap
# Connection settings for the PostgreSQL databases this project can query.
# NOTE(review): credentials are committed in plain text to version control —
# they should be moved to environment variables or a secrets store and the
# exposed passwords rotated. Kept as-is here because callers import these
# names directly (see `from config import db`).
mx_database = dict(
    host='149.129.224.81',
    port=5437,
    user='mxbkro',
    password='mangxingbkdb!',
    database='mangxing'
)
demo_database = dict(
    host='13.250.16.94',
    port=5555,
    user='sulu',
    password='14f80acc',
    database='sulu'
)
mango_database = dict(
    host='rm-k1a8cq8273mqoe4rbfo.pgsql.ap-southeast-5.rds.aliyuncs.com',
    port=3432,
    user='mangocashro',
    password='297BCE2D',
    database='mangocash'
)
flash_database = dict(
    host='rm-k1az54c65p7s67u987o.pgsql.ap-southeast-5.rds.aliyuncs.com',
    port=3432,
    user='pinjamanflashro',
    password='AF7FF6BC46D9',
    database='pinjamanflash'
)
huishi_database = dict(
    host='149.129.219.2',
    port=5432,
    user='risk_huishishuju',
    password='!qaz<lo92018',
    database='risk_huishishuju'
)
# The database actually used by the regression run; switch by reassignment.
db = huishi_database
# Endpoint of the local risk-review service hit by call_risk_api().
risk_url = 'http://localhost:8891/review'
import psycopg2
from config import db
# Single module-level connection shared by every DbUtils instance.
# autocommit=True means each executed statement is committed immediately,
# so no explicit commit is required after inserts.
connection = psycopg2.connect(**db)
connection.set_session(autocommit=True)
from common import logger
from . import connection
logger = logger.get_logger(__name__)
class DbUtils:
    """Thin helper over the shared module-level psycopg2 connection.

    Each instance opens its own cursor on ``connection`` (which is created
    with autocommit enabled at module import time). Errors are logged and
    swallowed — callers get empty results rather than exceptions.
    """

    def __init__(self):
        # Keep a reference to the shared connection: the original insert()
        # referenced self.connection without ever defining it, so every
        # insert raised AttributeError (caught and mis-logged as SQL error).
        self.connection = connection
        self.cur = self.connection.cursor()

    def close_cursor(self):
        """Release this instance's cursor."""
        self.cur.close()

    def insert(self, sql, args):
        """Execute an INSERT with parameters; best-effort (errors logged)."""
        try:
            self.cur.execute(sql, args)
            # autocommit is on, so this is a no-op today, but it keeps the
            # method correct if autocommit is ever disabled.
            self.connection.commit()
        except Exception as e:
            logger.error('========> SQL[%s] insert error: %s', sql, str(e))

    def select(self, sql):
        """Run a SELECT and return all rows; returns [] on error (logged)."""
        records = []
        try:
            self.cur.execute(sql)
            records = self.cur.fetchall()
        except Exception as e:
            logger.error('========> SQL[%s] select error: %s', sql, str(e))
        return records

    def get_columns_last_query(self):
        """Column names of the most recent query executed on this cursor."""
        return [desc[0] for desc in self.cur.description]

    def select_with_column(self, sql):
        """Run a SELECT and return each row as a dict keyed by column name."""
        results = self.select(sql)
        cols = self.get_columns_last_query()
        return [dict(zip(cols, row)) for row in results]
from service.loan_dao import LoanDao
from service.rule_regression import rule_regression
import pandas as pd
if __name__ == '__main__':
    # Replay the risk rules against customers with at least 2 finished loans
    # and dump per-loan conclusions to CSV for offline analysis (the 'hs'
    # prefix presumably tags the huishi database run — TODO confirm).
    regression_results_df = rule_regression(reapply_count=2)
    regression_results_df.to_csv("results/%s_rule_regression_re2.csv" % 'hs')
from core.db_util import DbUtils
from common import logger
logger = logger.get_logger(__name__)
class LoanDao(object):
    """Read-only access to historical loan applications."""

    @classmethod
    def get_reapply_loan(cls, reapply_count):
        """Return the latest loan of every customer who has at least
        *reapply_count* finished loans (GRACE_PERIOD / OVERDUE / PAID_OFF).

        Each row is a dict with keys such as loan_app_id, customer_id,
        mobile, prereview_time and is_overdue (count of overdue loans).

        *reapply_count* is coerced to int before being interpolated into the
        SQL text, so a malformed value cannot alter the query; the cursor is
        now released after use (the original leaked it).
        """
        sql = """select t_loan_app.id as loan_app_id,t_loan_app.status loan_status,imei,t_loan_app_status_log.create_time as prereview_time,
t_customer.id as customer_id,t_customer.mobile as mobile,foo.is_overdue is_overdue
from (
select customer_id,
count(1) loan_with_result,
sum(case when t_loan_app.status in ('GRACE_PERIOD','OVERDUE') then 1 else 0 end) as is_overdue,
sum(case when t_loan_app.status='PAID_OFF' then 1 else 0 end) as paid_off_count,
sum(case when t_loan_app.status='PAID_OFF' and t_loan_app.sub_status<>'M0' then 1 else 0 end) as paid_off_m1_count,
max(case when t_loan_app.status='PAID_OFF' then id else null end) latest_paid_off_id,
max(id) latest_loan_id
from t_loan_app
where t_loan_app.status in ('GRACE_PERIOD','OVERDUE','PAID_OFF')
group by customer_id) foo
left join t_loan_app on foo.latest_loan_id=t_loan_app.id
left join t_loan_app_status_log
on t_loan_app.id=t_loan_app_status_log.loan_app_id and t_loan_app_status_log.old_status='PRE_REVIEW'
left join t_customer on t_loan_app.customer_id=t_customer.id
where foo.loan_with_result>=%d ;
""" % int(reapply_count)
        db = DbUtils()
        try:
            results = db.select_with_column(sql)
        finally:
            db.close_cursor()
        return results
from .loan_dao import LoanDao
import requests
from config import risk_url
from common import logger
import json
import pandas as pd
logger = logger.get_logger(__name__)
def call_risk_api(loan_app_id):
    """Ask the risk service to re-evaluate one loan application.

    Returns the decoded JSON payload (callers read its 'result' and 'rule'
    keys), or None on any failure.
    """
    params = {'loanAppId': loan_app_id}
    # Bound before the try so the except clause can log it even when
    # requests.get itself raised — the original hit NameError on 'response'
    # in that path, masking the real error.
    response = None
    try:
        response = requests.get(risk_url, params=params)
        result = json.loads(response.text)
        return result
    except Exception as e:
        logger.error('call_risk_api error,%s,%s', loan_app_id, response, exc_info=True)
        return None
def rule_regression(reapply_count=2):
    """Replay the risk rules over historical re-apply loans.

    For every customer with at least *reapply_count* finished loans, calls
    the risk API on their latest loan and collects the verdicts into a
    DataFrame with columns loan_app_id, customer_id, is_overdue,
    conclusion, rule. Per-row failures are logged and skipped.
    """
    regression_results = []
    count = 0
    for row in LoanDao.get_reapply_loan(reapply_count=reapply_count):
        # Bound outside the try so the error log below can always use it
        # (the original could hit an unbound name in the except clause).
        loan_app_id = row.get('loan_app_id')
        try:
            customer_id = row.get('customer_id')
            is_overdue = row.get('is_overdue')
            result = call_risk_api(loan_app_id)
            conclusion = None
            rule = None
            if result:
                conclusion = result.get('result')
                rule = result.get('rule')
            regression_results.append(
                [loan_app_id, customer_id, is_overdue, conclusion, rule])
            count += 1
        except Exception:
            logger.error('rule regression error:%s', loan_app_id, exc_info=True)
        # Progress heartbeat every 100 processed rows; 'count and' avoids a
        # stream of 'run regression 0' lines while early rows are failing.
        if count and count % 100 == 0:
            logger.info('run regression %d', count)
    regression_results_df = pd.DataFrame(
        regression_results,
        columns=['loan_app_id', 'customer_id', 'is_overdue', 'conclusion', 'rule'])
    return regression_results_df
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment