Article Outline
Python mysql example 'basespider date parse'
Functions in program:
def date_unit(unit):
def date_scale(dt, scale='MM'):
def _parse(x, now=None):
def tz_offset(tz):
def parse_date(x, fmt='auto', tz='+08:00', err=None):
Modules used in program:
import re
from datetime import datetime, timedelta
from collections import OrderedDict
python basespider date parse
Python mysql example: basespider date parse
# -*- coding: utf-8 -*-
# ! /usr/bin/env python
# !/usr/bin/env python
# -*- coding: utf-8 -*-
from datetime import datetime, timedelta
from collections import OrderedDict
import re
__all__ = ['parse_date', 'tz_offset']
def parse_date(x, fmt='auto', tz='+08:00', err=None):
    """
    Parse the date/time value `x` according to `fmt` and timezone `tz`.

    `tz` is handed to `tz_offset`, which accepts 'cst', 'utc' or a numeric
    offset such as '+08:00'.

    NOTE(review): this docstring used to promise a UTC result, but the code
    shifts the parsed value by ``offset - 8 hours``; with the default
    ``tz='+08:00'`` the parsed value is therefore returned unchanged.  The
    arithmetic is kept exactly as it was -- confirm the intended contract
    before relying on the result being UTC.

    :param x: value to parse; coerced to text before parsing
    :param fmt: 'auto' (heuristic parsing via `_parse`), 'epoch' / 'unix'
        (integer seconds since the Unix epoch) or a `strptime` format
    :type fmt: str
    :param tz: timezone of the input
    :type tz: str
    :param err: when truthy, parsing errors are re-raised; otherwise they
        are swallowed and the fallback value is returned
    :return: naive `datetime`; ``datetime(1970, 1, 1)`` when parsing fails
        and `err` is falsy
    """
    try:
        # Coerce both inputs to text on Python 2 and Python 3 alike.
        text_type = type(u'')  # `unicode` on Python 2, `str` on Python 3
        x = x.decode('utf-8') if isinstance(x, bytes) else text_type(x)
        fmt = fmt.decode('utf-8') if isinstance(fmt, bytes) else text_type(fmt)

        offset = tz_offset(tz)
        # Current time shifted by the input's offset; `_parse` uses it as the
        # reference point for relative expressions.
        now = datetime.utcnow() + offset

        if fmt == 'auto':
            date = _parse(x, now)
        elif fmt in ('epoch', 'unix'):
            date = datetime.utcfromtimestamp(int(x))
            # Epoch values carry no timezone, so the tz offset is dropped.
            offset = timedelta(0)
        elif str is bytes:
            # Python 2: strptime needs byte strings for non-ASCII text.
            date = datetime.strptime(x.encode('utf-8'), fmt.encode('utf-8'))
        else:
            date = datetime.strptime(x, fmt)

        return date + (offset - timedelta(hours=8))
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are never swallowed.
        if err:
            raise
        # Fallback value: the Unix epoch.
        return datetime(1970, 1, 1)
# Convert a timezone spec into the matching time offset.
def tz_offset(tz):
    """
    Return the offset described by `tz` as a `timedelta`.

    Accepted values (case-insensitive, surrounding whitespace ignored):

    * ``'cst'`` -- mapped to +8 hours
    * ``'utc'`` -- zero offset
    * a numeric offset ``[+-]HH[:]MM`` found anywhere in the string, e.g.
      ``'+08:00'`` or ``'-0530'``; a missing sign means ``+``

    :param tz: timezone spec
    :type tz: str
    :return: the offset
    :rtype: timedelta
    :raises ValueError: if `tz` is none of the forms above
    """
    tz = tz.lower().strip()
    if tz == 'cst':
        return timedelta(hours=8)
    if tz == 'utc':
        return timedelta()

    match = re.search(r'(?P<F>[-+])?(?P<HH>\d{2}):?(?P<MM>\d{2})', tz)
    if match is None:
        # Previously this surfaced as an AttributeError on None.
        raise ValueError('unrecognized timezone: %r' % (tz,))

    offset = timedelta(
        hours=int(match.group('HH')),
        minutes=int(match.group('MM')),
    )
    # The sign group is None when omitted, which counts as positive.
    return -offset if match.group('F') == '-' else offset
def _parse(x, now=None):
# 当前时间
# now = now or datetime.utcnow()
# 秒
now_SS = date_scale(now, 'SS')
# 分
now_MM = date_scale(now, 'MM')
# 小时
now_HH = date_scale(now, 'HH')
# 天
now_dd = date_scale(now, 'dd')
# 月
now_mm = date_scale(now, 'mm')
# 年
now_YY = date_scale(now, 'YY')
# 预处理
x = re.sub(u'刚刚|刚才', now_MM.strftime('%Y-%m-%d %H:%M:%S'), x)
# x = re.sub(u'刚刚|刚才', now_MM.strftime('%F %T'), x)
x = re.sub(u'几', u'0', x)
x = re.sub(ur'(?<=[\d半前昨今明后])(天|号)', u'日', x)
# 获取一天时间
one_dd = date_unit('dd')
rdays = {
u'前日': now_dd - one_dd * 2,
u'昨日': now_dd - one_dd * 1,
u'今日': now_dd,
u'明日': now_dd + one_dd * 1,
u'后日': now_dd + one_dd * 2,
}
# 将x值转换成rdays对应时间格式
for k, v in rdays.iteritems():
x = x.replace(k, v.strftime(' %Y-%m-%d '))
# x = x.replace(k, v.strftime(' %F '))
x = re.sub(ur'(?<=\d)[/.](?=\d)', u'-', x)
x = re.sub(ur'[^-:\s\d前后半秒分时日周月年]', u'', x)
x = re.sub(ur'(?<=\d)\s+(?!\d)', u'', x)
x = re.sub(ur'(?<!\d)\s+(?=\d)', u'', x)
x = re.sub(ur'(?<!\d)\s+(?!\d)', u'', x)
x = re.sub(ur'(?<!年)(?=(^(1[0-2]|\d+))月(\d+)日)', u' %d年' % now.year, x)
x = re.sub(ur'(\d+)年(\d+)月(\d+)日', ur'\g<1>-\g<2>-\g<3> ', x)
x = x.strip()
if '-' in x or ':' in x:
parts = {}
pats = [
ur'(?P<year>\d{4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})',
ur'(?P<hour>\d{1,2}):(?P<minute>\d{1,2})(:(?P<second>\d{1,2}))?',
]
for p in pats:
m = re.search(p, x)
if m:
parts.update(m.groupdict())
for k, v in parts.items():
if v == None:
del parts[k]
else:
parts[k] = int(v)
if parts:
parts['year'] = parts.get('year', now.year)
parts['month'] = parts.get('month', now.month)
parts['day'] = parts.get('day', now.day)
return datetime(**parts)
if u'半' in x:
halves = {
u'半分': u'30秒',
u'半时': u'30分',
u'半日': u'12时',
u'半周': u'84时',
u'半月': u'15日',
u'半年': u'6月',
}
for k, v in halves.iteritems():
x = re.sub(k, v, x)
us = {
u'年': 'YY',
u'月': 'mm',
u'周': 'ww',
u'日': 'dd',
u'时': 'HH',
u'分': 'MM',
u'秒': 'SS',
}
m = re.search(ur'(?P<num>\d+)(?P<unit>%s)(?P<flag>前|后)' % (u'|'.join(us.keys())), x)
if m:
d = m.groupdict()
k = d['unit']
f = -1 if d['flag'] == u'前' else 1
v = f * int(d['num'])
u = date_unit(us[k])
s = 'dd' if us[k] == 'ww' else us[k]
date = date_scale(now + u * v, s)
return date
for i in re.findall(ur'(?<!\d)(\d{8}|\d{10}|\d{13})(?!\d)', x):
k = len(i)
v = int(i)
if k == 8:
date = datetime.strptime(i, '%Y%m%d')
elif k == 10:
date = datetime.fromtimestamp(v)
elif k == 13:
date = datetime.fromtimestamp(v / 1000)
else:
raise Exception()
return date
raise Exception()
def date_scale(dt, scale='MM'):
    """
    Truncate `dt` to the precision named by `scale`.

    Every field finer than `scale` is reset to its smallest value (0, or 1
    for day and month).  Scale codes, finest first:

    ``'MS'`` microsecond, ``'SS'`` second, ``'MM'`` minute, ``'HH'`` hour,
    ``'dd'`` day, ``'mm'`` month, ``'YY'`` year.

    :param dt: datetime to truncate
    :param scale: scale code; defaults to ``'MM'`` (minute)
    :return: the truncated datetime (`dt` itself for ``'MS'``)
    :raises ValueError: if `scale` is not a known scale code
    """
    # Ordered from finest to coarsest; the order drives the truncation.
    scales = (
        ('MS', 'microsecond'),
        ('SS', 'second'),
        ('MM', 'minute'),
        ('HH', 'hour'),
        ('dd', 'day'),
        ('mm', 'month'),
        ('YY', 'year'),
    )
    reset = {}
    for code, field in scales:
        if code == scale:
            return dt.replace(**reset) if reset else dt
        # Days and months start at 1; every other field starts at 0.
        reset[field] = 1 if code in ('dd', 'mm') else 0
    # Explicit error instead of `assert`, which is stripped under `-O`.
    raise ValueError('unknown scale: %r' % (scale,))
# Length of one unit per unit code.  Months and years are fixed
# approximations (30 and 365 days), not calendar-aware.
_units = {
    'SS': timedelta(seconds=1),
    'MM': timedelta(minutes=1),
    'HH': timedelta(hours=1),
    'dd': timedelta(days=1),
    'ww': timedelta(days=7),
    'mm': timedelta(days=30),
    'YY': timedelta(days=365),
}


def date_unit(unit):
    """
    Return the `timedelta` that represents one `unit`.

    :param unit: unit code, one of 'SS', 'MM', 'HH', 'dd', 'ww', 'mm', 'YY'
    :raises KeyError: if `unit` is not a known unit code
    """
    one_unit = _units[unit]
    return one_unit
if __name__ == '__main__':
    # Sample inputs covering the forms the 'auto' parser is meant to handle.
    # Only the disabled driver further down consumes this list.
    samples = [
        # absolute dates and times
        u'2014-01-01',
        u'2014/11/01',
        u'2014.12.01',
        u'01:23',
        u'01:23:45',
        u'01 : 23 : 45',
        u'2014-01-01 01:23',
        u'2014-01-01 01:23:45',
        # relative day words and "just now"
        u'今天',
        u'昨天',
        u'前天',
        u'刚刚',
        u'刚才',
        # "<n> <unit> ago"
        u'几秒前',
        u'5秒前',
        u'5分钟前',
        u'5小时前',
        u'5天前',
        u'5周前',
        u'5年前',
        # "<n> <unit> later"
        u'5分钟后',
        u'5小时后',
        u'5天后',
        u'5周后',
        u'5年后',
        # "half a <unit> ago"
        u'半分钟前',
        u'半小时前',
        u'半天前',
        u'半周前',
        u'半月前',
        u'半年前',
        # bare digit runs
        u'20140101',
        u'20140101 012345',
        u'1400641135',
        u'1400641135000',
        # dates embedded in free text
        u'4月19号的预售,今天都5月21号了',
        u'刚才 你去哪了?',
        u'2014 年 1 月 1 日',
    ]

    # Disabled driver: runs every sample through parse_date and shows the
    # result together with its type.
    # print(datetime.utcfromtimestamp(0))
    # for i, sample in enumerate(samples, 1):
    #     print('IN [%d]: %s' % (i, sample))
    #     parsed = parse_date(sample, 'auto', 'cst', True)
    #     print('OUT[%d]: %s [%s]' % (i, parsed, type(parsed).__name__))
    #     print
    #
    # print('>>>', parse_date('01012014080000', '%m%d%Y%H%M%S', '+08:00'))

    # Single smoke run: parse one timestamp given in 'cst' and print it.
    parsed = parse_date('2014-01-01 01:23', tz='cst')
    print('>>>', parsed)
Python links
- Learn Python: https://pythonbasics.org/
- Python Tutorial: https://pythonprogramminglanguage.com