Commit bb072bd3 by Nicolas Joyard

Commit initial

Structure générale + import des organes AN
parents
.env
*.egg-info
*.pyc
*~
data/*.json
reference
Requirements
- python 2.7
- virtualenv
Installation
- git clone https://github.com/njoyard/parlapi.git
- cd parlapi
- virtualenv ve
- source ve/bin/activate
- pip install -e .
Utilisation
- parlapi createdb
- parlapi update_organes_an
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import click
@click.group()
def cli():
    # Root command group: the subcommands of this module attach themselves
    # to it through @cli.command().
    # Documented with comments rather than a docstring because click would
    # display a docstring as the group's help text.
    return None
@cli.command(short_help=u'Exécute le serveur web flask intégré')
def runserver():
    # Start the web server built into the Flask application object.
    # The application is imported inside the command, not at module level.
    from .parlapi import app as flask_app

    flask_app.run()
@cli.command(short_help=u'Crée ou met à jour le schéma BDD')
def createdb():
    # Issue create_all() on the shared SQLAlchemy handle, inside an
    # application context of the project's Flask app.
    from .parlapi import app as flask_app
    from .models import db as database

    context = flask_app.app_context()
    with context:
        database.create_all()
@cli.command(short_help=u'Met à jour les organes depuis l\'AN')
@click.option('--force', is_flag=True)
def update_organes_an(force):
    # Run the AN "organes" import job inside an application context.
    # `force` is the value of the --force flag and is handed to the job
    # unchanged.
    from .parlapi import app as flask_app
    from .jobs.an_organes import run as run_import

    context = flask_app.app_context()
    with context:
        run_import(flask_app, force)
if __name__ == '__main__':
    # Invoke the command group when this module is executed as a script.
    cli()
# -*- coding: utf-8 -*-
import os
# BASE_DIR is the parent of the directory that contains this file.
_here = os.path.abspath(__file__)
_package_dir = os.path.dirname(_here)
BASE_DIR = os.path.dirname(_package_dir)


class Config(object):
    """Default settings; the upper-case attributes are the configuration."""

    DEBUG = False
    SECRET_KEY = 'not secret'
    SQLALCHEMY_DATABASE_URI = 'sqlite:////tmp/test.db'
    SQLALCHEMY_TRACK_MODIFICATIONS = False
    # Directory named "data" directly under BASE_DIR.
    DATA_DIR = os.path.join(BASE_DIR, 'data')


class DebugConfig(Config):
    """Same settings as ``Config`` with DEBUG switched on."""

    DEBUG = True


# Configuration class currently selected for the application.
CurrentConfig = DebugConfig
# -*- coding: utf-8 -*-
import dateparser
import ijson
from .base import BaseANJob
from ..models import Organe, Legislature, Regime
class ImportOrganesJob(BaseANJob):
    """Job importing the "organes" contained in an AN JSON dump.

    Every item streamed from ``export.organes.organe`` is stored as an
    ``Organe`` row (looked up by ``id_an``, created when missing) and linked
    to its ``Regime`` and ``Legislature`` when the JSON item names them.
    """

    @property
    def job_name(self):
        """Name under which this job is logged and recorded."""
        return u'Import organes AN'

    def __init__(self, app):
        super(ImportOrganesJob, self).__init__(
            app, '/acteurs/deputes-en-exercice')
        # Caches are per instance: class-level dicts would be shared by every
        # job object and would keep model instances alive across runs.
        self.cache_legislatures = {}
        self.cache_regimes = {}

    def parse_json(self, filename, stream):
        """Save every organe found in the JSON ``stream``.

        ``filename`` is accepted for interface compatibility and is unused.
        """
        for organe_json in ijson.items(stream, 'export.organes.organe.item'):
            self.save_organe(organe_json)

    def get_regime(self, nom):
        """Return the ``Regime`` named ``nom``, creating it when missing."""
        if nom not in self.cache_regimes:
            self.cache_regimes[nom] = self.get_or_create(Regime, nom=nom)
        return self.cache_regimes[nom]

    def get_legislature(self, num):
        """Return the ``Legislature`` numbered ``num``, creating it if missing.

        ``num`` is converted with ``int()``; the cache is keyed on the
        converted value so that ``'14'`` and ``14`` share a single entry.
        """
        numero = int(num)
        if numero not in self.cache_legislatures:
            self.cache_legislatures[numero] = self.get_or_create(
                Legislature, numero=numero)
        return self.cache_legislatures[numero]

    @staticmethod
    def _parse_date(value):
        """Parse ``value`` with dateparser; falsy input gives ``None``."""
        return dateparser.parse(value) if value else None

    def save_organe(self, json):
        """Create or update the ``Organe`` described by one JSON item.

        ``json`` is the decoded item; the keys ``uid``, ``codeType``,
        ``libelle``, ``libelleAbrege``, ``libelleAbrev`` and ``viMoDe`` (with
        ``dateDebut`` and ``dateFin``) are required and raise ``KeyError``
        when absent. ``regime`` and ``legislature`` are optional.
        """
        organe = self.get_or_create(Organe, id_an=json['uid'])

        organe.type = json['codeType']
        organe.libelle = json['libelle']
        organe.libelle_court = json['libelleAbrege']
        organe.abbreviation = json['libelleAbrev']

        vimode = json['viMoDe']
        organe.date_debut = self._parse_date(vimode['dateDebut'])
        organe.date_fin = self._parse_date(vimode['dateFin'])

        if json.get('regime'):
            organe.regime = self.get_regime(json['regime'])

        if json.get('legislature'):
            organe.legislature = self.get_legislature(json['legislature'])

        # Propagate the regime to the legislature only when both are set:
        # an organe with a regime but no legislature must not raise
        # AttributeError on ``None``.
        if organe.regime and organe.legislature:
            organe.legislature.regime = organe.regime
def run(app, force):
    """Build an ``ImportOrganesJob`` for ``app`` and run it.

    ``force`` is passed as the job's first positional ``run`` argument.
    Nothing is returned.
    """
    job = ImportOrganesJob(app)
    job.run(force)
# -*- coding: utf-8 -*-
from builtins import filter
from datetime import datetime
import logging
import os
from zipfile import ZipFile
from bs4 import BeautifulSoup
import dateparser
import requests
from ..models import db, Job
class BaseJob(object):
    """Common plumbing for jobs: logging, row lookup and status recording.

    Subclasses must provide ``job_name``; it prefixes every log message and
    identifies the ``Job`` row that stores the outcome of the last execution.
    """

    @property
    def job_name(self):
        """Name of the job; must be overridden by subclasses."""
        raise NotImplementedError()

    @property
    def job(self):
        """``Job`` row for this job, fetched or created on first access."""
        if not self._job:
            self._job = self.get_or_create(Job, nom=self.job_name)
        return self._job

    def __init__(self, app):
        self.app = app
        self._job = None

    def _prefixed(self, msg):
        """Return ``msg`` prefixed with the job name between angle brackets."""
        return '<%s> %s' % (self.job_name, msg)

    def debug(self, msg):
        """Log ``msg`` at DEBUG level on the application logger."""
        self.app.logger.debug(self._prefixed(msg))

    def info(self, msg):
        """Log ``msg`` at INFO level on the application logger."""
        self.app.logger.info(self._prefixed(msg))

    def warn(self, msg):
        """Log ``msg`` at WARNING level on the application logger."""
        # Logger.warn is a deprecated alias (removed in Python 3.13);
        # Logger.warning is available on every supported version.
        self.app.logger.warning(self._prefixed(msg))

    def error(self, msg):
        """Log ``msg`` at ERROR level on the application logger."""
        self.app.logger.error(self._prefixed(msg))

    def get_or_create(self, model, **kwargs):
        """Return the first ``model`` row matching ``kwargs``.

        When no row matches, a new instance is built from ``kwargs`` and
        added to the session; it is not committed here.
        """
        item = model.query.filter_by(**kwargs).first()
        if not item:
            item = model(**kwargs)
            db.session.add(item)
        return item

    def update_status(self, status=None, file=None, filedate=None):
        """Record the outcome of an execution and commit the session.

        ``status`` is stored as the result (empty string when falsy);
        ``file`` and ``filedate`` overwrite the stored file URL and file date
        only when they are truthy. The execution date is set to now.
        """
        job = self.job
        job.date_exec = datetime.now()
        job.resultat = status or ''
        if file:
            job.url_fichier = file
        if filedate:
            job.date_fichier = filedate
        db.session.commit()
class BaseANJob(BaseJob):
    """Base class for jobs fed by a zipped JSON dump of the AN open data site.

    ``run`` fetches the HTML page at ``self.url``, follows its first link
    ending in ``.json.zip``, stores the archive under the configured
    ``DATA_DIR`` and hands every ``.json`` member to ``parse_json``.
    """

    base_url = 'http://data.assemblee-nationale.fr'

    def __init__(self, app, url):
        """``url`` is the page path, appended to ``base_url``."""
        super(BaseANJob, self).__init__(app)
        self.url = '%s%s' % (self.base_url, url)

    def parse_json(self, json_filename, json_stream):
        """Handle one JSON archive member; must be overridden."""
        raise NotImplementedError()

    def _find_jsonzip_url(self):
        """Return the URL of the first ``.json.zip`` link on the page.

        Hrefs starting with ``/`` are prefixed with ``base_url``. Returns
        ``None`` when the page has no such link.
        """
        soup = BeautifulSoup(requests.get(self.url).content, 'html5lib')
        for anchor in soup.select('a[href]'):
            href = anchor['href']
            if href.endswith('.json.zip'):
                if href.startswith('/'):
                    href = '%s%s' % (self.base_url, href)
                return href
        return None

    def _download(self, url):
        """Stream ``url`` into ``DATA_DIR`` and return the local file path."""
        localzip = os.path.join(self.app.config['DATA_DIR'],
                                os.path.basename(url))
        response = requests.get(url, stream=True)
        try:
            with open(localzip, 'wb') as out:
                for block in response.iter_content(1024):
                    out.write(block)
        finally:
            # Release the connection even when writing fails midway.
            response.close()
        return localzip

    def _parse_archive(self, localzip):
        """Feed every ``.json`` member of the archive to ``parse_json``.

        Errors raised by ``parse_json`` are logged and do not stop the
        remaining members. Returns ``True`` when no member failed.
        """
        success = True
        with ZipFile(localzip, 'r') as archive:
            for member in archive.namelist():
                if not member.endswith('.json'):
                    continue
                self.info(u'JSON extrait : %s' % member)
                with archive.open(member) as stream:
                    try:
                        self.parse_json(member, stream)
                    except Exception as e:
                        self.error(u'Erreur: %s' % e)
                        success = False
        return success

    def run(self, ignore_lmd=False):
        """Download the dump and import it, recording the outcome.

        Unless ``ignore_lmd`` is true, the import is skipped when the stored
        file date is not older than the archive's ``Last-Modified`` header.
        The recorded status is ``'ok'``, ``'error:json-link'``,
        ``'error:json-lastmod'`` or ``'error:json-parse'``.
        """
        self.info(u'Téléchargement %s' % self.url)

        jsonzip_url = self._find_jsonzip_url()
        if jsonzip_url is None:
            self.error(u'Lien vers dump JSON introuvable')
            self.update_status('error:json-link')
            return
        self.info(u'URL JSON zippé : %s' % jsonzip_url)

        try:
            lastmod = requests.head(jsonzip_url).headers['Last-Modified']
        except Exception:
            self.error(u'Date du dump JSON introuvable')
            self.update_status('error:json-lastmod')
            return
        self.info(u'Date modification JSON zippé: %s' % lastmod)
        jsonzip_lmd = dateparser.parse(lastmod)

        # An unparseable header yields None, which cannot be compared with a
        # datetime: in that case skip the freshness check and import anyway.
        if not ignore_lmd and jsonzip_lmd is not None:
            previous = self.job.date_fichier
            if previous and previous >= jsonzip_lmd:
                self.info(u'JSON zippé non modifié')
                self.update_status('ok')
                return

        self.info(u'Téléchargement JSON zippé')
        localzip = self._download(jsonzip_url)
        parsed_ok = self._parse_archive(localzip)

        self.info(u'Job terminé')
        if parsed_ok:
            self.update_status('ok', jsonzip_url, jsonzip_lmd)
        else:
            # Do not store the file date after a failed parse, otherwise the
            # next run would consider the dump already imported and skip it.
            self.update_status('error:json-parse')
# -*- coding: utf-8 -*-
from flask_sqlalchemy import SQLAlchemy
# Shared SQLAlchemy handle; every model below derives from db.Model.
db = SQLAlchemy()
# Lengths given to the String columns of the models below.
SHORT_STRING = 64
MEDIUM_STRING = 255
LARGE_STRING = 2048
class Job(db.Model):
    """Row of the ``jobs`` table: a named job and its last recorded outcome."""

    __tablename__ = 'jobs'

    id = db.Column(db.Integer, primary_key=True)

    # Job name.
    nom = db.Column(db.String(length=MEDIUM_STRING))

    # Execution date, file URL, file date and result string of the job.
    date_exec = db.Column(db.DateTime)
    url_fichier = db.Column(db.String(length=LARGE_STRING))
    date_fichier = db.Column(db.DateTime)
    resultat = db.Column(db.String(length=MEDIUM_STRING))
class Regime(db.Model):
    """Row of the ``regimes`` table, identified by its name."""

    __tablename__ = 'regimes'

    id = db.Column(db.Integer, primary_key=True)
    nom = db.Column(db.String(length=MEDIUM_STRING))
class Legislature(db.Model):
    """Row of the ``legislatures`` table, optionally attached to a regime."""

    __tablename__ = 'legislatures'

    id = db.Column(db.Integer, primary_key=True)
    numero = db.Column(db.Integer)
    date_debut = db.Column(db.Date)
    date_fin = db.Column(db.Date)

    regime_id = db.Column(db.Integer, db.ForeignKey('regimes.id'))
    regime = db.relationship('Regime', back_populates='legislatures')


# Reverse side of Legislature.regime, ordered by legislature id.
Regime.legislatures = db.relationship(
    'Legislature',
    order_by=Legislature.id,
    back_populates='regime',
)
class Organe(db.Model):
    """Row of the ``organes`` table, optionally tied to a regime/legislature."""

    __tablename__ = 'organes'

    id = db.Column(db.Integer, primary_key=True)
    # Identifier used to look the row up on import. Bounded like every other
    # String column (some backends reject a VARCHAR without a length in
    # CREATE TABLE) and indexed because each import queries on it.
    id_an = db.Column(db.String(SHORT_STRING), index=True)
    type = db.Column(db.String(SHORT_STRING))
    libelle = db.Column(db.String(LARGE_STRING))
    libelle_court = db.Column(db.String(MEDIUM_STRING))
    abbreviation = db.Column(db.String(SHORT_STRING))
    date_debut = db.Column(db.Date)
    date_fin = db.Column(db.Date)

    regime_id = db.Column(db.Integer, db.ForeignKey('regimes.id'))
    regime = db.relationship("Regime", back_populates="organes")

    legislature_id = db.Column(db.Integer, db.ForeignKey('legislatures.id'))
    legislature = db.relationship("Legislature", back_populates="organes")


# Reverse sides of Organe.regime and Organe.legislature, ordered by organe id.
Regime.organes = db.relationship("Organe", order_by=Organe.id,
                                 back_populates="regime")
Legislature.organes = db.relationship("Organe", order_by=Organe.id,
                                      back_populates="legislature")
# -*- coding: utf-8 -*-
import sys
from .setup_app import setup_app
# Application object, built once when this module is first imported.
app = setup_app(__name__)
# -*- coding: utf-8 -*-
from flask import redirect
def setup_routes(app):
    """Register the error handler and the routes on ``app``.

    A 404 error is answered with a redirect to ``/``; ``/`` itself answers
    with the text ``Hi!``.
    """
    def error404(e):
        return redirect('/')

    def hello():
        return 'Hi!'

    # Explicit registration, equivalent to the decorator form.
    app.errorhandler(404)(error404)
    app.route('/')(hello)
# -*- coding: utf-8 -*-
from flask import Flask
def setup_app(name):
    """Create, configure and return the Flask application called ``name``.

    Configuration is loaded from ``parlapi.config.CurrentConfig``; the
    SQLAlchemy handle and the routes are then attached to the application.
    """
    flask_app = Flask(name)
    flask_app.config.from_object('parlapi.config.CurrentConfig')

    # Database handle, imported inside the function as in the rest of the
    # project's setup code.
    from .models import db as database
    database.init_app(flask_app)

    # Error handler and routes.
    from .routes import setup_routes as register_routes
    register_routes(flask_app)

    return flask_app
import os
from setuptools import setup
def read(fname):
    """Return the content of ``fname``, resolved next to this file.

    The file is opened in a ``with`` block so the handle is closed even when
    reading fails.
    """
    path = os.path.join(os.path.dirname(__file__), fname)
    with open(path) as handle:
        return handle.read()
# Package metadata and dependencies.
setup(
    name="parlapi",
    version="0.0.1",
    author="Nicolas Joyard",
    author_email="joyard.nicolas@gmail.com",
    description=("A browsable API over French parliament data dumps"),
    license="MIT",
    # The two literals are concatenated: the trailing space keeps "senat" and
    # "assemblee" apart. The framework keyword matches the Flask classifier.
    keywords="flask politics open data france french parliament senat "
             "assemblee nationale",
    url="https://github.com/njoyard/parlapi",
    # parlapi.jobs holds the import jobs used by the CLI and has to be listed
    # to be part of a regular (non-editable) install.
    packages=['parlapi', 'parlapi.jobs'],
    long_description=read('README.md'),
    install_requires=[
        'beautifulsoup4>=4.4,<5',
        'click>=6.6,<7',
        # Imported by the jobs (`import dateparser`).
        'dateparser',
        'flask>=0.11,<0.12',
        'flask-sqlalchemy>=2.1,<3',
        # Provides the `builtins` module imported by the jobs on Python 2.
        'future',
        'html5lib>=0.9999999,<1',
        'ijson>=2.3,<3',
        'requests>=2.10,<3'
    ],
    classifiers=[
        "Development Status :: 3 - Alpha",
        "Framework :: Flask",
        "Intended Audience :: Developers",
        "License :: OSI Approved :: MIT License",
    ],
    entry_points='''
        [console_scripts]
        parlapi=parlapi.cli:cli
    '''
)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment