HOME/Articles/

mysql example DegreesOfSep (snippet)

Article Outline

Python mysql example 'DegreesOfSep'

Functions in program:

  • def getBackwardBuffer():
  • def getForwardBuffer(level):
  • def isFirstConnection(n1, n2):
  • def isFirstConnectionByPerson(comp1, comp2):
  • def load(varName):
  • def save(var, varName):
  • def toc():
  • def tic():

Modules used in program:

  • import mysql.connector
  • import time
  • import json

python DegreesOfSep

Python mysql example: DegreesOfSep

#!/usr/bin/python3
import json
import time

import mysql.connector

def tic():
    tic.start = time.time()
def toc():
    print('Time elapsed:', round(time.time() - tic.start, 3),'s\n')
    return (time.time() - tic.start)

def save(var, varName):
    with open(varName+'.json','w') as saveFile:
        json.dump(var, saveFile)

def load(varName):
    with open(varName+'.json') as loadFile:
        result = json.load(loadFile)
    return result

tic()

conn = mysql.connector.connect(host = "localhost",user="root",passwd="root", db="mysql")
cursor = conn.cursor()

# print('Creating Competitor Table...')
# cursor.execute('DROP TABLE Competitors;')
# cursor.execute('CREATE TABLE Competitors AS SELECT DISTINCT PersonId, CompetitionId FROM Results;')
# toc()

print('Finding competitions...')
cursor.execute('SELECT DISTINCT CompetitionId FROM Competitors;')
compsList = [c[0] for c in cursor.fetchall()]
save(compsList, 'compsList')
toc()

print('Fetching Competitor Attendance...')
counter = 0

try:
    compRegCount = load('compRegCount')
    compReg = load('compReg')
except FileNotFoundError:
    compRegCount = {}
    compReg = {}

for comp in compsList:
    if comp not in compReg.keys():
        cursor.execute('SELECT PersonId FROM Competitors WHERE CompetitionId=%s;', (comp,))
        compReg[comp] = [p[0] for p in cursor.fetchall()]
        compRegCount[comp] = len(compReg[comp])
    counter += 1
    if not counter%1000:
        print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100))
toc()

print('Saving...')
save(compReg, 'compReg')
save(compRegCount, 'compRegCount')
toc()


compsAttended = {}
counter = 0
print('Fetching Competitions per Person...')
for comp in compsList:
    for person in compReg[comp]:
        if not person in compsAttended.keys():
            compsAttended[person] = []
        compsAttended[person].append(comp)

    counter += 1
    if not counter%1000:
        print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList), counter/len(compsList)*100))
save(compsAttended, 'compsAttended')
toc()


def isFirstConnectionByPerson(comp1, comp2):
    for person in compReg[comp1]:
        if person in compReg[comp2]:
            return True
    return False



def isFirstConnection(n1, n2):
    return n2 in connections[n1]


print('Finding Connections...')
try:
    connections = load('connections')
except FileNotFoundError:
    connections = [[] for _ in compsList]
    counter = 1
    for i in range(len(compsList)-1):
        for j in range(i+1, len(compsList)):
            if isFirstConnection(compsList[i], compsList[j]):
                connections[i].append(j)
                connections[j].append(i)
            counter += 1
            if not counter%50000:
                print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*len(compsList)/2, counter/(len(compsList)*len(compsList)/2)*100))
save(connections, 'connections')
nConnections = [len(c) for c in connections]
toc()


# print('Checking first degree connectedness...')
# try:
#     firstdegrees = load('firstdegrees')
# except FileNotFoundError:
#     connections = load('connections')
#     counter = 0
#     firstdegrees = [[0 for _ in range(len(compsList))] for _ in range(len(compsList))]
#     for i in range(len(compsList)-1):
#         for j in range(i+1, len(compsList)):
#             if isFirstConnection(i, j):
#                 firstdegrees[i][j] = 1
#                 firstdegrees[j][i] = 1
#             counter += 1
#             if not counter%200000:
#                 print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100))
# save(firstdegrees, 'firstdegrees')
# toc()
# 
#         
# print('{:.2f} % first degree connections.'.format(sum([sum(d) for d in firstdegrees])/(len(compsList)*(len(compsList)-1))*100))
# 
# 
# print('Checking full connectedness...')
# counter = 0
# degrees = [[1 if firstdegrees[i][j] else 0 for i in range(len(compsList))] for j in range(len(compsList))]
# 
# 
# def connectionDegree(n1, n2):
#     degree = 0
#     buffer = [n1]
#     connectionFound = False
#     while not connectionFound:
#         nextBuffer = []
#         for b in buffer:
#             if degrees[b][n2] > 0:
#                 return degree + degrees[b][n2]
#             else:
#                 nextBuffer += connections[b]
#         degree += 1
#         buffer = list(set(nextBuffer))
#         if degree > 4:
#             raise Exception('Degree greater than max allowed: ({}, {})'.format(n1,n2))
# 
# 
# try:
#     degrees = load('degrees')
# except FileNotFoundError:
#     for i in range(len(compsList)-1):
#         for j in range(i+1, len(compsList)):
#             thisDegree = connectionDegree(i,j)
#             degrees[i][j] = thisDegree
#             degrees[j][i] = thisDegree        
#             counter += 1
#             if not counter%50000:
#                 print('{:.0f}/{:.0f} ({:.2f} %)'.format(counter, len(compsList)*(len(compsList)-1)/2, counter/(len(compsList)*(len(compsList)-1)/2)*100))
# toc()
# save(degrees, 'degrees')


print('Filling distance table...')
distances = [[0 if i==j else -1 for i in range(len(compsList))] for j in range(len(compsList))]

def getForwardBuffer(level):
    return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == level]

def getBackwardBuffer():
    return [(i,j) for i in range(len(compsList)) for j in range(i,len(compsList)) if distances[i][j] == -1]


filled = 0
level = 0
total = len(compsList)*len(compsList)
while level<10:
    if filled < total*0.33:
        buffer = getForwardBuffer(level)
        print('Level: {:.0f}, buffer length: {:.0f}. Forward search'.format(level, len(buffer)))
        counter = 0
        toc()
        for i,j in buffer:
            for k in connections[i]:
                if distances[j][k] == -1:
                    distances[j][k] = level+1
                    distances[k][j] = level+1
                    filled += 1
            for k in connections[j]:
                if distances[i][k] == -1:
                    distances[i][k] = level+1
                    distances[k][i] = level+1
                    filled += 1
            counter += 1
            if not counter%20000:
                print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100))
    else:
        buffer = getBackwardBuffer()
        print('Level: {:.0f}, buffer length: {:.0f}. Backward search'.format(level, len(buffer)))
        counter = 0
        for i,j in buffer:
            for k in connections[i]:
                if distances[j][k] == level and distances[i][j] == -1:
                    distances[i][j] = level+1
                    distances[j][i] = level+1
                    filled += 1
            for k in connections[j]:
                if distances[i][k] == level and distances[i][j] == -1:
                    distances[i][j] = level+1
                    distances[j][i] = level+1
                    filled += 1
            counter += 1
            if not counter%20000:
                print('filled: {:.0f}/{:.0f}\tlevel: {:.0f}\t({:.2f} % total, {:.2f} % of level)'.format(filled, total, level, filled/total*100, counter/len(buffer)*100))
    level += 1
toc()  

degrees = distances
save(degrees, 'degrees')

for x in range(15):
    compCount = 0
    for i in range(len(degrees)):
        for j in range(len(degrees[i])):
            if distances[i][j] == x:
                compCount += 1
    print('By competition: Distance: {:3.0f}, Count: {:8.0f}, ({:6.2f} %)'.format(x, compCount, compCount/(len(compsList)*(len(compsList)-1))*100))


print('Maximum degree of separation: {}'.format(max([max(d) for d in degrees])))

conn.close()