Skip to content
15 changes: 2 additions & 13 deletions augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,8 +112,6 @@
repo = repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id

start_date = facade_helper.get_setting('start_date')

logger.info(f"Generating sequence for repo {repo_id}")

repo = get_repo_by_repo_git(repo_git)
Expand All @@ -123,7 +121,7 @@
repo_loc = (f"{absolute_path}/.git")
# Grab the parents of HEAD

parent_commits = get_parent_commits_set(repo_loc, start_date)
parent_commits = get_parent_commits_set(repo_loc)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Per @MoralCode's prior comment: the analyze-commits-in-parallel method would go from the last time facade ran, but this doesn't really get used. Right now it gets the parent commits first, then gets the commits that already exist in the database, and only goes through the commit hashes for the "missing commits" (missing commits = parent commits - existing commits).

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not intended to affect the main contributor analysis process. It is less about 'commits' per se.

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also note that the start_date is never used in the function called with it, which is another reason to simplify/remove it ...


# Grab the existing commits from the database
existing_commits = get_existing_commits_set(repo_id)
Expand Down Expand Up @@ -237,8 +235,6 @@
repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id

start_date = facade_helper.get_setting('start_date')

logger.info(f"Generating sequence for repo {repo_id}")

repo = get_repo_by_repo_id(repo_id)
Expand All @@ -248,7 +244,7 @@
repo_loc = (f"{absolute_path}/.git")
# Grab the parents of HEAD

parent_commits = get_parent_commits_set(repo_loc, start_date)
parent_commits = get_parent_commits_set(repo_loc)

# Grab the existing commits from the database
existing_commits = get_existing_commits_set(repo_id)
Expand All @@ -259,7 +255,7 @@
facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")


if not len(missing_commits) or repo_id is None:

Check warning on line 258 in augur/tasks/git/facade_tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/tasks/git/facade_tasks.py:258:7: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
#session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
return

Expand Down Expand Up @@ -438,11 +434,6 @@

analysis_sequence = []

#repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git)
#repos = fetchall_data_from_sql_text(repo_list)

start_date = facade_helper.get_setting('start_date')

#repo_ids = [repo['repo_id'] for repo in repos]

#repo_id = repo_ids.pop(0)
Expand Down Expand Up @@ -473,8 +464,6 @@
#repo_list = s.sql.text("""SELECT repo_id,repo_group_id,repo_path,repo_name FROM repo WHERE repo_git=:value""").bindparams(value=repo_git)
#repos = fetchall_data_from_sql_text(repo_list)

start_date = facade_helper.get_setting('start_date')

#repo_ids = [repo['repo_id'] for repo in repos]

#repo_id = repo_ids.pop(0)
Expand Down
11 changes: 11 additions & 0 deletions augur/tasks/git/util/facade_worker/facade_worker/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,17 @@ def insert_or_update_data(self, query, **bind_args)-> None:
return
def inc_repos_processed(self):
self.repos_processed += 1

def get_last_collected_commit_date(self, repo_id):
    """Return the committer timestamp of the most recently collected commit.

    Selects ``cmt_committer_timestamp`` from the ``commits`` row for
    ``repo_id`` that has the latest ``data_collection_date``.

    Args:
        repo_id: Value bound to the ``:repo_id`` parameter of the query.

    Returns:
        The first column of the selected row, or ``None`` when the query
        yields no row (for example, no commits stored for this repo yet).
    """
    commit_date_query = s.sql.text("""
SELECT cmt_committer_timestamp FROM commits
WHERE repo_id=:repo_id
ORDER BY data_collection_date DESC
LIMIT 1;
""").bindparams(repo_id=repo_id)

    result = execute_sql(commit_date_query).fetchone()
    # NOTE(review): assumes execute_sql returns a SQLAlchemy result whose
    # fetchone() gives None when there is no row; indexing it unguarded
    # would raise TypeError for a repo with no collected commits.
    if result is None:
        return None
    return result[0]

"""
class FacadeConfig:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,10 @@ def get_absolute_repo_path(repo_base_dir, repo_id, repo_path,repo_name):

return f"{repo_base_dir}{repo_id}-{repo_path}/{repo_name}"

def get_parent_commits_set(absolute_repo_path, start_date):
def get_parent_commits_set(absolute_repo_path):

parents = subprocess.Popen(["git --git-dir %s log --ignore-missing "
"--pretty=format:'%%H' --since=%s" % (absolute_repo_path,start_date)],
"--pretty=format:'%%H'" % (absolute_repo_path)],
stdout=subprocess.PIPE, shell=True)

parent_commits = set(parents.stdout.read().decode("utf-8",errors="ignore").split(os.linesep))
Expand Down
13 changes: 10 additions & 3 deletions augur/tasks/github/facade_github/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask
from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException
from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth
from augur.application.db.models import Contributor

Check warning on line 8 in augur/tasks/github/facade_github/tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0611: Unused Contributor imported from augur.application.db.models (unused-import) Raw Output: augur/tasks/github/facade_github/tasks.py:8:0: W0611: Unused Contributor imported from augur.application.db.models (unused-import)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[pylint] reported by reviewdog 🐶
W0611: Unused Contributor imported from augur.application.db.models (unused-import)

from augur.tasks.github.facade_github.core import *
from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors
from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import *
Expand Down Expand Up @@ -134,7 +134,7 @@
insert_alias(logger, cntrb, emailFromCommitData)
except LookupError as e:
logger.error(
''.join(traceback.format_exception(None, e, e.__traceback__)))

Check warning on line 137 in augur/tasks/github/facade_github/tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E0602: Undefined variable 'traceback' (undefined-variable) Raw Output: augur/tasks/github/facade_github/tasks.py:137:24: E0602: Undefined variable 'traceback' (undefined-variable)

Check warning on line 137 in augur/tasks/github/facade_github/tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 E1101: Class 'traceback' has no 'format_exception' member (no-member) Raw Output: augur/tasks/github/facade_github/tasks.py:137:24: E1101: Class 'traceback' has no 'format_exception' member (no-member)
logger.error(
f"Contributor id not able to be found in database despite the user_id existing. Something very wrong is happening. Error: {e}")
return
Expand Down Expand Up @@ -198,6 +198,10 @@
logger = logging.getLogger(insert_facade_contributors.__name__)
repo = get_repo_by_repo_git(repo_git)
repo_id = repo.repo_id
facade_helper = FacadeHelper(logger)

collection_status = repo.collection_status[0]
last_collected_date = collection_status.facade_data_last_collected

# Get all of the commit data's emails and names from the commit table that do not appear
# in the contributors table or the contributors_aliases table.
Expand All @@ -214,6 +218,7 @@
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email )
or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email)
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ))
Expand All @@ -231,6 +236,7 @@
commits
WHERE
commits.repo_id = :repo_id
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
Comment thread
IsaacMilarky marked this conversation as resolved.
AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email )
AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )
GROUP BY
Expand All @@ -239,7 +245,7 @@
commits.cmt_author_raw_email
ORDER BY
hash
""").bindparams(repo_id=repo_id)
""").bindparams(repo_id=repo_id,since_date=last_collected_date)
Comment thread
IsaacMilarky marked this conversation as resolved.

#Execute statement with session.
result = execute_sql(new_contrib_sql)
Expand All @@ -257,7 +263,6 @@

logger.debug("DEBUG: Got through the new_contribs")

facade_helper = FacadeHelper(logger)
# sql query used to find corresponding cntrb_id's of emails found in the contributor's table
# i.e., if a contributor already exists, we use it!
resolve_email_to_cntrb_id_sql = s.sql.text("""
Expand All @@ -271,6 +276,7 @@
commits
WHERE
contributors.cntrb_canonical = commits.cmt_author_raw_email
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
Comment thread
IsaacMilarky marked this conversation as resolved.
AND commits.repo_id = :repo_id
UNION
SELECT DISTINCT
Expand All @@ -286,7 +292,8 @@
contributors_aliases.alias_email = commits.cmt_author_raw_email
AND contributors.cntrb_id = contributors_aliases.cntrb_id
AND commits.repo_id = :repo_id
""").bindparams(repo_id=repo_id)
AND (:since_date is NULL OR commits.data_collection_date > :since_date)
""").bindparams(repo_id=repo_id,since_date=last_collected_date)
Comment thread
IsaacMilarky marked this conversation as resolved.


result = execute_sql(resolve_email_to_cntrb_id_sql)
Expand Down
Loading