-
Notifications
You must be signed in to change notification settings - Fork 992
add date filter to contributor resolution logic queries #3253
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
8539825
fe85b3f
0df6bdd
6a6c760
267f95e
9ede8af
f41e8f8
c2e87ff
8ae5002
a87c6a0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -5,7 +5,7 @@ | |
| from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask | ||
| from augur.tasks.github.util.github_data_access import GithubDataAccess, UrlNotFoundException | ||
| from augur.tasks.github.util.github_random_key_auth import GithubRandomKeyAuth | ||
| from augur.application.db.models import Contributor | ||
|
Check warning on line 8 in augur/tasks/github/facade_github/tasks.py
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. [pylint] reported by reviewdog 🐶 |
||
| from augur.tasks.github.facade_github.core import * | ||
| from augur.application.db.lib import execute_sql, get_contributor_aliases_by_email, get_unresolved_commit_emails_by_name, get_contributors_by_full_name, get_repo_by_repo_git, batch_insert_contributors | ||
| from augur.tasks.git.util.facade_worker.facade_worker.facade00mainprogram import * | ||
|
|
@@ -134,7 +134,7 @@ | |
| insert_alias(logger, cntrb, emailFromCommitData) | ||
| except LookupError as e: | ||
| logger.error( | ||
| ''.join(traceback.format_exception(None, e, e.__traceback__))) | ||
|
Check warning on line 137 in augur/tasks/github/facade_github/tasks.py
|
||
| logger.error( | ||
| f"Contributor id not able to be found in database despite the user_id existing. Something very wrong is happening. Error: {e}") | ||
| return | ||
|
|
@@ -198,6 +198,10 @@ | |
| logger = logging.getLogger(insert_facade_contributors.__name__) | ||
| repo = get_repo_by_repo_git(repo_git) | ||
| repo_id = repo.repo_id | ||
| facade_helper = FacadeHelper(logger) | ||
|
|
||
| collection_status = repo.collection_status[0] | ||
| last_collected_date = collection_status.facade_data_last_collected | ||
|
|
||
| # Get all of the commit data's emails and names from the commit table that do not appear | ||
| # in the contributors table or the contributors_aliases table. | ||
|
|
@@ -214,6 +218,7 @@ | |
| commits | ||
| WHERE | ||
| commits.repo_id = :repo_id | ||
| AND (:since_date is NULL OR commits.data_collection_date > :since_date) | ||
| AND (NOT EXISTS ( SELECT contributors.cntrb_canonical FROM contributors WHERE contributors.cntrb_canonical = commits.cmt_author_raw_email ) | ||
| or NOT EXISTS ( SELECT contributors_aliases.alias_email from contributors_aliases where contributors_aliases.alias_email = commits.cmt_author_raw_email) | ||
| AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name )) | ||
|
|
@@ -231,6 +236,7 @@ | |
| commits | ||
| WHERE | ||
| commits.repo_id = :repo_id | ||
| AND (:since_date is NULL OR commits.data_collection_date > :since_date) | ||
|
IsaacMilarky marked this conversation as resolved.
|
||
| AND EXISTS ( SELECT unresolved_commit_emails.email FROM unresolved_commit_emails WHERE unresolved_commit_emails.email = commits.cmt_author_raw_email ) | ||
| AND ( commits.cmt_author_name ) IN ( SELECT C.cmt_author_name FROM commits AS C WHERE C.repo_id = :repo_id GROUP BY C.cmt_author_name ) | ||
| GROUP BY | ||
|
|
@@ -239,7 +245,7 @@ | |
| commits.cmt_author_raw_email | ||
| ORDER BY | ||
| hash | ||
| """).bindparams(repo_id=repo_id) | ||
| """).bindparams(repo_id=repo_id,since_date=last_collected_date) | ||
|
IsaacMilarky marked this conversation as resolved.
|
||
|
|
||
| #Execute statement with session. | ||
| result = execute_sql(new_contrib_sql) | ||
|
|
@@ -257,7 +263,6 @@ | |
|
|
||
| logger.debug("DEBUG: Got through the new_contribs") | ||
|
|
||
| facade_helper = FacadeHelper(logger) | ||
| # sql query used to find corresponding cntrb_id's of emails found in the contributor's table | ||
| # i.e., if a contributor already exists, we use it! | ||
| resolve_email_to_cntrb_id_sql = s.sql.text(""" | ||
|
|
@@ -271,6 +276,7 @@ | |
| commits | ||
| WHERE | ||
| contributors.cntrb_canonical = commits.cmt_author_raw_email | ||
| AND (:since_date is NULL OR commits.data_collection_date > :since_date) | ||
|
IsaacMilarky marked this conversation as resolved.
|
||
| AND commits.repo_id = :repo_id | ||
| UNION | ||
| SELECT DISTINCT | ||
|
|
@@ -286,7 +292,8 @@ | |
| contributors_aliases.alias_email = commits.cmt_author_raw_email | ||
| AND contributors.cntrb_id = contributors_aliases.cntrb_id | ||
| AND commits.repo_id = :repo_id | ||
| """).bindparams(repo_id=repo_id) | ||
| AND (:since_date is NULL OR commits.data_collection_date > :since_date) | ||
| """).bindparams(repo_id=repo_id,since_date=last_collected_date) | ||
|
IsaacMilarky marked this conversation as resolved.
|
||
|
|
||
|
|
||
| result = execute_sql(resolve_email_to_cntrb_id_sql) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Per @MoralCode's prior comment: the "analyze commits in parallel" method would go from the last time facade ran, but this doesn't really get used. Right now it gets the parent commits first, then gets the commits that already exist in the database, and only goes through the commit hashes for the "missing commits", where missing commits = parent commits - existing commits.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is not intended to affect the main contributor analysis process. It is less about 'commits' per se.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Also note that the `start_date` is never used in the function called with it, which is another reason to simplify/remove it ...