Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
411684c
remove flake8 config from setup.cfg
menshikh-iv Nov 16, 2017
4a8825b
create distinct test_env for win
menshikh-iv Nov 16, 2017
dc9adc7
ignore stuff from tox
menshikh-iv Nov 16, 2017
73f9654
basic tox config
menshikh-iv Nov 16, 2017
8658974
add global env vars for full test run
menshikh-iv Nov 16, 2017
82c41a7
force-recreate for envs
menshikh-iv Nov 16, 2017
4cd4da1
show top20 slowest tests
menshikh-iv Nov 16, 2017
388c912
add upload/download wheels/docs
menshikh-iv Nov 16, 2017
53ffe25
fix E501 [1]
menshikh-iv Nov 16, 2017
9bcc7f2
fix E501 [2]
menshikh-iv Nov 17, 2017
1644039
fix E501 [3]
menshikh-iv Nov 17, 2017
a451e32
fix E501 [4]
menshikh-iv Nov 17, 2017
833488b
fix E501 [5]
menshikh-iv Nov 17, 2017
4f885ca
fix E501 [6]
menshikh-iv Nov 17, 2017
1d54741
travis + tox
menshikh-iv Nov 17, 2017
0cdcd0d
Install tox for travis
menshikh-iv Nov 17, 2017
cef285e
simplify travis file
menshikh-iv Nov 17, 2017
478cf9c
more verbosity with tox
menshikh-iv Nov 17, 2017
f1f49b7
Fix numpy scipy versions
menshikh-iv Nov 17, 2017
a69d8f7
Try to avoid pip install hang
menshikh-iv Nov 17, 2017
40ec317
Fix tox
menshikh-iv Nov 17, 2017
7016084
Add build_ext
menshikh-iv Nov 17, 2017
377c35f
Fix dtm test
menshikh-iv Nov 17, 2017
74713d8
remove install/run sh
menshikh-iv Nov 17, 2017
a7e657d
Fix imports & indentation
menshikh-iv Nov 17, 2017
b2d89f8
remove flake-diff
menshikh-iv Nov 17, 2017
a2cc05d
Add docs building to Travis
menshikh-iv Nov 17, 2017
343f5ed
join flake8 and docs to one job
menshikh-iv Nov 17, 2017
7196836
add re-run for failed tests (to avoid FP) + calculate code coverage
menshikh-iv Nov 18, 2017
4c841db
fix WR segfault (veeeery buggy implementation)
menshikh-iv Nov 18, 2017
6530208
attempt to make multiOS configuration
menshikh-iv Nov 20, 2017
41fe537
Merge branch 'develop' into improve-test-env
menshikh-iv Nov 20, 2017
ce50fb4
fix mistake with cython
menshikh-iv Nov 20, 2017
cd6d06b
Try to fix appveyor wheels problem
menshikh-iv Nov 20, 2017
4896882
Remove commented parts & add cache for travis
menshikh-iv Nov 20, 2017
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ Thumbs.db

# Other #
#########
.tox/
.cache/
.project
.pydevproject
.ropeproject
Expand Down
20 changes: 13 additions & 7 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,18 +5,24 @@ cache:
directories:
- $HOME/.cache/pip
- $HOME/.ccache

- $HOME/.pip-cache
dist: trusty
language: python


matrix:
include:
- env: PYTHON_VERSION="2.7" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="yes"
- env: PYTHON_VERSION="2.7" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- env: PYTHON_VERSION="3.5" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- env: PYTHON_VERSION="3.6" NUMPY_VERSION="1.11.3" SCIPY_VERSION="0.18.1" ONLY_CODESTYLE="no"
- python: '2.7'
env: TOXENV="flake8, docs"

- python: '2.7'
env: TOXENV="py27-linux"

- python: '3.5'
env: TOXENV="py35-linux"

- python: '3.6'
env: TOXENV="py36-linux"

install: source continuous_integration/travis/install.sh
script: bash continuous_integration/travis/run.sh
install: pip install tox
script: tox -vv
51 changes: 5 additions & 46 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,29 +13,20 @@ environment:
secure: qXqY3dFmLOqvxa3Om2gQi/BjotTOK+EP2IPLolBNo0c61yDtNWxbmE4wH3up72Be

matrix:
# - PYTHON: "C:\\Python27"
# PYTHON_VERSION: "2.7.12"
# PYTHON_ARCH: "32"

- PYTHON: "C:\\Python27-x64"
PYTHON_VERSION: "2.7.12"
PYTHON_ARCH: "64"

# - PYTHON: "C:\\Python35"
# PYTHON_VERSION: "3.5.2"
# PYTHON_ARCH: "32"
TOXENV: "py27-win"

- PYTHON: "C:\\Python35-x64"
PYTHON_VERSION: "3.5.2"
PYTHON_ARCH: "64"

# - PYTHON: "C:\\Python36"
# PYTHON_VERSION: "3.6.0"
# PYTHON_ARCH: "32"
TOXENV: "py35-win"

- PYTHON: "C:\\Python36-x64"
PYTHON_VERSION: "3.6.0"
PYTHON_ARCH: "64"
TOXENV: "py36-win"

init:
- "ECHO %PYTHON% %PYTHON_VERSION% %PYTHON_ARCH%"
Expand All @@ -57,48 +48,16 @@ install:
# not already installed.
- "powershell ./continuous_integration/appveyor/install.ps1"
- "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
- "python -m pip install -U pip"
- "python -m pip install -U pip tox"

# Check that we have the expected version and architecture for Python
- "python --version"
- "python -c \"import struct; print(struct.calcsize('P') * 8)\""

# Install the build and runtime dependencies of the project.
- "%CMD_IN_ENV% pip install --timeout=60 --trusted-host 28daf2247a33ed269873-7b1aad3fab3cc330e1fd9d109892382a.r6.cf2.rackcdn.com -r continuous_integration/appveyor/requirements.txt"
- "%CMD_IN_ENV% python setup.py bdist_wheel bdist_wininst"
- ps: "ls dist"

# Install the generated wheel package to test it
- "pip install --pre --no-index --find-links dist/ gensim"

# Not a .NET project, we build gensim in the install step instead
build: false

test_script:
# Change to a non-source folder to make sure we run the tests on the
# installed library.
- "mkdir empty_folder"
- "cd empty_folder"
- "pip install pyemd testfixtures sklearn Morfessor==2.0.2a4"
- "pip freeze"
- "python -c \"import nose; nose.main()\" -s -v gensim"
# Move back to the project folder
- "cd .."

artifacts:
# Archive the generated wheel package in the ci.appveyor.com build report.
- path: dist\*
on_success:
# Upload the generated wheel package to Rackspace
# On Windows, Apache Libcloud cannot find a standard CA cert bundle so we
# disable the ssl checks.
- "python -m wheelhouse_uploader upload --no-ssl-check --local-folder=dist gensim-windows-wheels"

notifications:
- provider: Webhook
url: https://webhooks.gitter.im/e/62c44ad26933cd7ed7e8
on_build_success: false
on_build_failure: True
- tox -vv

cache:
# Use the appveyor cache to avoid re-downloading large archives such
Expand Down
159 changes: 0 additions & 159 deletions continuous_integration/travis/flake8_diff.sh

This file was deleted.

13 changes: 0 additions & 13 deletions continuous_integration/travis/install.sh

This file was deleted.

11 changes: 0 additions & 11 deletions continuous_integration/travis/run.sh

This file was deleted.

7 changes: 5 additions & 2 deletions gensim/corpora/indexedcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ def __init__(self, fname, index_fname=None):
self.length = None

@classmethod
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None, metadata=False):
def serialize(serializer, fname, corpus, id2word=None, index_fname=None,
progress_cnt=None, labels=None, metadata=False):
"""
Iterate through the document stream `corpus`, saving the documents to `fname`
and recording byte offset of each document. Save the resulting index
Expand Down Expand Up @@ -93,7 +94,9 @@ def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progres
offsets = serializer.save_corpus(fname, corpus, id2word, **kwargs)

if offsets is None:
raise NotImplementedError("Called serialize on class %s which doesn't support indexing!" % serializer.__name__)
raise NotImplementedError(
"Called serialize on class %s which doesn't support indexing!" % serializer.__name__
)

# store offsets persistently, using pickle
# we shouldn't have to worry about self.index being a numpy.ndarray as the serializer will return
Expand Down
3 changes: 2 additions & 1 deletion gensim/corpora/lowcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,8 @@ def __init__(self, fname, id2word=None, line2words=split_on_space):
for doc in self:
all_terms.update(word for word, wordCnt in doc)
all_terms = sorted(all_terms) # sort the list of all words; rank in that list = word's integer id
self.id2word = dict(izip(xrange(len(all_terms)), all_terms)) # build a mapping of word id(int) -> word (string)
# build a mapping of word id(int) -> word (string)
self.id2word = dict(izip(xrange(len(all_terms)), all_terms))
else:
logger.info("using provided word mapping (%i ids)", len(id2word))
self.id2word = id2word
Expand Down
18 changes: 14 additions & 4 deletions gensim/corpora/sharded_corpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,10 @@ def resize_shards(self, shardsize):
for old_shard_n, old_shard_name in enumerate(old_shard_names):
os.remove(old_shard_name)
except Exception as e:
logger.error('Exception occurred during old shard no. %d removal: %s.\nAttempting to at least move new shards in.', old_shard_n, str(e))
logger.error(
'Exception occurred during old shard no. %d removal: %s.\nAttempting to at least move new shards in.',
old_shard_n, str(e)
)
finally:
# If something happens with cleaning up - try to at least get the
# new guys in.
Expand Down Expand Up @@ -673,7 +676,10 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop):
Returns the resulting s_result.
"""
if (result_stop - result_start) != (stop - start):
raise ValueError('Result start/stop range different than stop/start range (%d - %d vs. %d - %d)'.format(result_start, result_stop, start, stop))
raise ValueError(
'Result start/stop range different than stop/start range (%d - %d vs. %d - %d)'
.format(result_start, result_stop, start, stop)
)

# Dense data: just copy using numpy's slice notation
if not self.sparse_serialization:
Expand All @@ -685,7 +691,10 @@ def __add_to_slice(self, s_result, result_start, result_stop, start, stop):
# result.
else:
if s_result.shape != (result_start, self.dim):
raise ValueError('Assuption about sparse s_result shape invalid: {0} expected rows, {1} real rows.'.format(result_start, s_result.shape[0]))
raise ValueError(
'Assuption about sparse s_result shape invalid: {0} expected rows, {1} real rows.'
.format(result_start, s_result.shape[0])
)

tmp_matrix = self.current_shard[start:stop]
s_result = sparse.vstack([s_result, tmp_matrix])
Expand Down Expand Up @@ -786,7 +795,8 @@ def save_corpus(fname, corpus, id2word=None, progress_cnt=1000, metadata=False,
ShardedCorpus(fname, corpus, **kwargs)

@classmethod
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None, labels=None, metadata=False, **kwargs):
def serialize(serializer, fname, corpus, id2word=None, index_fname=None, progress_cnt=None,
labels=None, metadata=False, **kwargs):
"""
Iterate through the document stream `corpus`, saving the documents
as a ShardedCorpus to `fname`.
Expand Down
3 changes: 2 additions & 1 deletion gensim/corpora/svmlightcorpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ def line2doc(self, line):
if not parts:
raise ValueError('invalid line format in %s' % self.fname)
target, fields = parts[0], [part.rsplit(':', 1) for part in parts[1:]]
doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid'] # ignore 'qid' features, convert 1-based feature ids to 0-based
# ignore 'qid' features, convert 1-based feature ids to 0-based
doc = [(int(p1) - 1, float(p2)) for p1, p2 in fields if p1 != 'qid']
return doc, target

@staticmethod
Expand Down
Loading