-
Notifications
You must be signed in to change notification settings - Fork 10
Expand file tree
/
Copy pathtest_constructors.py
More file actions
280 lines (191 loc) · 7.65 KB
/
test_constructors.py
File metadata and controls
280 lines (191 loc) · 7.65 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
import os
import pandas as pd
import pytest
from pandas.testing import assert_frame_equal
from pathlib import Path
from pins import constructors as c
from pins.tests.conftest import (
PATH_TO_EXAMPLE_BOARD,
PATH_TO_EXAMPLE_VERSION,
EXAMPLE_REL_PATH,
)
from pins.tests.helpers import rm_env
@pytest.fixture
def df_csv():
    """Load ``df_csv.csv`` from the example version directory as a DataFrame."""
    csv_path = PATH_TO_EXAMPLE_VERSION / "df_csv.csv"
    return pd.read_csv(csv_path)
def check_dir_writable(p_dir):
    """Assert that the parent directory of ``p_dir`` exists and is writable.

    Only the parent is inspected; ``p_dir`` itself is not required to exist.
    """
    parent_dir = p_dir.parent

    assert parent_dir.exists()
    assert os.access(parent_dir.absolute(), os.W_OK)
def check_cache_file_path(p_file, p_cache):
    """Assert that ``p_file`` sits exactly two directories below ``p_cache``.

    Raises ValueError (from ``relative_to``) when ``p_file`` is not located
    under ``p_cache``.
    """
    relative = p_file.relative_to(p_cache)

    # A relative path has one component per directory level plus one for the
    # file itself, so dropping the final component leaves the number of parent
    # directories. This essentially counts separators in an OS-independent way.
    depth = len(relative.parts) - 1

    assert depth == 2
def construct_from_board(board):
    """Rebuild ``board`` by calling the matching ``board_*`` constructor.

    The constructor is selected from the protocol of ``board.fs``; when the
    protocol is not a plain string, its first entry is used.
    """
    protocol = board.fs.protocol
    fs_name = protocol if isinstance(protocol, str) else protocol[0]

    if fs_name == "file":
        return c.board_folder(board.board)

    if fs_name == "rsc":
        return c.board_rsconnect(
            server_url=board.fs.api.server_url, api_key=board.fs.api.api_key
        )

    if fs_name == "abfs":
        return c.board_azure(board.board)

    # any other protocol is looked up as a constructor named board_<protocol>
    constructor = getattr(c, f"board_{fs_name}")
    return constructor(board.board)
# End-to-end constructor tests
# there are two facets of boards: reading and writing.
# copied from test_compat
@pytest.mark.skip_on_github
def test_constructor_board_url_data(tmp_cache, http_example_board_path, df_csv):
    """Reading a pin through ``board_urls`` returns the example data."""
    # the pin path could instead be derived from the example version path
    pin_paths = {"df_csv": "df_csv/20220214T163720Z-9bfad/"}
    url_board = c.board_urls(http_example_board_path, pin_paths=pin_paths)

    result = url_board.pin_read("df_csv")

    # check data ----
    assert_frame_equal(result, df_csv)
@pytest.mark.xfail
@pytest.mark.skip_on_github
def test_constructor_board_url_cache(tmp_cache, http_example_board_path, df_csv):
    """Check the cache layout after reading a pin through ``board_urls``.

    Expected to fail for now; see the TODO below.
    """
    # TODO: downloading a pin does not put files in the same directory, since
    # in this case we are hashing on the full url.
    board = c.board_urls(
        http_example_board_path,
        # could derive from example version path
        pin_paths={"df_csv": "df_csv/20220214T163718Z-eceac/"},
    )

    board.pin_read("df_csv")

    # check cache ----
    http_dirs = list(tmp_cache.glob("http_*"))

    assert len(http_dirs) == 1

    # there are two files in the flat cache (metadata, and the csv)
    parent = http_dirs[0]
    res = list(parent.rglob("*"))
    assert len(res) == 2

    # validate that both the csv and the data.txt metadata file are present.
    # rglob yields Path objects (no .endswith), so compare on their string
    # form, and count over a list because len() rejects a generator.
    assert len([x for x in res if str(x).endswith("df_csv.csv")]) == 1
    assert len([x for x in res if str(x).endswith("data.txt")]) == 1

    assert len(list(parent.glob("**/*"))) == 2
@pytest.mark.skip_on_github
def test_constructor_board_url_file(tmp_cache, http_example_board_path):
    """Download a single-file pin via ``board_urls``; check cache and deparse."""
    # TODO: downloading a pin does not put files in the same directory, since
    # in this case we are hashing on the full url.
    # the pin path could instead be derived from the example version path
    pin_paths = {"df_csv": "df_csv/20220214T163718Z-eceac/df_csv.csv"}
    board = c.board_urls(http_example_board_path, pin_paths=pin_paths)

    board.pin_download("df_csv")

    # check cache ----
    cached_dirs = list(tmp_cache.glob("http_*"))
    assert len(cached_dirs) == 1

    # the pin path names the csv directly, and exactly one cache entry
    # (that csv) is expected
    cache_root = cached_dirs[0]
    cached_entries = list(cache_root.rglob("*"))
    assert len(cached_entries) == 1
    assert str(cached_entries[0]).endswith("df_csv.csv")

    # the deparsed constructor call should evaluate to a board listing the same pins
    deparsed = c.board_deparse(board)
    rebuilt = eval(deparsed, c.__dict__)
    assert rebuilt.pin_list() == board.pin_list()
@pytest.mark.skip_on_github
def test_constructor_board_github(tmp_cache, http_example_board_path, df_csv):
    """Read a pin through ``board_github`` and check the data and cache layout."""
    gh_board = c.board_github("machow", "pins-python", EXAMPLE_REL_PATH)  # noqa

    result = gh_board.pin_read("df_csv")
    assert_frame_equal(result, df_csv)

    # exactly one github cache directory should have been created
    github_caches = list(tmp_cache.glob("github_*"))
    assert len(github_caches) == 1

    # and it should hold a single csv at the expected depth
    cache_dir = github_caches[0]
    csv_files = list(cache_dir.rglob("**/*.csv"))
    assert len(csv_files) == 1

    check_cache_file_path(csv_files[0], cache_dir)
@pytest.fixture(scope="function")
def board(backend):
    """Yield a temporary board created by ``backend`` from the example board path.

    The board is handed back to ``backend.teardown_board`` after the test.
    """
    # TODO: copied from test_compat.py
    example_dir = str(PATH_TO_EXAMPLE_BOARD.absolute())
    tmp_board = backend.create_tmp_board(example_dir)

    yield tmp_board

    backend.teardown_board(tmp_board)
def test_constructor_boards(board, df_csv, tmp_cache):
    """Read a pin through a constructor-built board; check data and caching.

    For non-local boards this also checks the cache directory layout and that
    fetching metadata again advances the access time of the cached ``data.txt``.
    """
    # TODO: would be nice to have fixtures for each board constructor
    # doesn't need to copy over pins-compat content

    # create board from constructor -------------------------------------------

    board = construct_from_board(board)

    # read a pin and check its contents ---------------------------------------

    df = board.pin_read("df_csv")

    # check data
    assert_frame_equal(df, df_csv)

    # check the cache structure -----------------------------------------------

    # The protocol may be a plain string or a sequence of aliases; normalize it
    # the same way construct_from_board does so local boards are always
    # recognized.
    prot = board.fs.protocol
    fs_name = prot if isinstance(prot, str) else prot[0]

    if fs_name == "file":
        # no caching for local file boards
        return

    # check path structure ----

    options = list(tmp_cache.glob("*"))
    assert len(options) == 1

    cache_dir = options[0]
    res = list(cache_dir.rglob("*/*.csv"))
    assert len(res) == 1

    check_cache_file_path(res[0], cache_dir)

    # check cache touch on access time ----

    meta = board.pin_meta("df_csv")
    p_cache_meta = (
        Path(board._get_cache_path(meta.name, meta.version.version)) / "data.txt"
    )
    orig_access = p_cache_meta.stat().st_atime

    # fetching the metadata a second time is expected to touch the cached file
    board.pin_meta("df_csv")

    new_access = p_cache_meta.stat().st_atime

    assert orig_access < new_access
@pytest.fixture(scope="function")
def board2(backend):
    """Yield a temporary board created by ``backend`` with no arguments.

    The board is handed back to ``backend.teardown_board`` after the test.
    """
    tmp_board = backend.create_tmp_board()

    yield tmp_board

    backend.teardown_board(tmp_board)
def test_constructor_boards_multi_user(board2, df_csv, tmp_cache):
    """Two boards constructed from the same source see each other's pins."""
    prot = board2.fs.protocol
    fs_name = prot if isinstance(prot, str) else prot[0]

    if fs_name == "rsc":
        # TODO: RSConnect writes pin names like <user>/<name>, so would need to
        # modify test
        pytest.skip()

    # the first board writes one pin and sees only that pin
    first = construct_from_board(board2)

    first.pin_write(df_csv, "df_csv", type="csv")
    assert first.pin_list() == ["df_csv"]

    # a second, independently constructed board sees both pins
    second = construct_from_board(board2)
    second.pin_write(df_csv, "another_df_csv", type="csv")

    assert sorted(second.pin_list()) == sorted(["df_csv", "another_df_csv"])
# Board particulars ===========================================================
@pytest.mark.skip_on_github
def test_board_constructor_local_default_writable():
    """``board_local`` points at a writable ``pins-py`` directory by default."""
    # run inside rm_env("PINS_DATA_DIR") so the default location is used
    # (presumably the variable is unset for the duration; confirm in helpers)
    with rm_env("PINS_DATA_DIR"):
        local_board = c.board_local()
        board_path = Path(local_board.board)

        check_dir_writable(board_path)
        assert board_path.name == "pins-py"
def test_board_constructor_temp_writable():
    """``board_temp`` points at a writable location that starts out empty."""
    with rm_env("PINS_DATA_DIR"):
        temp_board = c.board_temp()
        board_path = Path(temp_board.board)

        check_dir_writable(board_path)

        # a fresh temporary board should contain no entries
        entries = list(board_path.glob("*"))
        assert not entries
def test_board_constructor_folder(tmp_dir2, df):
    """Write a csv pin through ``board_folder`` and read the same data back."""
    folder_board = c.board_folder(str(tmp_dir2))
    folder_board.pin_write(df, "some_df", type="csv")

    # the pin should show up under the folder the board was pointed at
    pin_dir = tmp_dir2 / "some_df"
    assert pin_dir.exists()

    round_tripped = folder_board.pin_read("some_df")
    assert df.equals(round_tripped)
# Deparsing ===================================================================
def test_board_deparse(board):
    """The code produced by ``board_deparse`` evaluates to a usable board."""
    protocol = board.fs.protocol

    with rm_env("CONNECT_API_KEY"):
        if protocol == "rsc":
            # NOTE(review): presumably the deparsed code reads the key from
            # this environment variable rather than embedding it; confirm
            os.environ["CONNECT_API_KEY"] = board.fs.api.api_key

        deparsed = c.board_deparse(board)
        rebuilt = eval(deparsed, c.__dict__)

        # listing pins only needs to succeed; the result is not inspected
        rebuilt.pin_list()