Skip to content

helpers

create_current_slug(query_doi, ignore_resources=None)

Creates a slug for the given query DOI at the current version. This is done by creating a nav slug, which has no version.

Parameters:

Name Type Description Default
query_doi QueryDOI

the QueryDOI

required
ignore_resources

a list of resource IDs to ignore

None

Returns:

Type Description
str

a slug

Source code in ckanext/query_dois/routes/_helpers.py
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
def create_current_slug(query_doi: QueryDOI, ignore_resources=None) -> str:
    """
    Creates a slug pointing at the current version of the given query DOI's search.
    The slug is requested from the ``vds_slug_create`` action as a nav slug, which
    has no version.

    :param query_doi: the QueryDOI
    :param ignore_resources: a list of resource IDs to leave out of the slug; if
        this is None or empty, no resource IDs are removed
    :returns: a slug
    """
    wanted_ids = query_doi.get_resource_ids()
    if ignore_resources:
        # drop any resource the caller has asked us to exclude
        wanted_ids = [rid for rid in wanted_ids if rid not in ignore_resources]

    request = dict(
        query=query_doi.query,
        query_version=query_doi.query_version,
        resource_ids=wanted_ids,
        nav_slug=True,
    )
    create_slug = toolkit.get_action('vds_slug_create')
    response = create_slug({}, request)
    return response['slug']

encode_params(params, version=None, extras=None, for_api=False)

Encodes the parameters for a query in the CKAN resource view format and returns them as a query string.

Parameters:

Name Type Description Default
params

a dict of parameters, such as a DatastoreQuery's query dict

required
version

the version to add into the query string (default: None)

None
extras

an optional dict of extra parameters to add as well as the ones found in the params dict (default: None)

None
for_api

whether the query string is for a CKAN resource view or an API get as it changes the format (default: False)

False

Returns:

Type Description

a query string of the query parameters (no ? at the start but will include & if needed)

Source code in ckanext/query_dois/routes/_helpers.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def encode_params(params, version=None, extras=None, for_api=False):
    """
    Encodes the parameters for a query in the CKAN resource view format (or the API
    format) and returns them as a query string.

    Any top-level ``version`` parameter is dropped. The ``filters`` value is copied
    before it is touched, so the dict passed in is not modified. When no version is
    given, any ``__version__`` filter is removed; when a version is given, it is
    written into the filters as ``__version__``.

    :param params: a dict of parameters, such as a DatastoreQuery's query dict
    :param version: the version to add into the query string (default: None)
    :param extras: an optional dict of extra parameters to add as well as the ones found
        in the params dict; on a key clash the extras value wins (default: None)
    :param for_api: whether the query string is for a CKAN resource view or an API get
        as it changes the format (default: False)
    :returns: a query string of the query parameters (no ? at the start but will include
        & if needed)
    """
    extra_items = extras.items() if extras is not None else []

    merged = {}
    for key, val in itertools.chain(params.items(), extra_items):
        # version information is never copied across from the source dicts
        if key == 'version':
            continue
        if key == 'filters':
            val = copy.deepcopy(val)
            if version is None:
                val.pop('__version__', None)
        merged[key] = val

    if version is not None:
        # the only version that ends up in the output is the one passed in explicitly
        merged.setdefault('filters', {})['__version__'] = version

    def flatten(nested):
        # the API accepts nested data as JSON
        if for_api:
            return json.dumps(nested)
        # resource views use "key:value" pairs joined with pipes, where a list
        # value produces one pair per element
        pairs = []
        for name, entry in nested.items():
            entries = entry if isinstance(entry, list) else [entry]
            pairs.extend(f'{name}:{item}' for item in entries)
        return '|'.join(pairs)

    # only dict values (in practice, the filters) need any special treatment
    encoded = {
        key: flatten(val) if isinstance(val, dict) else val
        for key, val in merged.items()
    }
    return urlencode(encoded)

generate_rerun_urls(resource, package, query, rounded_version=None)

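Generate a dict containing all the "rerun" URLs needed to allow the user to revisit the data either through the website or through the API. The "api" entry and the "original" page URL are only included when a rounded_version is provided. With a rounded_version, the dict returned will look like the following: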

{
    "page": {
        "original": ...
        "current": ...
    },
    "api": {
        "original": ...
        "current": ...
    }
}

Parameters:

Name Type Description Default
resource

the resource dict

required
package

the package dict

required
query

the query dict

required
rounded_version

the version rounded down to the nearest available on the resource

None

Returns:

Type Description

a dict of urls

Source code in ckanext/query_dois/routes/_helpers.py
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
def generate_rerun_urls(resource, package, query, rounded_version=None):
    """
    Generate a dict containing the "rerun" URLs that let the user revisit the data
    either through the website or through the API. When a rounded version is given
    the dict returned will look like the following:

        {
            "page": {
                "current": ...
                "original": ...
            },
            "api": {
                "current": ...
                "original": ...
            }
        }

    When rounded_version is None, only the "current" page URL is produced: there is
    no "original" page entry and no "api" entry at all.

    :param resource: the resource dict
    :param package: the package dict
    :param query: the query dict
    :param rounded_version: the version rounded down to the nearest available on the resource
    :returns: a dict of urls
    """
    page_prefix = (
        toolkit.url_for(
            'resource.read', id=package['name'], resource_id=resource['id']
        )
        + '?'
    )
    urls = {'page': {'current': page_prefix + encode_params(query)}}

    # without a version to pin to, there is nothing more to add
    if rounded_version is None:
        return urls

    urls['page']['original'] = page_prefix + encode_params(
        query, version=rounded_version
    )

    api_prefix = '/api/action/datastore_search' + '?'
    # the API call needs to be told which resource to search
    extras = {'resource_id': resource['id']}
    urls['api'] = {
        'current': api_prefix + encode_params(query, extras=extras, for_api=True),
        'original': api_prefix
        + encode_params(query, version=rounded_version, extras=extras, for_api=True),
    }
    return urls

get_authors(packages)

Retrieves all the authors from the given packages, de-duplicates them (if necessary) and then returns them as a list.

Note that this function takes a list of packages as it is multi-package and therefore multi-resource ready.

Parameters:

Name Type Description Default
packages

the packages

required

Returns:

Type Description

a list of author(s)

Source code in ckanext/query_dois/routes/_helpers.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
def get_authors(packages):
    """
    Retrieves all the authors from the given packages, de-duplicates them (if necessary)
    and then returns them as a list in the order they were first seen.

    An author value containing ``;`` is split on ``;``; otherwise, if it contains
    ``,`` it is split on ``,``; otherwise it is used whole. Each resulting name has
    surrounding whitespace removed before de-duplication, and empty names are
    dropped. Packages with a missing, None or empty author are skipped.

    Note that this function takes a list of packages as it is multi-package and
    therefore multi-resource ready.

    :param packages: the packages
    :returns: a list of author(s)
    """
    # use an ordered dict in the absence of a sorted set
    authors = OrderedDict()
    for package in packages:
        author = package.get('author')
        if not author:
            # nothing usable on this package
            continue
        # some author values will contain many authors with a separator, perhaps , or ;
        # (; is tried first so that "Surname, Initial; Surname, Initial" stays intact)
        for separator in (';', ','):
            if separator in author:
                names = author.split(separator)
                break
        else:
            # if the author value didn't contain a separator then we can just use the
            # value as is
            names = [author]

        for name in names:
            # strip so that "A; B" and "B" de-duplicate, and so that a trailing
            # separator doesn't produce a blank author
            name = name.strip()
            if name:
                authors[name] = True

    return list(authors)

get_query_doi(doi)

Retrieves a QueryDOI object from the database for the given DOI, if there is one, otherwise returns None.

Parameters:

Name Type Description Default
doi

the doi (full doi, prefix/suffix)

required

Returns:

Type Description

A QueryDOI object or None

Source code in ckanext/query_dois/routes/_helpers.py
48
49
50
51
52
53
54
55
56
def get_query_doi(doi):
    """
    Looks up the QueryDOI object stored in the database for the given DOI.

    :param doi: the doi (full doi, prefix/suffix)
    :returns: the first matching QueryDOI object, or None if there is no match
    """
    matches = model.Session.query(QueryDOI).filter(QueryDOI.doi == doi)
    return matches.first()

get_stats(query_doi)

Retrieve some simple stats about the query DOI - this includes the total downloads, the total saves and the last download timestamp. Note that only download and save actions are counted; no other actions are considered.

Parameters:

Name Type Description Default
query_doi

the QueryDOI object

required

Returns:

Type Description

a 3-tuple containing the total downloads, total saves and the last download timestamp

Source code in ckanext/query_dois/routes/_helpers.py
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def get_stats(query_doi):
    """
    Gathers some simple usage stats for the query DOI: how many download stats and
    how many save stats are recorded against it, and when it was last downloaded.
    No other actions are considered.

    :param query_doi: the QueryDOI object
    :returns: a 3-tuple containing the total downloads, total saves and the last download
        timestamp (None if there are no download stats)
    """

    def stats_for(action):
        # a fresh query over the stats recorded against this doi for the given action
        return (
            model.Session.query(QueryDOIStat)
            .filter(QueryDOIStat.doi == query_doi.doi)
            .filter(QueryDOIStat.action == action)
        )

    download_total = stats_for(DOWNLOAD_ACTION).count()
    save_total = stats_for(SAVE_ACTION).count()

    # the "last" download is the download stat with the highest id
    latest_download = (
        stats_for(DOWNLOAD_ACTION).order_by(QueryDOIStat.id.desc()).first()
    )
    if latest_download is None:
        last_timestamp = None
    else:
        last_timestamp = latest_download.timestamp

    return download_total, save_total, last_timestamp

render_datastore_search_doi_page(query_doi)

Renders a DOI landing page for a datastore_search based query DOI.

Parameters:

Name Type Description Default
query_doi

the query DOI

required

Returns:

Type Description

the rendered page

Source code in ckanext/query_dois/routes/_helpers.py
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
def render_datastore_search_doi_page(query_doi):
    """
    Renders a DOI landing page for a datastore_search based query DOI.

    If the resource can't be found or the user isn't authorised to see it, the page
    is rendered in an "inaccessible" state with placeholder package details. If the
    resource is accessible but not marked as datastore active, the rerun URLs are
    generated without a version.

    :param query_doi: the query DOI
    :returns: the rendered page
    """
    # only single resource query DOIs are handled at the moment, so the first
    # resource and the first rounded version are all we use
    resource_id = query_doi.get_resource_ids()[0]
    rounded_version = query_doi.get_rounded_versions()[0]

    try:
        resource, package = get_resource_and_package(resource_id)
    except (toolkit.ObjectNotFound, toolkit.NotAuthorized):
        resource, package = None, None
        is_inaccessible, in_datastore = True, False
    else:
        is_inaccessible = False
        in_datastore = resource.get('datastore_active', False)

    # the saves count is discarded as it will always be 0 for a datastore_search DOI
    download_count, _, latest_download = get_stats(query_doi)

    # at most one warning is shown, with inaccessibility taking priority
    warnings = []
    if is_inaccessible:
        warnings.append(
            toolkit._(
                'All resources associated with this search have been deleted, moved, '
                'or are no longer available in their previous format.'
            )
        )
    elif not in_datastore:
        warnings.append(
            toolkit._(
                'All records associated with this search have been removed from the '
                'search index. The data may still exist, but they are no longer '
                'versioned and cannot be filtered.'
            )
        )

    page_context = {
        'query_doi': query_doi,
        'doi': query_doi.doi,
        'resource': resource,
        'package': package,
        'version': rounded_version,
        'usage_stats': {
            'downloads': download_count,
            'last_download_timestamp': latest_download,
        },
        'is_inaccessible': is_inaccessible,
        'in_datastore': in_datastore,
        'warnings': warnings,
        # placeholder values, replaced below when the resource is accessible
        'package_doi': None,
        'authors': toolkit._('Unknown'),
        'reruns': {},
    }

    if not is_inaccessible:
        # this is effectively an integration point with the ckanext-doi extension.
        # If there is demand we should open this up so that we can support other
        # dois on packages extensions
        if package.get('doi_status', False):
            page_context['package_doi'] = package['doi']
        else:
            page_context['package_doi'] = None
        page_context['authors'] = get_authors([package])
        # no version is passed when the resource isn't in the datastore
        page_context['reruns'] = generate_rerun_urls(
            resource,
            package,
            query_doi.query,
            rounded_version if in_datastore else None,
        )

    return toolkit.render('query_dois/single_landing_page.html', page_context)

render_multisearch_doi_page(query_doi)

Renders a DOI landing page for a datastore_multisearch based query DOI.

Parameters:

Name Type Description Default
query_doi QueryDOI

the query DOI

required

Returns:

Type Description

the rendered page

Source code in ckanext/query_dois/routes/_helpers.py
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
def render_multisearch_doi_page(query_doi: QueryDOI):
    """
    Renders a DOI landing page for a datastore_multisearch based query DOI.

    The resources referenced by the DOI are located by paging through the
    ``current_package_list_with_resources`` action and are sorted into three groups:

    - accessible: found and flagged ``datastore_active``
    - inaccessible but known: found but not flagged ``datastore_active``
    - inaccessible and unknown: never found in the package listing

    From these groups the function builds the "current" details (what can be
    retrieved now), the "saved" details (what the DOI originally covered), any
    warnings, and a slug for rerunning the search against the accessible resources
    only. These are all passed to the ``query_dois/multisearch_landing_page.html``
    template.

    :param query_doi: the query DOI
    :returns: the rendered page
    """
    resource_ids = set(query_doi.get_resource_ids())

    # package id -> {title, name, resource_ids} for packages with at least one
    # accessible resource
    accessible_packages = {}
    # resource id -> {name, package_id} for resources that are datastore active
    accessible_resources = {}
    # InaccessibleResource objects for everything else
    inaccessible_resources = []

    offset = 0
    action = toolkit.get_action('current_package_list_with_resources')
    # ids we haven't come across in the package listing yet; the loop below stops as
    # soon as this is empty or the listing runs out
    unchecked_resource_ids = resource_ids.copy()
    while len(unchecked_resource_ids) > 0:
        # DOI resources should always be public, so ignore anything that isn't
        # (presumably the user-less, auth-checked context restricts the listing to
        # what an anonymous visitor can see - TODO confirm)
        context = {'ignore_auth': False, 'user': None}
        packages = action(context, {'offset': offset, 'limit': 200})
        if not packages:
            # no more packages to look through
            break
        for package in packages:
            package_accessible_resources = []
            for resource in package.get('resources', []):
                if resource['id'] in resource_ids:
                    unchecked_resource_ids.discard(resource['id'])
                    # NOTE(review): direct key access here will raise a KeyError if
                    # the resource dict has no datastore_active key, whereas
                    # render_datastore_search_doi_page uses
                    # .get('datastore_active', False) - confirm the key is always set
                    if resource['datastore_active']:
                        package_accessible_resources.append(resource['id'])
                        accessible_resources[resource['id']] = {
                            'name': resource['name'],
                            'package_id': package['id'],
                        }
                    else:
                        # non-datastore resources are "inaccessible" as DOI resources,
                        # but we can still show their details
                        inaccessible_resources.append(
                            InaccessibleResource(
                                id=resource['id'],
                                name=resource['name'],
                                package_id=package['id'],
                                package_name=package['name'],
                                package_title=package['title'],
                            )
                        )
            # only packages that still contribute an accessible resource are listed
            if package_accessible_resources:
                accessible_packages[package['id']] = {
                    'title': package['title'],
                    'name': package['name'],
                    'resource_ids': package_accessible_resources,
                }
        # move on to the next page of packages
        offset += len(packages)

    # if it couldn't be found, we don't know what it is
    for rid in unchecked_resource_ids:
        inaccessible_resources.append(InaccessibleResource(id=rid))

    inaccessible_count = len(inaccessible_resources)

    # usage stats
    downloads, saves, last_download_timestamp = get_stats(query_doi)
    usage_stats = {
        'downloads': downloads,
        'saves': saves,
        'last_download_timestamp': last_download_timestamp,
    }

    # current details
    # (resource id, count) pairs for the accessible resources only, ordered by count
    # with the largest first
    sorted_resource_counts = sorted(
        [
            (k, v)
            for k, v in query_doi.resource_counts.items()
            if k in accessible_resources
        ],
        key=operator.itemgetter(1),
        reverse=True,
    )
    current_details = {
        'resource_count': len(accessible_resources),
        'package_count': len(accessible_packages),
        'sorted_resource_counts': sorted_resource_counts,
        # when nothing is missing the stored total is used as is, otherwise the
        # total is recalculated from the accessible resources' counts
        'record_count': query_doi.count
        if inaccessible_count == 0
        else sum([v for k, v in sorted_resource_counts]),
    }

    # saved details
    if inaccessible_count == 0:
        saved_details = {
            'resource_count': len(accessible_resources),
            'record_count': query_doi.count,
            'missing_resources': 0,
            'missing_records': 0,
        }
    else:
        saved_details = {
            'resource_count': len(query_doi.resource_counts),
            'record_count': query_doi.count,
            'missing_resources': inaccessible_count,
            # the difference between the stored total and what is still retrievable
            'missing_records': query_doi.count - current_details['record_count'],
        }

    # warnings
    warnings = []
    if len(accessible_resources) == 0:
        # nothing left to search, so no slug is created for a rerun
        current_slug = None
        warnings = [
            toolkit._(
                'All resources associated with this search have been deleted, moved, '
                'or are no longer available in their previous format.'
            )
        ]
    else:
        # the rerun slug leaves out every resource that is no longer accessible
        current_slug = create_current_slug(
            query_doi, ignore_resources=[r.id for r in inaccessible_resources]
        )
        if inaccessible_count > 0:
            warnings.append(
                toolkit._(
                    'Some resources have been deleted, moved, or are no longer '
                    'available. Affected resources: '
                )
                + str(inaccessible_count)
            )

    # inaccessible resources
    # unknown resources are rolled up into one summary entry, known ones are listed
    # individually with their stored record counts
    unknown = {'resource_count': 0, 'record_count': 0}
    known = []
    for res in inaccessible_resources:
        # NOTE(review): both branches index resource_counts directly, so every
        # inaccessible resource id is assumed to have an entry - confirm
        if res.is_unknown:
            unknown['resource_count'] += 1
            unknown['record_count'] += query_doi.resource_counts[res.id]
        else:
            res.record_count = query_doi.resource_counts[res.id]
            known.append(res.as_dict())
    # note that this is the same list object as known, so the append below adds the
    # summary entry after the known resources
    inaccessible_resource_details = known
    if unknown['resource_count'] > 0:
        inaccessible_resource_details.append(
            InaccessibleResource(
                id=None,
                name=' '.join(
                    [toolkit._('Unknown resources'), f'({unknown["resource_count"]})']
                ),
                package_title=toolkit._('Unknown package'),
                record_count=unknown['record_count'],
            ).as_dict()
        )

    # the template context (this reuses the name of the action context from the
    # loop above, which is no longer needed by this point)
    context = {
        'query_doi': query_doi,
        'original_slug': query_doi.doi,
        'current_slug': current_slug,
        'usage_stats': usage_stats,
        'resources': accessible_resources,
        'packages': accessible_packages,
        'details': current_details,
        'saved_details': saved_details,
        'has_changed': inaccessible_count > 0,
        'is_inaccessible': len(accessible_resources) == 0,
        'warnings': warnings,
        'inaccessible_resources': inaccessible_resource_details,
    }
    return toolkit.render('query_dois/multisearch_landing_page.html', context)