// release_schedule/lib/api/wikidata/wikidata_movie_api.dart

import 'dart:convert';
import 'dart:math';
import 'package:http/http.dart';
import 'package:intl/intl.dart';
import 'package:release_schedule/api/api_manager.dart';
import 'package:release_schedule/api/json_helper.dart';
import 'package:release_schedule/api/movie_api.dart';
import 'package:release_schedule/api/wikidata/wikidata_movie.dart';
import 'package:release_schedule/model/dates.dart';
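
/// Wikidata property IDs referenced when querying and parsing movie entities.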
class WikidataProperties {
static const String instanceOf = "P31";
static const String publicationDate = "P577";
static const String title = "P1476";
static const String partOfTheSeries = "P179";
static const String basedOn = "P144";
static const String derivativeWork = "P4969";
static const String genre = "P136";
static const String countryOfOrigin = "P496";
static const String director = "P57";
static const String castMember = "P161";
static const String distributedBy = "P750";
static const String afterAWorkBy = "P1877";
static const String duration = "P2047";
static const String reviewScore = "P444";
static const String fskFilmRating = "P1981";
static const String placeOfPublication = "P291";
static const String shortName = "P1813";
}
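
/// Wikidata entity IDs for the "instance of" classes that identify movies.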
class WikidataEntities {
static const String film = "Q11424";
static const String filmProject = "Q18011172";
}
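
/// Shared client for the Wikidata Action API (wbgetentities, search).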
ApiManager _wikidataApi =
ApiManager("https://www.wikidata.org/w/api.php?origin=*");
class WikidataMovieApi implements MovieApi {
ApiManager queryApi =
ApiManager("https://query.wikidata.org/sparql?format=json&origin=*");
@override
Future<List<WikidataMovieData>> getUpcomingMovies(DateTime startDate,
[int count = 100]) async {
Response filmResponse = await queryApi.get(
"&query=${Uri.encodeComponent(_createUpcomingMovieQuery(startDate, WikidataEntities.film, count))}");
Response filmProjectResponse = await queryApi.get(
"&query=${Uri.encodeComponent(_createUpcomingMovieQuery(startDate, WikidataEntities.filmProject, count))}");
List<Response> responses = [filmResponse, filmProjectResponse];
for (var response in responses) {
if (response.statusCode != 200) {
throw Exception(
"The Wikidata request for upcoming movies failed with status ${response.statusCode} ${response.reasonPhrase}");
}
}
Iterable<Map<String, dynamic>> results =
responses.map((response) => jsonDecode(response.body));
Iterable<dynamic> entries =
results.expand((result) => result["results"]["bindings"]);
List<String> ids = entries
.map((entry) =>
RegExp(r"Q\d+$").firstMatch(entry["movie"]["value"])![0]!)
.toList();
return await _getMovieDataFromIds(ids);
}
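
/// Loads entity data for [movieIds] in batches of 50 via wbgetentities,
/// prefetches the labels of referenced countries and genres as well as the
/// Wikipedia intro texts, and converts each entity into a [WikidataMovieData].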
Future<List<WikidataMovieData>> _getMovieDataFromIds(
List<String> movieIds) async {
// Wikidata limits the number of entities per request to 50
const batchSize = 50;
Map<String, dynamic> entities = {};
for (int i = 0; i < (movieIds.length / batchSize).ceil(); i++) {
final start = i * batchSize;
final end = min((i + 1) * batchSize, movieIds.length);
var response = await _wikidataApi.get(
"&action=wbgetentities&format=json&props=labels|claims|sitelinks/urls&ids=${movieIds.sublist(start, end).join("|")}");
Map<String, dynamic> result = jsonDecode(response.body);
Map<String, dynamic> batchEntities = result["entities"];
entities.addAll(batchEntities);
}
List<String> allCountryAndGenreIds = [];
// Add the country IDs from the place-of-publication qualifiers of the publication dates
allCountryAndGenreIds.addAll(selectInJson<String>(entities,
"*.claims.${WikidataProperties.publicationDate}.*.qualifiers.${WikidataProperties.placeOfPublication}.*.datavalue.value.id"));
// Add the genre ids
allCountryAndGenreIds.addAll(selectInJson<String>(entities,
"*.claims.${WikidataProperties.genre}.*.mainsnak.datavalue.value.id"));
allCountryAndGenreIds = allCountryAndGenreIds.toSet().toList();
// Prefetch all country and genre labels to reduce the number of API calls;
// fromWikidataEntity will later read them from the cache.
await _getLabelsForEntities(allCountryAndGenreIds);
// Prefetch the Wikipedia intro texts (extracts) for the linked English articles
Iterable<String> allWikipediaTitles =
selectInJson<String>(entities, "*.sitelinks.enwiki.url")
.map((url) => url.split("/").last);
await _getWikipediaExplainTextForTitles(allWikipediaTitles.toList());
return movieIds
.map((id) => WikidataMovieData.fromWikidataEntity(id, entities[id]))
.toList();
}
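
/// Searches Wikidata full-text search for [searchTerm], restricted via the
/// haswbstatement keyword to entities that are films or film projects, and
/// loads the matching movies.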
@override
Future<List<WikidataMovieData>> searchForMovies(String searchTerm) async {
String haswbstatement =
"haswbstatement:${WikidataProperties.instanceOf}=${WikidataEntities.film}|${WikidataProperties.instanceOf}=${WikidataEntities.filmProject}";
String query =
"&action=query&list=search&format=json&srsearch=${Uri.encodeComponent(searchTerm)}%20$haswbstatement";
Response result = await _wikidataApi.get(query);
Map<String, dynamic> json = jsonDecode(result.body);
List<Map<String, dynamic>> searchResults =
selectInJson<Map<String, dynamic>>(json, "query.search.*").toList();
List<String> ids = searchResults
.map((result) => result["title"] as String)
.where((title) => RegExp(r"^Q\d+$").hasMatch(title))
.toList();
return await _getMovieDataFromIds(ids);
}
}
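
/// Builds a SPARQL query selecting the earliest publication date of each
/// movie of type [instanceOf], keeping only dates with at least month
/// precision on or after [startDate], ordered by date and limited to [limit].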
String _createUpcomingMovieQuery(
DateTime startDate, String instanceOf, int limit) {
String date = DateFormat("yyyy-MM-dd").format(startDate);
return """
SELECT
?movie
(MIN(?releaseDate) as ?minReleaseDate)
WHERE {
?movie wdt:${WikidataProperties.instanceOf} wd:$instanceOf;
wdt:${WikidataProperties.publicationDate} ?releaseDate.
?movie p:${WikidataProperties.publicationDate}/psv:${WikidataProperties.publicationDate} [wikibase:timePrecision ?precision].
FILTER (xsd:date(?releaseDate) >= xsd:date("$date"^^xsd:dateTime))
FILTER (?precision >= 10)
}
GROUP BY ?movie
ORDER BY ?minReleaseDate
LIMIT $limit""";
}
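
/// Converts Wikidata's numeric time precision (8 = decade, 9 = year,
/// 10 = month, 11 = day, 12 = hour, 13+ = minute) to a [DatePrecision];
/// precisions below 8 are rejected.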
DatePrecision precisionFromWikidata(int precision) {
return switch (precision) {
>= 13 => DatePrecision.minute,
12 => DatePrecision.hour,
11 => DatePrecision.day,
10 => DatePrecision.month,
9 => DatePrecision.year,
8 => DatePrecision.decade,
< 8 => throw Exception("The precision was too low, value: $precision"),
_ => throw Exception("Unexpected precision value: $precision"),
};
}
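
/// Cached labels for Wikidata entities, keyed by entity ID.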
Map<String, String> _labelCache = {};
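
/// Resolves a human-readable English label for each entity ID, preferring the
/// English "short name" claim, then the English label, then any available
/// label; requests are batched and results cached. Note that IDs that are
/// already cached are removed from [entityIds].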
Future<Map<String, String>> _getLabelsForEntities(
List<String> entityIds) async {
const batchSize = 50;
Map<String, String> labels = {};
for (int i = entityIds.length - 1; i >= 0; i--) {
if (_labelCache.containsKey(entityIds[i])) {
labels[entityIds[i]] = _labelCache[entityIds[i]]!;
entityIds.removeAt(i);
}
}
for (int i = 0; i < (entityIds.length / batchSize).ceil(); i++) {
final start = i * batchSize;
final end = min((i + 1) * batchSize, entityIds.length);
Response response = await _wikidataApi.get(
"&action=wbgetentities&format=json&props=labels|claims&ids=${entityIds.sublist(start, end).join("|")}");
Map<String, dynamic> result = jsonDecode(response.body);
Map<String, dynamic> batchEntities = result["entities"];
for (String entityId in batchEntities.keys) {
String? shortName = selectInJson(batchEntities[entityId],
"claims.${WikidataProperties.shortName}.*.mainsnak.datavalue.value")
.where((value) => value["language"] == "en")
.map((value) => (value["text"] as String))
.firstOrNull;
Map<String, dynamic> responseLabels = batchEntities[entityId]["labels"];
if (shortName != null) {
_labelCache[entityId] = labels[entityId] = shortName;
continue;
}
String label = responseLabels.containsKey("en")
? responseLabels["en"]["value"]
: responseLabels[responseLabels.keys.first]["value"];
_labelCache[entityId] = labels[entityId] = label;
}
}
return labels;
}
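
/// Returns the cached label for [entityId], falling back to the ID itself.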
String getCachedLabelForEntity(String entityId) {
return _labelCache[entityId] ?? entityId;
}
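
/// Client for the English Wikipedia Action API, used to fetch intro extracts.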
ApiManager _wikipediaApi =
ApiManager("https://en.wikipedia.org/w/api.php?format=json&origin=*");
Map<String, Dated<String?>> _wikipediaExplainTextCache = {};
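
/// Fetches the plain-text intro of each Wikipedia page in [pageTitles] via
/// action=query&prop=extracts, in batches of 50, mapping normalized titles
/// back to the originally requested ones; results are cached. Note that
/// titles that are already cached are removed from [pageTitles].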
Future<Map<String, Dated<String?>>> _getWikipediaExplainTextForTitles(
List<String> pageTitles) async {
const batchSize = 50;
Map<String, Dated<String?>> explainTexts = {};
for (int i = pageTitles.length - 1; i >= 0; i--) {
if (_wikipediaExplainTextCache.containsKey(pageTitles[i])) {
explainTexts[pageTitles[i]] = _wikipediaExplainTextCache[pageTitles[i]]!;
pageTitles.removeAt(i);
}
}
for (int i = 0; i < (pageTitles.length / batchSize).ceil(); i++) {
final start = i * batchSize;
final end = min((i + 1) * batchSize, pageTitles.length);
Response response = await _wikipediaApi.get(
"&action=query&prop=extracts&exintro&explaintext&redirects=1&titles=${pageTitles.sublist(start, end).join("|")}");
Map<String, dynamic> result = jsonDecode(response.body);
// The "normalized" list is only present when some titles were normalized.
List<dynamic> normalize = result["query"]["normalized"] ?? [];
Map<String, dynamic> batchPages = result["query"]["pages"];
for (String pageId in batchPages.keys) {
String pageTitle = batchPages[pageId]["title"];
String originalTitle = normalize
.where((element) => element["to"] == pageTitle)
.firstOrNull?["from"] ??
pageTitle;
String? explainText = batchPages[pageId]["extract"];
if (explainText != null) {
_wikipediaExplainTextCache[originalTitle] =
explainTexts[originalTitle] = Dated.now(explainText);
}
}
}
return explainTexts;
}
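
/// Returns the cached Wikipedia intro text for [title], if any.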
Dated<String?>? getCachedWikipediaExplainTextForTitle(String title) {
return _wikipediaExplainTextCache[title];
}