You are browsing a read-only backup copy of Wikitech. The live site can be found at wikitech.wikimedia.org

Analytics/Systems/Cluster/Hive/Queries/Wikidata: Difference between revisions

From Wikitech-static
< Analytics‎ | Systems‎ | Cluster‎ | Hive‎ | Queries
Jump to navigation Jump to search
imported>Milimetric
m (Milimetric moved page Analytics/Cluster/Hive/Queries/Wikidata to Analytics/Systems/Cluster/Hive/Queries/Wikidata: Reorganizing documentation)
 
imported>Addshore
No edit summary
 
Line 1: Line 1:
== Special:EntityData calls per user agent in a given hour ==
== 2018 - Wikidata editors that have also edited a Wikipedia ==
 
<source lang="SQL" enclose="div">
WITH
wikipedias AS (
  SELECT
    DISTINCT dbname
  FROM wmf_raw.mediawiki_project_namespace_map
  WHERE snapshot = '2018-02'
    AND hostname LIKE '%wikipedia.org'
),
 
wikidata_editors AS (
  SELECT
    DISTINCT event_user_text
  FROM wmf.mediawiki_history
  WHERE snapshot = '2018-02'
    AND event_entity = 'revision'
    AND event_type = 'create'
    AND wiki_db = 'wikidatawiki'
    AND NOT event_user_is_anonymous
    AND NOT ARRAY_CONTAINS(event_user_groups, 'bot')
)
 
SELECT
  mwh.wiki_db,
  COUNT(DISTINCT mwh.event_user_text) as wikidata_coeditors
FROM wmf.mediawiki_history mwh
  JOIN wikipedias w ON (mwh.wiki_db = w.dbname)
  JOIN wikidata_editors wde ON (mwh.event_user_text = wde.event_user_text)
WHERE snapshot = '2018-02'
  AND event_entity = 'revision'
  AND event_type = 'create'
  AND NOT mwh.event_user_is_anonymous
  AND NOT ARRAY_CONTAINS(mwh.event_user_groups, 'bot')
GROUP BY
  mwh.wiki_db
ORDER BY wikidata_coeditors DESC
LIMIT 1000
;
</source>
 
== 2015 - Special:EntityData calls per user agent in a given hour ==
<source lang="SQL" enclose="div">
<source lang="SQL" enclose="div">
SELECT
SELECT

Latest revision as of 13:32, 21 March 2018

2018 - Wikidata editors that have also edited a Wikipedia

WITH
wikipedias AS (
  SELECT
    DISTINCT dbname
  FROM wmf_raw.mediawiki_project_namespace_map
  WHERE snapshot = '2018-02'
    AND hostname LIKE '%wikipedia.org'
),

wikidata_editors AS (
  SELECT
    DISTINCT event_user_text
  FROM wmf.mediawiki_history
  WHERE snapshot = '2018-02'
    AND event_entity = 'revision'
    AND event_type = 'create'
    AND wiki_db = 'wikidatawiki'
    AND NOT event_user_is_anonymous
    AND NOT ARRAY_CONTAINS(event_user_groups, 'bot')
)

SELECT
  mwh.wiki_db,
  COUNT(DISTINCT mwh.event_user_text) as wikidata_coeditors
FROM wmf.mediawiki_history mwh
  JOIN wikipedias w ON (mwh.wiki_db = w.dbname)
  JOIN wikidata_editors wde ON (mwh.event_user_text = wde.event_user_text)
WHERE snapshot = '2018-02'
  AND event_entity = 'revision'
  AND event_type = 'create'
  AND NOT mwh.event_user_is_anonymous
  AND NOT ARRAY_CONTAINS(mwh.event_user_groups, 'bot')
GROUP BY
  mwh.wiki_db
ORDER BY wikidata_coeditors DESC
LIMIT 1000
;

2015 - Special:EntityData calls per user agent in a given hour

SELECT
  count(*) as count, user_agent
FROM
  webrequest
WHERE
  year = 2015
  AND month = 10
  AND day = 12
  AND hour = 1
  AND uri_host = "www.wikidata.org"
  AND http_status = 200
  AND http_method = "GET"
  AND uri_path LIKE "/wiki/Special:EntityData%"
GROUP BY user_agent
ORDER BY count
LIMIT 999999;