<?xml version="1.0" encoding="UTF-8"?>
<codeBook version="1.2.2" ID="KEN_2017-2018_MLCRM_v01_M" xml-lang="en" xmlns="http://www.icpsr.umich.edu/DDI" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.icpsr.umich.edu/DDI http://www.icpsr.umich.edu/DDI/Version1-2-2.xsd">
<docDscr>
  <citation>
    <titlStmt>
      <IDNo>DDI_KEN_2017-2018_MLCRM_v01_M_WB</IDNo>
    </titlStmt>
    <prodStmt>
      <producer abbr="DECDG" affiliation="World Bank" role="Documentation of the study">Development Economics Data Group</producer>
      <prodDate date="2021-03-24">2021-03-24</prodDate>
      <software version="v5">NADA</software>
    </prodStmt>
    <verStmt>
      <version>Version 01 (March 2021)</version>
    </verStmt>
  </citation>
</docDscr>
<stdyDscr>
  <citation>
    <titlStmt>
      <titl>Manually Labelled Crash Reports from Ma3Route 2017-2018</titl>
      <subTitl/>
      <altTitl>MLCRM 2017-2018</altTitl>
      <parTitl/>
      <IDNo>KEN_2017-2018_MLCRM_v01_M</IDNo>
    </titlStmt>
    <rspStmt>
      <AuthEnty affiliation="">World Bank</AuthEnty>
    </rspStmt>
    <prodStmt>
      <producer abbr="" affiliation="Development Impact Evaluation Department, World Bank" role="">Sveta Milusheva</producer>
      <producer abbr="" affiliation="Development Impact Evaluation Department, World Bank" role="">Robert Marty</producer>
      <producer abbr="" affiliation="Development Impact Evaluation Department, World Bank" role="">Guadalupe Bedoya</producer>
      <producer abbr="" affiliation="School of Architecture and Planning, Massachusetts Institute of Technology" role="">Sarah Williams</producer>
      <producer abbr="" affiliation="School of Information, University of California, Berkeley" role="">Elizabeth Resor</producer>
      <producer abbr="" affiliation="Development Impact Evaluation Department, World Bank" role="">Arianna Legovini</producer>
      <copyright/>
      <software version="5.0" date="2021-12-14">NADA</software>
      <grantNo/>
    </prodStmt>
    <distStmt>
      <contact affiliation="World Bank" URI="" email="rmarty@worldbank.org">Robert Marty</contact>
      <depDate date=""/>
      <distDate date=""/>
    </distStmt>
    <serStmt>
      <serName/>
      <serInfo>An associated "Tweet IDs From Ma3Route 2012-2020" dataset is available at: https://microdata.worldbank.org/index.php/catalog/3820</serInfo>
    </serStmt>
    <verStmt>
      <version date="">- v2.1:  Edited, anonymous dataset for public distribution.</version>
      <verResp/>
      <notes/>
    </verStmt>
    <biblCit format=""/>
    <notes/>
  </citation>
  <stdyInfo>
    <studyBudget/>
    <subject>
    </subject>
    <abstract>The purpose of the Tweet IDs and Manually Labelled Crash Reports from Ma3Route 2017-2018 project is to identify tweets from the @Ma3Route Twitter handle that report road traffic crashes. Using the Twitter API, tweets were scraped from Ma3Route, which is a mobile/web/SMS platform that crowdsources transport data and provides users with information on road traffic crash reports as well as traffic, matatu directions, and driving reports.</abstract>
    <sumDscr>
      <collDate date="2017-07-01" event="start" cycle=""/>
      <collDate date="2018-07-31" event="end" cycle=""/>
      <nation abbr="KEN">Kenya</nation>
      <geogCover>Kenya (primarily Nairobi)</geogCover>
      <geogUnit/>
      <anlyUnit>Road traffic crash reports</anlyUnit>
      <universe>Tweets reporting road traffic crashes, scraped from the Twitter handle @Ma3Route</universe>
      <dataKind>Observation data/ratings [obs]</dataKind>
    </sumDscr>
    <!-- qualityStatement - ddi2.5 - complex type
     
     This structure consists of two parts, standardsCompliance and otherQualityStatements. 
     In standardsCompliance list all specific standards complied with during the execution of this 
     study. Note the standard name and producer and how the study complied with the standard. 
     Enter any additional quality statements in otherQualityStatements.
     
     -->
    <qualityStatement>
      <standardsCompliance>
        <standard>
          <standardName/>
          <producer/>
        </standard>
        <complianceDescription/>
      </standardsCompliance>
      <otherQualityStatement/>
    </qualityStatement>
    <notes/>
    <!-- exPostEvaluation ddi2.5
      Use this section to describe evaluation procedures not addressed in data evaluation processes. 
      These may include issues such as timing of the study, sequencing issues, cost/budget issues, 
      relevance, institutional or legal arrangements etc. of the study. 
      
      The completionDate attribute holds the date the evaluation was completed. 
      The type attribute is an optional type to identify the type of evaluation with or without 
      the use of a controlled vocabulary.
    -->
    <exPostEvaluation completionDate="" type="">
      <evaluationProcess/>
      <outcomes/>
    </exPostEvaluation>
  </stdyInfo>
  <method>
    <dataColl>
      <timeMeth/>
      <!-- collectorTraining - DDI2.5
        
        Collector Training

        Describes the training provided to data collectors including interviewer training, process testing, 
        compliance with standards etc. This is repeatable for language and to capture different aspects of the 
        training process. The type attribute allows specification of the type of training being described.
        
        -->
      <collectorTraining type=""/>
      <frequenc/>
      <sampProc/>
      <sampleFrame>
        <sampleFrameName/>
        <custodian/>
        <universe/>
        <frameUnit isPrimary="">
          <unitType numberOfUnits=""/>
        </frameUnit>
        <updateProcedure/>
      </sampleFrame>
      <deviat/>
      <collMode>Other [oth]</collMode>
      <resInstru/>
      <!-- instrumentDevelopment - DDI2.5             
        Describe any development work on the data collection instrument. Type attribute allows for the optional use of a defined development type with or without use of a controlled vocabulary.
        -->
      <instrumentDevelopment type=""/>
      <collSitu>Using the Twitter API, tweets were scraped from the Twitter handle @Ma3Route. Ma3Route is a mobile/web/SMS platform that crowdsources transport data and provides users with information on traffic, road traffic crash (RTC), matatu directions and driving reports. Users post RTC or traffic information to Ma3Route, where Ma3Route then publishes the post on Twitter. Tweets were obtained in order to identify tweets that reported RTC. Tweets from May 2012 to July 2020 were scraped, and a "truth dataset" of tweets, manually coded to determine whether they reported crashes and the location of the reported crashes, was generated. Additional information on the data is provided in additional documents found under the 'Documentation' tab.</collSitu>
      <actMin/>
      <ConOps/>
      <weight/>
      <cleanOps/>
    </dataColl>
    <notes/>
    <anlyInfo>
      <respRate/>
      <EstSmpErr/>
      <dataAppr/>
    </anlyInfo>
    <stdyClas/>
    <dataProcessing type=""/>
    <codingInstructions relatedProcesses="" type="">
      <txt/>
      <command formalLanguage=""/>
    </codingInstructions>
  </method>
  <dataAccs>
    <setAvail>
      <accsPlac URI=""/>
      <origArch/>
      <avlStatus/>
      <collSize/>
      <complete/>
      <fileQnty/>
      <notes/>
    </setAvail>
    <useStmt>
      <confDec required="yes" formNo="" URI="">Users of the data agree to keep confidential all data contained in these datasets and to make no attempt to identify, trace or contact any individual whose data is included in these datasets.</confDec>
      <restrctn/>
      <citReq>Use of the dataset must be acknowledged using a citation which would include:
- the Identification of the Primary Investigator
- the title of the survey (including country, acronym and year of implementation)
- the survey reference number
- the source and date of download

Example,

World Bank Group. Kenya - Manually Labelled Crash Reports from Ma3Route (MLCRM) 2017-2018. Ref. KEN_2017-2018_MLCRM_v01_M. Dataset downloaded from [url] on [date].</citReq>
      <deposReq/>
      <conditions>Licensed access</conditions>
      <disclaimer>The user of the data acknowledges that the original collector of the data, the authorized distributor of the data, and the relevant funding agency bear no responsibility for use of the data or for interpretations or inferences based upon such uses.</disclaimer>
    </useStmt>
    <notes/>
  </dataAccs>
  <notes/>
</stdyDscr>
<fileDscr ID="F1">
  <fileTxt>
    <fileName>twitter_truth</fileName>
    <fileCont>Using the Twitter API, tweets were scraped from the Twitter handle @Ma3Route. Ma3Route is a mobile/web/SMS platform that crowdsources transport data and provides users with information on traffic, road traffic crash (RTC), matatu directions and driving reports. Users post RTC or traffic information to Ma3Route, where Ma3Route then publishes the post on Twitter. Tweets were obtained in order to identify tweets that reported RTC. This dataset includes a manually labelled dataset of a subset of tweets indicating which tweets report a crash and the location of crashes.</fileCont>
    <dimensns>
      <caseQnty>9479</caseQnty>
      <varQnty>9</varQnty>
    </dimensns>
    <dataChck></dataChck>
    <dataMsng></dataMsng>
    <verStmt>
      <version></version>
    </verStmt>
  </fileTxt>
  <notes></notes>
</fileDscr>
<dataDscr>
<var ID="V1" name="uid" files="F1" intrvl="contin">
  <labl>Unique ID</labl>
  <imputation>Unique ID</imputation>
  <security>Unique ID</security>
  <embargo>Unique ID</embargo>
  <respUnit>Unique ID</respUnit>
  <qstn>
    <qstnLit>Unique ID</qstnLit>
  </qstn>
  <sumStat type="vald">9479</sumStat>
  <sumStat type="invd"/>
  <sumStat type="min">1</sumStat>
  <sumStat type="max">9479</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V2" name="tweet_id" files="F1" intrvl="discrete">
  <labl>Tweet ID</labl>
  <imputation>Tweet ID</imputation>
  <security>Tweet ID</security>
  <embargo>Tweet ID</embargo>
  <respUnit>Tweet ID</respUnit>
  <qstn>
    <qstnLit>Tweet ID</qstnLit>
  </qstn>
  <sumStat type="vald">7724</sumStat>
  <sumStat type="invd"/>
  <varFormat type="character"/>
</var>
<var ID="V3" name="created_at" files="F1" intrvl="contin">
  <labl>Time Date/Time (EAT)</labl>
  <imputation>Time Date/Time (EAT)</imputation>
  <security>Time Date/Time (EAT)</security>
  <embargo>Time Date/Time (EAT)</embargo>
  <respUnit>Time Date/Time (EAT)</respUnit>
  <qstn>
    <qstnLit>Time Date/Time (EAT)</qstnLit>
  </qstn>
  <sumStat type="vald">9479</sumStat>
  <sumStat type="invd"/>
  <sumStat type="min">1814496118000</sumStat>
  <sumStat type="max">1848681624000</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V4" name="crash_report" files="F1" intrvl="discrete">
  <labl>Tweet reports crash</labl>
  <imputation>Tweet reports crash</imputation>
  <security>Tweet reports crash</security>
  <embargo>Tweet reports crash</embargo>
  <respUnit>Tweet reports crash</respUnit>
  <qstn>
    <qstnLit>Tweet reports crash</qstnLit>
  </qstn>
  <sumStat type="vald">9479</sumStat>
  <sumStat type="invd"/>
  <sumStat type="min">0</sumStat>
  <sumStat type="max">1</sumStat>
  <catgry>
    <catValu>0</catValu>
    <labl>No</labl>
    <catStat type="vald"/>
  </catgry>
  <catgry>
    <catValu>1</catValu>
    <labl>Yes</labl>
    <catStat type="invd"/>
  </catgry>
  <varFormat type="numeric"/>
</var>
<var ID="V5" name="latitude" files="F1" intrvl="contin">
  <labl>Latitude of crash</labl>
  <imputation>Latitude of crash</imputation>
  <security>Latitude of crash</security>
  <embargo>Latitude of crash</embargo>
  <respUnit>Latitude of crash</respUnit>
  <qstn>
    <qstnLit>Latitude of crash</qstnLit>
  </qstn>
  <sumStat type="vald">4193</sumStat>
  <sumStat type="invd">5286</sumStat>
  <sumStat type="min">-4.06</sumStat>
  <sumStat type="max">1.257</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V6" name="longitude" files="F1" intrvl="contin">
  <labl>Longitude of crash</labl>
  <imputation>Longitude of crash</imputation>
  <security>Longitude of crash</security>
  <embargo>Longitude of crash</embargo>
  <respUnit>Longitude of crash</respUnit>
  <qstn>
    <qstnLit>Longitude of crash</qstnLit>
  </qstn>
  <sumStat type="vald">4191</sumStat>
  <sumStat type="invd">5288</sumStat>
  <sumStat type="min">34.146</sumStat>
  <sumStat type="max">40.171</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V7" name="crash_id_c1" files="F1" intrvl="contin">
  <labl>Crash ID (from coder 1)</labl>
  <imputation>Crash ID (from coder 1)</imputation>
  <security>Crash ID (from coder 1)</security>
  <embargo>Crash ID (from coder 1)</embargo>
  <respUnit>Crash ID (from coder 1)</respUnit>
  <qstn>
    <qstnLit>Crash ID (from coder 1)</qstnLit>
  </qstn>
  <sumStat type="vald">3796</sumStat>
  <sumStat type="invd">5683</sumStat>
  <sumStat type="min">907</sumStat>
  <sumStat type="max">41898</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V8" name="crash_id_c2" files="F1" intrvl="contin">
  <labl>Crash ID (from coder 2)</labl>
  <imputation>Crash ID (from coder 2)</imputation>
  <security>Crash ID (from coder 2)</security>
  <embargo>Crash ID (from coder 2)</embargo>
  <respUnit>Crash ID (from coder 2)</respUnit>
  <qstn>
    <qstnLit>Crash ID (from coder 2)</qstnLit>
  </qstn>
  <sumStat type="vald">3674</sumStat>
  <sumStat type="invd">5805</sumStat>
  <sumStat type="min">909</sumStat>
  <sumStat type="max">47227</sumStat>
  <varFormat type="numeric"/>
</var>
<var ID="V9" name="crash_landmark" files="F1" intrvl="discrete">
  <labl>Landmark used to geocode</labl>
  <imputation>Landmark used to geocode</imputation>
  <security>Landmark used to geocode</security>
  <embargo>Landmark used to geocode</embargo>
  <respUnit>Landmark used to geocode</respUnit>
  <qstn>
    <qstnLit>Landmark used to geocode</qstnLit>
  </qstn>
  <sumStat type="vald">4210</sumStat>
  <sumStat type="invd"/>
  <varFormat type="character"/>
</var>
</dataDscr></codeBook>
