Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
ad98fef
start to wiki interface, looks like sentence splitting is working
matthewfl Feb 21, 2015
57c2480
trying to build wiki to conll interface similar to the existing raw t…
matthewfl Feb 22, 2015
9b050c6
-__-
matthewfl Feb 25, 2015
deab92a
wiki doc appears to be correctly put together now
matthewfl Feb 25, 2015
3fdd569
seems to be generating the correct output
matthewfl Mar 1, 2015
8636e26
use threads
matthewfl Mar 1, 2015
ce8ab3f
minor bug fixes
matthewfl Mar 1, 2015
4346d03
trying to run the wikipedia interface now
matthewfl Mar 1, 2015
419e35c
changes to simply seralize the wiki documents
matthewfl Mar 3, 2015
e5b69f1
hopefully fix some bugs
matthewfl Mar 8, 2015
acf3eee
Merge branch 'wiki-train-better-serialize' into wiki-train
matthewfl Mar 8, 2015
5535e73
adding some comments
matthewfl Mar 13, 2015
b28c1b8
buggy f1 scorer
matthewfl Mar 13, 2015
8b4bf01
fix f1 metric
matthewfl Mar 14, 2015
bb12bd1
some bug fixes
matthewfl Mar 17, 2015
8c77dfb
make the gold follow the redirect db as they currently reference old …
matthewfl Mar 26, 2015
1ff173d
attempt at adding more queries to find the matching page title
matthewfl Mar 27, 2015
88d05a2
some changes to trying to generate queries
matthewfl Mar 28, 2015
b8a9e17
lot more queries being generated, but about 1/3 as many impossible qu…
matthewfl Apr 1, 2015
4ce0c43
attempt to include gold data when extracting useful components from w…
matthewfl Apr 3, 2015
9f3752e
better printing, and fixes to links db
matthewfl Apr 9, 2015
83ad954
failed attempt to simply add global wikification features, going to n…
matthewfl Apr 13, 2015
5b5a5d7
remove unused sql attempt
matthewfl Apr 13, 2015
b577da3
text db for getting some bow features from documents
matthewfl Apr 14, 2015
92b5173
some bug fixes and reduce memory pressure
matthewfl Apr 14, 2015
1a5e945
fixes for document word vectors
matthewfl Apr 19, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Tarball archives
berkeley-entity-models.tgz
data.tgz
# Local data, experiment and model directories
data/
expers/
models/
# Build output directories (presumably all produced by sbt; the assembly jar
# is expected under target/scala-2.11/)
project/project/
project/target/
target/
# NOTE(review): purpose of this directory is not evident from here; confirm
specify_execDir/
17 changes: 17 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Convenience targets: build the assembly jar and run a few common entry points.

# Fat jar written by `sbt assembly`.
TARGET := target/scala-2.11/berkeley-entity-assembly-1.jar

# Every file that feeds the jar. GNU Make's $(wildcard) has no recursive "**"
# (it globs like a single "*"), so the previous $(wildcard src/**/*) only
# matched entries exactly two levels below src/ and edits to deeper source
# files never triggered a rebuild. Walk the trees with find instead.
# build.sbt and resources/ are included because they also end up in the jar.
SOURCES := build.sbt $(shell find src resources -type f)

# These targets are commands, not files: always run them, even if a file with
# the same name happens to exist in the working directory.
.PHONY: all aceTester queryModel wikiLimited

# Default goal: make sure the jar is up to date.
all: $(TARGET)

# Rebuild the jar whenever any input is newer than it.
$(TARGET): $(SOURCES)
	sbt assembly

# Run ACETester against the ACE05 CoNLL data.
aceTester: $(TARGET)
	java -cp $(TARGET) edu.berkeley.nlp.entity.wiki.ACETester -dataPath data/ace05/ace05-all-conll

# Run QueryChooser against the serialized ACE wiki database.
queryModel: $(TARGET)
	java -cp $(TARGET) edu.berkeley.nlp.entity.wiki.QueryChooser -wikiDBPath models/wiki-db-ace.ser.gz

# Run the preprocessing driver in WIKILIMITED mode on the Wikipedia sample texts.
wikiLimited: $(TARGET)
	java -cp $(TARGET) edu.berkeley.nlp.entity.preprocess.PreprocessingDriver ++config/base.conf -inputDir ../WikificationACL2011Data/WikipediaSample/RawTextsTrain/ -outputDir /tmp/gggg/raw/ -mode WIKILIMITED
3 changes: 2 additions & 1 deletion build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ name := "berkeley-entity"

version := "1"

scalaVersion := "2.11.2"
scalaVersion := "2.11.6"

assemblySettings

mainClass in assembly := Some("edu.berkeley.nlp.entity.Driver")

unmanagedResourceDirectories in Compile += { baseDirectory.value / "resources/" }
9 changes: 9 additions & 0 deletions resources/Messages_de.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# German (de) message bundle: table-of-contents label, API endpoint, and
# category/image/template names.
wiki.tags.toc.content=Inhaltsverzeichnis
wiki.api.url=http://de.wikipedia.org/w/api.php
wiki.api.category1=Kategorie
wiki.api.image1=Datei
# NOTE(review): wiki.api.image2 is assigned again further down ("Image").
# If this file is read through java.util.Properties / ResourceBundle, the
# later assignment wins and "Bild" is never used. Confirm which value is
# intended and remove the other.
wiki.api.image2=Bild
wiki.api.template1=Vorlage
wiki.api.category2=Category
wiki.api.image2=Image
wiki.api.template2=Template
36 changes: 36 additions & 0 deletions resources/Messages_en.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# English (en) message bundle. Keys are listed alphabetically; each wiki.api.*
# name comes as a numbered pair (suffix 1 and 2).

wiki.api.category1 = Category
wiki.api.category2 = Category
wiki.api.categorytalk1 = Category_talk
wiki.api.categorytalk2 = Category_talk
wiki.api.help1 = Help
wiki.api.help2 = Help
wiki.api.helptalk1 = Help_talk
wiki.api.helptalk2 = Help_talk
wiki.api.image1 = Image
wiki.api.image2 = File
wiki.api.imagetalk1 = Image_talk
wiki.api.imagetalk2 = File_talk
wiki.api.media1 = Media
wiki.api.media2 = Media
wiki.api.mediawiki1 = MediaWiki
wiki.api.mediawiki2 = MediaWiki
wiki.api.mediawikitalk1 = MediaWiki_talk
wiki.api.mediawikitalk2 = MediaWiki_talk
wiki.api.meta1 = Meta
wiki.api.meta2 = Meta
wiki.api.metatalk1 = Meta_talk
wiki.api.metatalk2 = Meta_talk
wiki.api.special1 = Special
wiki.api.special2 = Special
wiki.api.talk1 = Talk
wiki.api.talk2 = Talk
wiki.api.template1 = Template
wiki.api.template2 = Template
wiki.api.templatetalk1 = Template_talk
wiki.api.templatetalk2 = Template_talk
wiki.api.url = http://en.wikipedia.org/w/api.php
wiki.api.user1 = User
wiki.api.user2 = User
wiki.api.usertalk1 = User_talk
wiki.api.usertalk2 = User_talk
wiki.tags.toc.content = Contents
8 changes: 8 additions & 0 deletions resources/Messages_es.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Spanish (es) message bundle: category/image/template name pairs, the API
# endpoint, and the table-of-contents label. Keys are listed alphabetically.

wiki.api.category1 = Categor\u00EDa
wiki.api.category2 = Category
wiki.api.image1 = Imagen
wiki.api.image2 = Image
wiki.api.template1 = Plantilla
wiki.api.template2 = Template
wiki.api.url = http://es.wikipedia.org/w/api.php
wiki.tags.toc.content = Contenido
8 changes: 8 additions & 0 deletions resources/Messages_fr.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# French (fr) message bundle: category/image/template name pairs, the API
# endpoint, and the table-of-contents label. Keys are listed alphabetically.

wiki.api.category1 = Cat\u00E9gorie
wiki.api.category2 = Category
wiki.api.image1 = Image
wiki.api.image2 = Image
wiki.api.template1 = Mod\u00E8le
wiki.api.template2 = Template
wiki.api.url = http://fr.wikipedia.org/w/api.php
wiki.tags.toc.content = Sommaire
8 changes: 8 additions & 0 deletions resources/Messages_it.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Italian (it) message bundle: category/image/template name pairs, the API
# endpoint, and the table-of-contents label. Keys are listed alphabetically.

wiki.api.category1 = Categoria
wiki.api.category2 = Category
wiki.api.image1 = Immagine
wiki.api.image2 = File
wiki.api.template1 = Template
wiki.api.template2 = Template
wiki.api.url = http://it.wikipedia.org/w/api.php
wiki.tags.toc.content = Indice
38 changes: 38 additions & 0 deletions resources/Messages_pt_BR.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#Generated by ResourceBundle Editor (http://eclipse-rbe.sourceforge.net)
# Brazilian Portuguese (pt_BR) message bundle.
# NOTE(review): the "*_falar" values look like literal translations of "talk";
# verify them against the names the Portuguese Wikipedia actually uses.

wiki.api.category1 = Categoria
wiki.api.category2 = Categoria
wiki.api.categorytalk1 = Categoria_falar
wiki.api.categorytalk2 = Categoria_falar
wiki.api.help1 = Ajuda
wiki.api.help2 = Ajuda
wiki.api.helptalk1 = Ajuda_falar
wiki.api.helptalk2 = Ajuda_falar
wiki.api.image1 = Imagem
wiki.api.image2 = Arquivo
wiki.api.imagetalk1 = Imagem_falar
wiki.api.imagetalk2 = Arquivo_falar
wiki.api.media1 = M\u00EDdia
wiki.api.media2 = M\u00EDdia
wiki.api.mediawiki1 = MediaWiki
wiki.api.mediawiki2 = MediaWiki
wiki.api.mediawikitalk1 = MediaWiki_falar
wiki.api.mediawikitalk2 = MediaWiki_falar
wiki.api.meta1 = Meta
wiki.api.meta2 = Meta
wiki.api.metatalk1 = Meta_falar
wiki.api.metatalk2 = Meta_falar
wiki.api.special1 = Especial
wiki.api.special2 = Especial
wiki.api.talk1 = Falar
wiki.api.talk2 = Falar
wiki.api.template1 = Modelo
wiki.api.template2 = Modelo
wiki.api.templatetalk1 = Modelo_falar
wiki.api.templatetalk2 = Modelo_falar
# The Portuguese-language Wikipedia is served from the "pt" subdomain; "br" is
# the language code for Breton, so the previous br.wikipedia.org endpoint
# pointed at the wrong wiki.
wiki.api.url = http://pt.wikipedia.org/w/api.php
wiki.api.user1 = Usu\u00E1rio
wiki.api.user2 = Usu\u00E1rio
wiki.api.usertalk1 = Usu\u00E1rio_falar
wiki.api.usertalk2 = Usu\u00E1rio_falar
wiki.tags.toc.content = Conte\u00FAdo
Loading