diff --git a/.gitignore b/.gitignore index 1148800..8451cac 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ */materials -config.json \ No newline at end of file +config.json +homework.xml \ No newline at end of file diff --git a/assignment1/homework1.xml b/assignment1/homework1.xml deleted file mode 100644 index a96e39d..0000000 --- a/assignment1/homework1.xml +++ /dev/null @@ -1,176 +0,0 @@ - - - - - - q1a - Count the number of tuples in Inproceedings - - - 2956396 - - - - q1b - Count the number of tuples in Article - - - 2738932 - - - - q1c - Count the number of tuples in Authorship - - - 18128940 - - - - q2a - Add a column "Area" in the Inproceedings table. - - - Done. - [] - - - - q2b - Populate the column 'Area' with the values from the table if there is a match, otherwise set it to 'UNKNOWN' - - - 13126 rows affected. - 13758 rows affected. - 6289 rows affected. - 43475 rows affected. - 2879748 rows affected. - [] - - - - q3a - Find the number of authors who published in each area (do not consider UNKNOWN). - - - Database 43836 - ML-AI 143124 - Systems 23744 - Theory 33626 - - - - q3b - Find the top-10 authors who published the most number of ``Database'' papers. - - - Divesh Srivastava 150 - H. V. Jagadish 127 - Surajit Chaudhuri 127 - Jiawei Han 0001 110 - Philip S. Yu 110 - Xuemin Lin 0001 109 - Jeffrey F. Naughton 108 - Beng Chin Ooi 105 - Hector Garcia-Molina 104 - Michael Stonebraker 100 - - - - q3c - Find the number of authors who published in exactly two of the four areas (do not consider UNKNOWN). - - - WITH area_authors AS ( - SELECT authorship.pubkey, author, area FROM inproceedings, authorship - WHERE - inproceedings.pubkey = authorship.pubkey - AND area != 'UNKNOWN' - ), - area_counts AS ( - SELECT author, COUNT(area) AS a_cnt FROM area_authors GROUP BY author - ) - - SELECT COUNT(*) as cnt FROM area_counts WHERE a_cnt = 2; - - - - q3d - Find the number of authors who wrote more journal papers than conference papers (irrespective of research areas). - ccnt - ]]> - - 1301165 - - - - q3e - - Among the authors who have published at least one “Database” paper (in any year), find the top-5 authors who published the - most number of papers (journal OR conference, in any area) since the year 2000 (including the year 2000). - - - - ??? - - -